//
// Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
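//
// As an illustration (reading the first definition below): the line
//   reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
// declares RAX as save-on-call under both the Java and C conventions,
// spilled and restored as an int (LoadI/StoreI), with hardware encoding 0.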

// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers)
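// For example, "mov bl, 1" encodes as B3 01, while "mov sil, 1" needs a REX
// prefix (40 B6 01; without REX that encoding would address DH instead), and
// "mov r8b, 1" needs REX.B (41 B0 01).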

// Previously RBX, RSI, and RDI were set as save-on-entry for Java code, but
// SOE was turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are turned back on as SOE
// registers.

reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.
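// The chunk0 ordering below follows this heuristic: pure scratch registers
// such as R10 and R11 come first, while RSP, which participates in every
// calling sequence, comes last.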

alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   R16,         R16_H,
                   R17,         R17_H,
                   R18,         R18_H,
                   R19,         R19_H,
                   R20,         R20_H,
                   R21,         R21_H,
                   R22,         R22_H,
                   R23,         R23_H,
                   R24,         R24_H,
                   R25,         R25_H,
                   R26,         R26_H,
                   R27,         R27_H,
                   R28,         R28_H,
                   R29,         R29_H,
                   R30,         R30_H,
                   R31,         R31_H,
                   RSP,         RSP_H);
// XMM registers.  512-bit registers, i.e. 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No registers preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters
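// For example, a Float occupies slot XMM0 alone, a Double occupies
// XMM0:XMM0b, and a full 512-bit vector spans XMM0 through XMM0p.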

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
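// (That is, ADLC generates those two classes itself from the frame and
// stack-slot information; only the classes below are written out by hand.)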

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}
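// Note: classes defined with %{ ... %} return a RegMask computed at run time
// rather than a fixed member list. The masks referenced here and below (for
// example _ANY_REG_mask) are assumed to be populated during VM startup
// (typically in a reg_mask_init()-style routine in this file's source block),
// which lets their contents depend on CPU features and flags such as UseAPX.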

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
 1055 // flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
 1061 // Class for pre evex float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
 1079 // Class for evex float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
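
// A reg_class_dynamic selects between its two underlying classes using the
// given predicate: float_reg above resolves to float_reg_evex (XMM0-XMM31)
// when the CPU supports EVEX encoding, and to float_reg_legacy (XMM0-XMM15)
// otherwise. The same pattern is used for the double and vector classes
// below.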
 1115 
 1116 // Class for pre evex double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
 1134 // Class for evex double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
 1171 // Class for pre evex 32bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
 1189 // Class for evex 32bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre evex 64bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for evex 64bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre evex 128bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for evex 128bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre evex 256bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for evex 256bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
// Class for evex 512bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
// Class for restricted 512bit vector registers (XMM0-XMM15 only)
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
 1525 static bool need_r12_heapbase() {
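  // r12 doubles as the compressed-oop heap base register when compressed
  // oops are enabled, so it must then be kept out of the allocatable masks
  // (see reg_mask_init below).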
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
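  // r16-r31 are the APX extended GPRs (EGPRs). They are allocatable only
  // when UseAPX is enabled; otherwise they are removed from the masks below.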
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() &&
         (C->max_vector_size() > 16 || C->clear_upper_avx());
}
 1629 
static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0; // size of vzeroupper
}
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
 1662 
 1663 //
 1664 // Compute padding required for nodes which need alignment
 1665 //
 1666 
// The 4-byte displacement of the call instruction must be 4-byte aligned
// so that it does not span a cache line and can be patched atomically.
 1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1670 {
 1671   current_offset += clear_avx_size(); // skip vzeroupper
 1672   current_offset += 1; // skip call opcode byte
 1673   return align_up(current_offset, alignment_required()) - current_offset;
 1674 }
 1675 
// The 4-byte displacement of the call instruction must be 4-byte aligned
// so that it does not span a cache line and can be patched atomically.
 1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1679 {
 1680   current_offset += clear_avx_size(); // skip vzeroupper
 1681   current_offset += 11; // skip movq instruction + call opcode byte
 1682   return align_up(current_offset, alignment_required()) - current_offset;
 1683 }
 1684 
 1685 // This could be in MacroAssembler but it's fairly C2 specific
 1686 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1687   Label exit;
 1688   __ jccb(Assembler::noParity, exit);
 1689   __ pushf();
 1690   //
 1691   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1692   // zero OF,AF,SF for NaN values.
 1693   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1694   // values returns 'less than' result (CF is set).
 1695   // Leave the rest of flags unchanged.
 1696   //
 1697   //    7 6 5 4 3 2 1 0
 1698   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1699   //    0 0 1 0 1 0 1 1   (0x2B)
 1700   //
 1701   __ andq(Address(rsp, 0), 0xffffff2b);
 1702   __ popf();
 1703   __ bind(exit);
 1704 }
 1705 
 1706 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  // The preceding floating point compare sets CF=1 both for 'below' and for
  // the unordered case, i.e. when at least one input is NaN, so NaN inputs
  // take the 'below' branch and produce the -1 result.
 1709   Label done;
 1710   __ movl(dst, -1);
 1711   __ jcc(Assembler::below, done);
 1712   __ setcc(Assembler::notEqual, dst);
 1713   __ bind(done);
 1714 }
 1715 
 1716 // Math.min()    # Math.max()
 1717 // --------------------------
 1718 // ucomis[s/d]   #
 1719 // ja   -> b     # a
 1720 // jp   -> NaN   # NaN
 1721 // jb   -> a     # b
 1722 // je            #
 1723 // |-jz -> a | b # a & b
 1724 // |    -> a     #
 1725 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1726                             XMMRegister a, XMMRegister b,
 1727                             XMMRegister xmmt, Register rt,
 1728                             bool min, bool single) {
 1729 
 1730   Label nan, zero, below, above, done;
 1731 
 1732   if (single)
 1733     __ ucomiss(a, b);
 1734   else
 1735     __ ucomisd(a, b);
 1736 
 1737   if (dst->encoding() != (min ? b : a)->encoding())
 1738     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1739   else
 1740     __ jccb(Assembler::above, done);
 1741 
 1742   __ jccb(Assembler::parity, nan);  // PF=1
 1743   __ jccb(Assembler::below, below); // CF=1
 1744 
 1745   // equal
 1746   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1747   if (single) {
 1748     __ ucomiss(a, xmmt);
 1749     __ jccb(Assembler::equal, zero);
 1750 
 1751     __ movflt(dst, a);
 1752     __ jmp(done);
 1753   }
 1754   else {
 1755     __ ucomisd(a, xmmt);
 1756     __ jccb(Assembler::equal, zero);
 1757 
 1758     __ movdbl(dst, a);
 1759     __ jmp(done);
 1760   }
 1761 
 1762   __ bind(zero);
 1763   if (min)
 1764     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1765   else
 1766     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1767 
 1768   __ jmp(done);
 1769 
 1770   __ bind(above);
 1771   if (single)
 1772     __ movflt(dst, min ? b : a);
 1773   else
 1774     __ movdbl(dst, min ? b : a);
 1775 
 1776   __ jmp(done);
 1777 
 1778   __ bind(nan);
 1779   if (single) {
 1780     __ movl(rt, 0x7fc00000); // Float.NaN
 1781     __ movdl(dst, rt);
 1782   }
 1783   else {
 1784     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1785     __ movdq(dst, rt);
 1786   }
 1787   __ jmp(done);
 1788 
 1789   __ bind(below);
 1790   if (single)
 1791     __ movflt(dst, min ? a : b);
 1792   else
 1793     __ movdbl(dst, min ? a : b);
 1794 
 1795   __ bind(done);
 1796 }
 1797 
 1798 //=============================================================================
 1799 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1800 
 1801 int ConstantTable::calculate_table_base_offset() const {
 1802   return 0;  // absolute addressing, no offset
 1803 }
 1804 
 1805 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1806 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1807   ShouldNotReachHere();
 1808 }
 1809 
 1810 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1811   // Empty encoding
 1812 }
 1813 
 1814 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1815   return 0;
 1816 }
 1817 
 1818 #ifndef PRODUCT
 1819 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1820   st->print("# MachConstantBaseNode (empty encoding)");
 1821 }
 1822 #endif
 1823 
 1824 
 1825 //=============================================================================
 1826 #ifndef PRODUCT
 1827 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1828   Compile* C = ra_->C;
 1829 
 1830   int framesize = C->output()->frame_size_in_bytes();
 1831   int bangsize = C->output()->bang_size_in_bytes();
 1832   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1833   // Remove wordSize for return addr which is already pushed.
 1834   framesize -= wordSize;
 1835 
 1836   if (C->output()->need_stack_bang(bangsize)) {
 1837     framesize -= wordSize;
 1838     st->print("# stack bang (%d bytes)", bangsize);
 1839     st->print("\n\t");
 1840     st->print("pushq   rbp\t# Save rbp");
 1841     if (PreserveFramePointer) {
 1842         st->print("\n\t");
 1843         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1844     }
 1845     if (framesize) {
 1846       st->print("\n\t");
 1847       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1848     }
 1849   } else {
 1850     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1851     st->print("\n\t");
 1852     framesize -= wordSize;
 1853     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1854     if (PreserveFramePointer) {
 1855       st->print("\n\t");
 1856       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1857       if (framesize > 0) {
 1858         st->print("\n\t");
 1859         st->print("addq    rbp, #%d", framesize);
 1860       }
 1861     }
 1862   }
 1863 
 1864   if (VerifyStackAtCalls) {
 1865     st->print("\n\t");
 1866     framesize -= wordSize;
 1867     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1868 #ifdef ASSERT
 1869     st->print("\n\t");
 1870     st->print("# stack alignment check");
 1871 #endif
 1872   }
 1873   if (C->stub_function() != nullptr) {
 1874     st->print("\n\t");
 1875     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1876     st->print("\n\t");
 1877     st->print("je      fast_entry\t");
 1878     st->print("\n\t");
 1879     st->print("call    #nmethod_entry_barrier_stub\t");
 1880     st->print("\n\tfast_entry:");
 1881   }
 1882   st->cr();
 1883 }
 1884 #endif
 1885 
 1886 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1887   Compile* C = ra_->C;
 1888 
 1889   __ verified_entry(C);
 1890 
 1891   if (ra_->C->stub_function() == nullptr) {
 1892     __ entry_barrier();
 1893   }
 1894 
 1895   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1896     __ bind(*_verified_entry);
 1897   }
 1898 
 1899   C->output()->set_frame_complete(__ offset());
 1900 
 1901   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because uses of the constant
    // table may be emitted before the MachConstantBaseNode itself.
 1904     ConstantTable& constant_table = C->output()->constant_table();
 1905     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1906   }
 1907 }
 1908 
 1909 
 1910 int MachPrologNode::reloc() const
 1911 {
 1912   return 0; // a large enough number
 1913 }
 1914 
 1915 //=============================================================================
 1916 #ifndef PRODUCT
 1917 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1918 {
 1919   Compile* C = ra_->C;
 1920   if (generate_vzeroupper(C)) {
 1921     st->print("vzeroupper");
 1922     st->cr(); st->print("\t");
 1923   }
 1924 
 1925   int framesize = C->output()->frame_size_in_bytes();
 1926   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1927   // Remove word for return adr already pushed
 1928   // and RBP
 1929   framesize -= 2*wordSize;
 1930 
 1931   if (framesize) {
 1932     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1933     st->print("\t");
 1934   }
 1935 
 1936   st->print_cr("popq    rbp");
 1937   if (do_polling() && C->is_method_compilation()) {
 1938     st->print("\t");
 1939     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1940                  "ja      #safepoint_stub\t"
 1941                  "# Safepoint: poll for GC");
 1942   }
 1943 }
 1944 #endif
 1945 
 1946 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1947 {
 1948   Compile* C = ra_->C;
 1949 
 1950   if (generate_vzeroupper(C)) {
 1951     // Clear upper bits of YMM registers when current compiled code uses
 1952     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1953     __ vzeroupper();
 1954   }
 1955 
 1956   // Subtract two words to account for return address and rbp
 1957   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
 1958   __ remove_frame(initial_framesize, C->needs_stack_repair());
 1959 
 1960   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1961     __ reserved_stack_check();
 1962   }
 1963 
 1964   if (do_polling() && C->is_method_compilation()) {
 1965     Label dummy_label;
 1966     Label* code_stub = &dummy_label;
 1967     if (!C->output()->in_scratch_emit_size()) {
 1968       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1969       C->output()->add_stub(stub);
 1970       code_stub = &stub->entry();
 1971     }
 1972     __ relocate(relocInfo::poll_return_type);
 1973     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1974   }
 1975 }
 1976 
 1977 int MachEpilogNode::reloc() const
 1978 {
 1979   return 2; // a large enough number
 1980 }
 1981 
 1982 const Pipeline* MachEpilogNode::pipeline() const
 1983 {
 1984   return MachNode::pipeline_class();
 1985 }
 1986 
 1987 //=============================================================================
 1988 
 1989 enum RC {
 1990   rc_bad,
 1991   rc_int,
 1992   rc_kreg,
 1993   rc_float,
 1994   rc_stack
 1995 };
 1996 
 1997 static enum RC rc_class(OptoReg::Name reg)
 1998 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
 2000 
 2001   if (OptoReg::is_stack(reg)) return rc_stack;
 2002 
 2003   VMReg r = OptoReg::as_VMReg(reg);
 2004 
 2005   if (r->is_Register()) return rc_int;
 2006 
 2007   if (r->is_KRegister()) return rc_kreg;
 2008 
 2009   assert(r->is_XMMRegister(), "must be");
 2010   return rc_float;
 2011 }
 2012 
 2013 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2014 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2015                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2016 
 2017 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2018                      int stack_offset, int reg, uint ireg, outputStream* st);
 2019 
 2020 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2021                                       int dst_offset, uint ireg, outputStream* st) {
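  // No scratch register is guaranteed free during a mem-mem vector spill, so
  // rax (or xmm0 for the wider copies) is parked just below rsp, used to
  // ferry the data, and then restored; the 64- and 128-bit cases use
  // pushq/popq pairs and need no scratch at all.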
 2022   if (masm) {
 2023     switch (ireg) {
 2024     case Op_VecS:
 2025       __ movq(Address(rsp, -8), rax);
 2026       __ movl(rax, Address(rsp, src_offset));
 2027       __ movl(Address(rsp, dst_offset), rax);
 2028       __ movq(rax, Address(rsp, -8));
 2029       break;
 2030     case Op_VecD:
 2031       __ pushq(Address(rsp, src_offset));
 2032       __ popq (Address(rsp, dst_offset));
 2033       break;
 2034     case Op_VecX:
 2035       __ pushq(Address(rsp, src_offset));
 2036       __ popq (Address(rsp, dst_offset));
 2037       __ pushq(Address(rsp, src_offset+8));
 2038       __ popq (Address(rsp, dst_offset+8));
 2039       break;
 2040     case Op_VecY:
 2041       __ vmovdqu(Address(rsp, -32), xmm0);
 2042       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2043       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2044       __ vmovdqu(xmm0, Address(rsp, -32));
 2045       break;
 2046     case Op_VecZ:
 2047       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 2048       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 2049       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 2050       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 2051       break;
 2052     default:
 2053       ShouldNotReachHere();
 2054     }
 2055 #ifndef PRODUCT
 2056   } else {
 2057     switch (ireg) {
 2058     case Op_VecS:
 2059       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2060                 "movl    rax, [rsp + #%d]\n\t"
 2061                 "movl    [rsp + #%d], rax\n\t"
 2062                 "movq    rax, [rsp - #8]",
 2063                 src_offset, dst_offset);
 2064       break;
 2065     case Op_VecD:
 2066       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2067                 "popq    [rsp + #%d]",
 2068                 src_offset, dst_offset);
 2069       break;
    case Op_VecX:
 2071       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2072                 "popq    [rsp + #%d]\n\t"
 2073                 "pushq   [rsp + #%d]\n\t"
 2074                 "popq    [rsp + #%d]",
 2075                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2076       break;
 2077     case Op_VecY:
 2078       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2079                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2080                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2081                 "vmovdqu xmm0, [rsp - #32]",
 2082                 src_offset, dst_offset);
 2083       break;
 2084     case Op_VecZ:
 2085       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2086                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2087                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2088                 "vmovdqu xmm0, [rsp - #64]",
 2089                 src_offset, dst_offset);
 2090       break;
 2091     default:
 2092       ShouldNotReachHere();
 2093     }
 2094 #endif
 2095   }
 2096 }
 2097 
 2098 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2099                                        PhaseRegAlloc* ra_,
 2100                                        bool do_size,
 2101                                        outputStream* st) const {
 2102   assert(masm != nullptr || st  != nullptr, "sanity");
 2103   // Get registers to move
 2104   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2105   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2106   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2107   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2108 
 2109   enum RC src_second_rc = rc_class(src_second);
 2110   enum RC src_first_rc = rc_class(src_first);
 2111   enum RC dst_second_rc = rc_class(dst_second);
 2112   enum RC dst_first_rc = rc_class(dst_first);
 2113 
 2114   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2115          "must move at least 1 register" );
 2116 
 2117   if (src_first == dst_first && src_second == dst_second) {
 2118     // Self copy, no move
 2119     return 0;
 2120   }
 2121   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2122     uint ireg = ideal_reg();
 2123     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2124     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
 2138     } else {
 2139       ShouldNotReachHere();
 2140     }
 2141     return 0;
 2142   }
 2143   if (src_first_rc == rc_stack) {
 2144     // mem ->
 2145     if (dst_first_rc == rc_stack) {
 2146       // mem -> mem
 2147       assert(src_second != dst_first, "overlap");
 2148       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2149           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2150         // 64-bit
 2151         int src_offset = ra_->reg2offset(src_first);
 2152         int dst_offset = ra_->reg2offset(dst_first);
 2153         if (masm) {
 2154           __ pushq(Address(rsp, src_offset));
 2155           __ popq (Address(rsp, dst_offset));
 2156 #ifndef PRODUCT
 2157         } else {
 2158           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2159                     "popq    [rsp + #%d]",
 2160                      src_offset, dst_offset);
 2161 #endif
 2162         }
 2163       } else {
 2164         // 32-bit
 2165         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2166         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2167         // No pushl/popl, so:
 2168         int src_offset = ra_->reg2offset(src_first);
 2169         int dst_offset = ra_->reg2offset(dst_first);
 2170         if (masm) {
 2171           __ movq(Address(rsp, -8), rax);
 2172           __ movl(rax, Address(rsp, src_offset));
 2173           __ movl(Address(rsp, dst_offset), rax);
 2174           __ movq(rax, Address(rsp, -8));
 2175 #ifndef PRODUCT
 2176         } else {
 2177           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2178                     "movl    rax, [rsp + #%d]\n\t"
 2179                     "movl    [rsp + #%d], rax\n\t"
 2180                     "movq    rax, [rsp - #8]",
 2181                      src_offset, dst_offset);
 2182 #endif
 2183         }
 2184       }
 2185       return 0;
 2186     } else if (dst_first_rc == rc_int) {
 2187       // mem -> gpr
 2188       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2189           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2190         // 64-bit
 2191         int offset = ra_->reg2offset(src_first);
 2192         if (masm) {
 2193           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2194 #ifndef PRODUCT
 2195         } else {
 2196           st->print("movq    %s, [rsp + #%d]\t# spill",
 2197                      Matcher::regName[dst_first],
 2198                      offset);
 2199 #endif
 2200         }
 2201       } else {
 2202         // 32-bit
 2203         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2204         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2205         int offset = ra_->reg2offset(src_first);
 2206         if (masm) {
 2207           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2208 #ifndef PRODUCT
 2209         } else {
 2210           st->print("movl    %s, [rsp + #%d]\t# spill",
 2211                      Matcher::regName[dst_first],
 2212                      offset);
 2213 #endif
 2214         }
 2215       }
 2216       return 0;
 2217     } else if (dst_first_rc == rc_float) {
      // mem -> xmm
 2219       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2220           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2221         // 64-bit
 2222         int offset = ra_->reg2offset(src_first);
 2223         if (masm) {
 2224           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2225 #ifndef PRODUCT
 2226         } else {
 2227           st->print("%s  %s, [rsp + #%d]\t# spill",
 2228                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2229                      Matcher::regName[dst_first],
 2230                      offset);
 2231 #endif
 2232         }
 2233       } else {
 2234         // 32-bit
 2235         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2236         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2237         int offset = ra_->reg2offset(src_first);
 2238         if (masm) {
 2239           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2240 #ifndef PRODUCT
 2241         } else {
 2242           st->print("movss   %s, [rsp + #%d]\t# spill",
 2243                      Matcher::regName[dst_first],
 2244                      offset);
 2245 #endif
 2246         }
 2247       }
 2248       return 0;
 2249     } else if (dst_first_rc == rc_kreg) {
 2250       // mem -> kreg
 2251       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2252           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2253         // 64-bit
 2254         int offset = ra_->reg2offset(src_first);
 2255         if (masm) {
 2256           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2257 #ifndef PRODUCT
 2258         } else {
 2259           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2260                      Matcher::regName[dst_first],
 2261                      offset);
 2262 #endif
 2263         }
 2264       }
 2265       return 0;
 2266     }
 2267   } else if (src_first_rc == rc_int) {
 2268     // gpr ->
 2269     if (dst_first_rc == rc_stack) {
 2270       // gpr -> mem
 2271       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2272           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2273         // 64-bit
 2274         int offset = ra_->reg2offset(dst_first);
 2275         if (masm) {
 2276           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2277 #ifndef PRODUCT
 2278         } else {
 2279           st->print("movq    [rsp + #%d], %s\t# spill",
 2280                      offset,
 2281                      Matcher::regName[src_first]);
 2282 #endif
 2283         }
 2284       } else {
 2285         // 32-bit
 2286         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2287         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2288         int offset = ra_->reg2offset(dst_first);
 2289         if (masm) {
 2290           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2291 #ifndef PRODUCT
 2292         } else {
 2293           st->print("movl    [rsp + #%d], %s\t# spill",
 2294                      offset,
 2295                      Matcher::regName[src_first]);
 2296 #endif
 2297         }
 2298       }
 2299       return 0;
 2300     } else if (dst_first_rc == rc_int) {
 2301       // gpr -> gpr
 2302       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2303           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2304         // 64-bit
 2305         if (masm) {
 2306           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2307                   as_Register(Matcher::_regEncode[src_first]));
 2308 #ifndef PRODUCT
 2309         } else {
 2310           st->print("movq    %s, %s\t# spill",
 2311                      Matcher::regName[dst_first],
 2312                      Matcher::regName[src_first]);
 2313 #endif
 2314         }
 2315         return 0;
 2316       } else {
 2317         // 32-bit
 2318         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2319         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2320         if (masm) {
 2321           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2322                   as_Register(Matcher::_regEncode[src_first]));
 2323 #ifndef PRODUCT
 2324         } else {
 2325           st->print("movl    %s, %s\t# spill",
 2326                      Matcher::regName[dst_first],
 2327                      Matcher::regName[src_first]);
 2328 #endif
 2329         }
 2330         return 0;
 2331       }
 2332     } else if (dst_first_rc == rc_float) {
 2333       // gpr -> xmm
 2334       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2335           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2336         // 64-bit
 2337         if (masm) {
 2338           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2339 #ifndef PRODUCT
 2340         } else {
 2341           st->print("movdq   %s, %s\t# spill",
 2342                      Matcher::regName[dst_first],
 2343                      Matcher::regName[src_first]);
 2344 #endif
 2345         }
 2346       } else {
 2347         // 32-bit
 2348         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2349         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2350         if (masm) {
 2351           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2352 #ifndef PRODUCT
 2353         } else {
 2354           st->print("movdl   %s, %s\t# spill",
 2355                      Matcher::regName[dst_first],
 2356                      Matcher::regName[src_first]);
 2357 #endif
 2358         }
 2359       }
 2360       return 0;
 2361     } else if (dst_first_rc == rc_kreg) {
 2362       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2363           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2364         // 64-bit
 2365         if (masm) {
 2366           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
 2373         }
 2374       }
 2375       Unimplemented();
 2376       return 0;
 2377     }
 2378   } else if (src_first_rc == rc_float) {
 2379     // xmm ->
 2380     if (dst_first_rc == rc_stack) {
 2381       // xmm -> mem
 2382       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2383           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2384         // 64-bit
 2385         int offset = ra_->reg2offset(dst_first);
 2386         if (masm) {
 2387           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2388 #ifndef PRODUCT
 2389         } else {
 2390           st->print("movsd   [rsp + #%d], %s\t# spill",
 2391                      offset,
 2392                      Matcher::regName[src_first]);
 2393 #endif
 2394         }
 2395       } else {
 2396         // 32-bit
 2397         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2398         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2399         int offset = ra_->reg2offset(dst_first);
 2400         if (masm) {
 2401           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2402 #ifndef PRODUCT
 2403         } else {
 2404           st->print("movss   [rsp + #%d], %s\t# spill",
 2405                      offset,
 2406                      Matcher::regName[src_first]);
 2407 #endif
 2408         }
 2409       }
 2410       return 0;
 2411     } else if (dst_first_rc == rc_int) {
 2412       // xmm -> gpr
 2413       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2414           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2415         // 64-bit
 2416         if (masm) {
 2417           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2418 #ifndef PRODUCT
 2419         } else {
 2420           st->print("movdq   %s, %s\t# spill",
 2421                      Matcher::regName[dst_first],
 2422                      Matcher::regName[src_first]);
 2423 #endif
 2424         }
 2425       } else {
 2426         // 32-bit
 2427         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2428         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2429         if (masm) {
 2430           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2431 #ifndef PRODUCT
 2432         } else {
 2433           st->print("movdl   %s, %s\t# spill",
 2434                      Matcher::regName[dst_first],
 2435                      Matcher::regName[src_first]);
 2436 #endif
 2437         }
 2438       }
 2439       return 0;
 2440     } else if (dst_first_rc == rc_float) {
 2441       // xmm -> xmm
 2442       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2443           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2444         // 64-bit
 2445         if (masm) {
 2446           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2447 #ifndef PRODUCT
 2448         } else {
 2449           st->print("%s  %s, %s\t# spill",
 2450                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2451                      Matcher::regName[dst_first],
 2452                      Matcher::regName[src_first]);
 2453 #endif
 2454         }
 2455       } else {
 2456         // 32-bit
 2457         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2458         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2459         if (masm) {
 2460           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2461 #ifndef PRODUCT
 2462         } else {
 2463           st->print("%s  %s, %s\t# spill",
 2464                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2465                      Matcher::regName[dst_first],
 2466                      Matcher::regName[src_first]);
 2467 #endif
 2468         }
 2469       }
 2470       return 0;
 2471     } else if (dst_first_rc == rc_kreg) {
 2472       assert(false, "Illegal spilling");
 2473       return 0;
 2474     }
 2475   } else if (src_first_rc == rc_kreg) {
 2476     if (dst_first_rc == rc_stack) {
      // kreg -> mem
 2478       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2479           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2480         // 64-bit
 2481         int offset = ra_->reg2offset(dst_first);
 2482         if (masm) {
 2483           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2484 #ifndef PRODUCT
 2485         } else {
 2486           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 2487                      offset,
 2488                      Matcher::regName[src_first]);
 2489 #endif
 2490         }
 2491       }
 2492       return 0;
 2493     } else if (dst_first_rc == rc_int) {
 2494       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2495           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2496         // 64-bit
 2497         if (masm) {
 2498           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2499 #ifndef PRODUCT
 2500         } else {
 2501          st->print("kmovq   %s, %s\t# spill",
 2502                      Matcher::regName[dst_first],
 2503                      Matcher::regName[src_first]);
 2504 #endif
 2505         }
 2506       }
 2507       Unimplemented();
 2508       return 0;
 2509     } else if (dst_first_rc == rc_kreg) {
 2510       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2511           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2512         // 64-bit
 2513         if (masm) {
 2514           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2515 #ifndef PRODUCT
 2516         } else {
 2517          st->print("kmovq   %s, %s\t# spill",
 2518                      Matcher::regName[dst_first],
 2519                      Matcher::regName[src_first]);
 2520 #endif
 2521         }
 2522       }
 2523       return 0;
 2524     } else if (dst_first_rc == rc_float) {
 2525       assert(false, "Illegal spill");
 2526       return 0;
 2527     }
 2528   }
 2529 
  assert(0, "unhandled register class pair for spill copy");
 2531   Unimplemented();
 2532   return 0;
 2533 }
 2534 
 2535 #ifndef PRODUCT
 2536 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2537   implementation(nullptr, ra_, false, st);
 2538 }
 2539 #endif
 2540 
 2541 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2542   implementation(masm, ra_, false, nullptr);
 2543 }
 2544 
 2545 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2546   return MachNode::size(ra_);
 2547 }
 2548 
 2549 //=============================================================================
 2550 #ifndef PRODUCT
 2551 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2552 {
 2553   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2554   int reg = ra_->get_reg_first(this);
 2555   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2556             Matcher::regName[reg], offset);
 2557 }
 2558 #endif
 2559 
 2560 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2561 {
 2562   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2563   int reg = ra_->get_encode(this);
 2564 
 2565   __ lea(as_Register(reg), Address(rsp, offset));
 2566 }
 2567 
 2568 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2569 {
 2570   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
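  // leaq reg, [rsp + offset] encodes as prefix (1-byte REX or 2-byte REX2) +
  // opcode + ModRM + SIB, followed by a 1-byte (disp8) or 4-byte (disp32)
  // displacement.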
 2571   if (ra_->get_encode(this) > 15) {
 2572     return (offset < 0x80) ? 6 : 9; // REX2
 2573   } else {
 2574     return (offset < 0x80) ? 5 : 8; // REX
 2575   }
 2576 }
 2577 
 2578 //=============================================================================
 2579 #ifndef PRODUCT
 2580 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2581 {
 2582   st->print_cr("MachVEPNode");
 2583 }
 2584 #endif
 2585 
 2586 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2587 {
 2588   CodeBuffer* cbuf = masm->code();
 2589   uint insts_size = cbuf->insts_size();
 2590   if (!_verified) {
 2591     __ ic_check(1);
 2592   } else {
 2593     // TODO 8284443 Avoid creation of temporary frame
 2594     if (ra_->C->stub_function() == nullptr) {
 2595       __ verified_entry(ra_->C, 0);
 2596       __ entry_barrier();
 2597       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2598       __ remove_frame(initial_framesize, false);
 2599     }
 2600     // Unpack inline type args passed as oop and then jump to
 2601     // the verified entry point (skipping the unverified entry).
 2602     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2603     // Emit code for verified entry and save increment for stack repair on return
 2604     __ verified_entry(ra_->C, sp_inc);
 2605     if (Compile::current()->output()->in_scratch_emit_size()) {
 2606       Label dummy_verified_entry;
 2607       __ jmp(dummy_verified_entry);
 2608     } else {
 2609       __ jmp(*_verified_entry);
 2610     }
 2611   }
  /* WARNING: these NOPs are critical so that the verified entry point is
     properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
 2614   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 2615   nops_cnt &= 0x3; // Do not add nops if code is aligned.
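  // E.g. if the code above occupied 7 bytes, (4 - (7 & 0x3)) & 0x3 == 1 nop
  // byte is emitted to reach the next 4-byte boundary.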
 2616   if (nops_cnt > 0) {
 2617     __ nop(nops_cnt);
 2618   }
 2619 }
 2620 
 2621 //=============================================================================
 2622 #ifndef PRODUCT
 2623 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2624 {
 2625   if (UseCompressedClassPointers) {
 2626     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2627     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2628   } else {
 2629     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2630     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2631   }
 2632   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2633 }
 2634 #endif
 2635 
 2636 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2637 {
 2638   __ ic_check(InteriorEntryAlignment);
 2639 }
 2640 
 2641 
 2642 //=============================================================================
 2643 
 2644 bool Matcher::supports_vector_calling_convention(void) {
 2645   return EnableVectorSupport;
 2646 }
 2647 
 2648 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2649   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2650 }
 2651 
 2652 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2653   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2654 }
 2655 
 2656 #ifdef ASSERT
 2657 static bool is_ndd_demotable(const MachNode* mdef) {
 2658   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2659 }
 2660 #endif
 2661 
 2662 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2663                                             int oper_index) {
 2664   if (mdef == nullptr) {
 2665     return false;
 2666   }
 2667 
 2668   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2669       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2670     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2671     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2672     return false;
 2673   }
 2674 
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any address component will
  // not result in NDD demotion by the assembler.
 2678   if (mdef->operand_num_edges(oper_index) != 1) {
 2679     return false;
 2680   }
 2681 
  // A demotion candidate must be register-mask compatible with the definition.
 2683   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2684   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2685     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2686     return false;
 2687   }
 2688 
 2689   switch (oper_index) {
  // The first operand of a MachNode matched by an Intel APX NDD selection
  // pattern can share its assigned register with the definition operand if
  // their live ranges do not overlap. In such a scenario the assembler can
  // demote the instruction to a legacy map0/map1 encoding by replacing its
  // 4-byte extended EVEX prefix with a shorter REX/REX2 prefix. Demotion
  // candidates are decorated with a special flag by the instruction selector.
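  // Illustrative example (register names arbitrary): the NDD form
  // 'eaddl r10, r11, r12' needs a 4-byte EVEX prefix; if the allocator
  // biases r10 towards r11, the assembler can emit the legacy two-operand
  // 'addl r11, r12' with a 1-byte REX prefix instead.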
 2696   case 1:
 2697     return is_ndd_demotable_opr1(mdef);
 2698 
  // The definition operand of a commutative operation can be biased towards
  // its second operand.
 2701   case 2:
 2702     return is_ndd_demotable_opr2(mdef);
 2703 
  // The current scheme selects at most two biasing candidates.
 2705   default:
 2706     assert(false, "unhandled operand index: %s", mdef->Name());
 2707     break;
 2708   }
 2709 
 2710   return false;
 2711 }
 2712 
 2713 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2714   assert(EnableVectorSupport, "sanity");
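  // Vector values are returned in XMM0/YMM0/ZMM0; 'hi' names the upper-most
  // 32-bit slice of the register so the returned pair spans the whole vector.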
 2715   int lo = XMM0_num;
 2716   int hi = XMM0b_num;
 2717   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2718   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2719   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2720   return OptoRegPair(hi, lo);
 2721 }
 2722 
 2723 // Is this branch offset short enough that a short branch can be used?
 2724 //
 2725 // NOTE: If the platform does not provide any short branch variants, then
 2726 //       this method should return false for offset 0.
 2727 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 2731   offset -= br_size;
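  // E.g. a 2-byte short jump to its own address comes in with offset 0 and
  // leaves with displacement -2, well within the rel8 range.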
 2732 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
 2735   if (rule == jmpConUCF2_rule)
 2736     return (-126 <= offset && offset <= 125);
 2737   return (-128 <= offset && offset <= 127);
 2738 }
 2739 
 2740 #ifdef ASSERT
 2741 // Return whether or not this register is ever used as an argument.
 2742 bool Matcher::can_be_java_arg(int reg)
 2743 {
 2744   return
 2745     reg ==  RDI_num || reg == RDI_H_num ||
 2746     reg ==  RSI_num || reg == RSI_H_num ||
 2747     reg ==  RDX_num || reg == RDX_H_num ||
 2748     reg ==  RCX_num || reg == RCX_H_num ||
 2749     reg ==   R8_num || reg ==  R8_H_num ||
 2750     reg ==   R9_num || reg ==  R9_H_num ||
 2751     reg ==  R12_num || reg == R12_H_num ||
 2752     reg == XMM0_num || reg == XMM0b_num ||
 2753     reg == XMM1_num || reg == XMM1b_num ||
 2754     reg == XMM2_num || reg == XMM2b_num ||
 2755     reg == XMM3_num || reg == XMM3b_num ||
 2756     reg == XMM4_num || reg == XMM4b_num ||
 2757     reg == XMM5_num || reg == XMM5b_num ||
 2758     reg == XMM6_num || reg == XMM6b_num ||
 2759     reg == XMM7_num || reg == XMM7b_num;
 2760 }
 2761 #endif
 2762 
 2763 uint Matcher::int_pressure_limit()
 2764 {
 2765   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2766 }
 2767 
 2768 uint Matcher::float_pressure_limit()
 2769 {
  // After experimenting with different values, the following default threshold
  // was found to work best for LCM's register pressure scheduling on x64.
 2772   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2773   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2774   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2775 }
 2776 
 2777 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply when the
  // divisor is constant is faster than the hardware
  // DIV instruction (it uses MulHiL).
 2781   return false;
 2782 }
 2783 
 2784 // Register for DIVI projection of divmodI
 2785 const RegMask& Matcher::divI_proj_mask() {
 2786   return INT_RAX_REG_mask();
 2787 }
 2788 
 2789 // Register for MODI projection of divmodI
 2790 const RegMask& Matcher::modI_proj_mask() {
 2791   return INT_RDX_REG_mask();
 2792 }
 2793 
 2794 // Register for DIVL projection of divmodL
 2795 const RegMask& Matcher::divL_proj_mask() {
 2796   return LONG_RAX_REG_mask();
 2797 }
 2798 
 2799 // Register for MODL projection of divmodL
 2800 const RegMask& Matcher::modL_proj_mask() {
 2801   return LONG_RDX_REG_mask();
 2802 }
 2803 
 2804 %}
 2805 
 2806 source_hpp %{
 2807 // Header information of the source block.
 2808 // Method declarations/definitions which are used outside
 2809 // the ad-scope can conveniently be defined here.
 2810 //
 2811 // To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
 2813 
 2814 #include "runtime/vm_version.hpp"
 2815 
 2816 class NativeJump;
 2817 
 2818 class CallStubImpl {
 2819 
 2820   //--------------------------------------------------------------
 2821   //---<  Used for optimization in Compile::shorten_branches  >---
 2822   //--------------------------------------------------------------
 2823 
 2824  public:
 2825   // Size of call trampoline stub.
 2826   static uint size_call_trampoline() {
 2827     return 0; // no call trampolines on this platform
 2828   }
 2829 
 2830   // number of relocations needed by a call trampoline stub
 2831   static uint reloc_call_trampoline() {
 2832     return 0; // no call trampolines on this platform
 2833   }
 2834 };
 2835 
 2836 class HandlerImpl {
 2837 
 2838  public:
 2839 
 2840   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2841 
 2842   static uint size_deopt_handler() {
    // One 5-byte call (opcode + rel32) plus one 2-byte short jmp (opcode + rel8).
 2844     return 7;
 2845   }
 2846 };
 2847 
 2848 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
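  // Vectors shorter than 16 bytes are emitted as 128-bit (XMM) operations;
  // e.g. a 32-byte vector maps to Assembler::AVX_256bit.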
  switch (bytes) {
 2850     case  4: // fall-through
 2851     case  8: // fall-through
 2852     case 16: return Assembler::AVX_128bit;
 2853     case 32: return Assembler::AVX_256bit;
 2854     case 64: return Assembler::AVX_512bit;
 2855 
 2856     default: {
 2857       ShouldNotReachHere();
 2858       return Assembler::AVX_NoVec;
 2859     }
 2860   }
 2861 }
 2862 
 2863 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2864   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2865 }
 2866 
 2867 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2868   uint def_idx = use->operand_index(opnd);
 2869   Node* def = use->in(def_idx);
 2870   return vector_length_encoding(def);
 2871 }
 2872 
 2873 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2874   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2875          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2876 }
 2877 
 2878 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2879   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2880            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2881 }
 2882 
 2883 class Node::PD {
 2884 public:
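  // x86-specific node flags, allocated directly above the shared Node flags
  // (Node::_last_flag). Used for Intel JCC-erratum padding, EFLAGS tracking,
  // and APX NDD demotion hints.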
 2885   enum NodeFlags : uint64_t {
 2886     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2887     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2888     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2889     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2890     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2891     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2892     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2893     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2894     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2895     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2896     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2897     Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
 2898     Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
 2899     _last_flag                = Flag_ndd_demotable_opr2
 2900   };
 2901 };
 2902 
 2903 %} // end source_hpp
 2904 
 2905 source %{
 2906 
 2907 #include "opto/addnode.hpp"
 2908 #include "c2_intelJccErratum_x86.hpp"
 2909 
 2910 void PhaseOutput::pd_perform_mach_node_analysis() {
 2911   if (VM_Version::has_intel_jcc_erratum()) {
 2912     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2913     _buf_sizes._code += extra_padding;
 2914   }
 2915 }
 2916 
 2917 int MachNode::pd_alignment_required() const {
 2918   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2919     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2920     return IntelJccErratum::largest_jcc_size() + 1;
 2921   } else {
 2922     return 1;
 2923   }
 2924 }
 2925 
 2926 int MachNode::compute_padding(int current_offset) const {
 2927   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2928     Compile* C = Compile::current();
 2929     PhaseOutput* output = C->output();
 2930     Block* block = output->block();
 2931     int index = output->index();
 2932     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2933   } else {
 2934     return 0;
 2935   }
 2936 }
 2937 
 2938 // Emit deopt handler code.
 2939 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2940 
 2941   // Note that the code buffer's insts_mark is always relative to insts.
 2942   // That's why we must use the macroassembler to generate a handler.
 2943   address base = __ start_a_stub(size_deopt_handler());
 2944   if (base == nullptr) {
 2945     ciEnv::current()->record_failure("CodeCache is full");
 2946     return 0;  // CodeBuffer::expand failed
 2947   }
 2948   int offset = __ offset();
 2949 
 2950   Label start;
 2951   __ bind(start);
 2952 
 2953   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2954 
 2955   int entry_offset = __ offset();
 2956 
 2957   __ jmp(start);
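  // The deopt handler entry is the trailing jmp, which branches back to the
  // call above; keeping the jmp inside the stub also provides the bytes that
  // the post-call NOP check reads, as asserted below.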
 2958 
 2959   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2960   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2961          "out of bounds read in post-call NOP check");
 2962   __ end_a_stub();
 2963   return entry_offset;
 2964 }
 2965 
 2966 static Assembler::Width widthForType(BasicType bt) {
 2967   if (bt == T_BYTE) {
 2968     return Assembler::B;
 2969   } else if (bt == T_SHORT) {
 2970     return Assembler::W;
 2971   } else if (bt == T_INT) {
 2972     return Assembler::D;
 2973   } else {
 2974     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2975     return Assembler::Q;
 2976   }
 2977 }
 2978 
 2979 //=============================================================================
 2980 
// Float masks come from different places depending on platform.
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); }
static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); }
 3001 
 3002 //=============================================================================
 3003 bool Matcher::match_rule_supported(int opcode) {
 3004   if (!has_match_rule(opcode)) {
 3005     return false; // no match rule present
 3006   }
 3007   switch (opcode) {
 3008     case Op_AbsVL:
 3009     case Op_StoreVectorScatter:
 3010       if (UseAVX < 3) {
 3011         return false;
 3012       }
 3013       break;
 3014     case Op_PopCountI:
 3015     case Op_PopCountL:
 3016       if (!UsePopCountInstruction) {
 3017         return false;
 3018       }
 3019       break;
    case Op_PopCountVI:
    case Op_PopCountVL:
    case Op_CompressV:
    case Op_ExpandV:
      if (UseAVX < 2) {
        return false;
      }
      break;
 3032     case Op_MulVI:
 3033       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3034         return false;
 3035       }
 3036       break;
 3037     case Op_MulVL:
 3038       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3039         return false;
 3040       }
 3041       break;
 3042     case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
 3044         return false;
 3045       }
 3046       break;
 3047     case Op_AbsVB:
 3048     case Op_AbsVS:
 3049     case Op_AbsVI:
 3050     case Op_AddReductionVI:
 3051     case Op_AndReductionV:
 3052     case Op_OrReductionV:
 3053     case Op_XorReductionV:
 3054       if (UseSSE < 3) { // requires at least SSSE3
 3055         return false;
 3056       }
 3057       break;
 3058     case Op_MaxHF:
 3059     case Op_MinHF:
 3060       if (!VM_Version::supports_avx512vlbw()) {
 3061         return false;
 3062       }  // fallthrough
 3063     case Op_AddHF:
 3064     case Op_DivHF:
 3065     case Op_FmaHF:
 3066     case Op_MulHF:
 3067     case Op_ReinterpretS2HF:
 3068     case Op_ReinterpretHF2S:
 3069     case Op_SubHF:
 3070     case Op_SqrtHF:
 3071       if (!VM_Version::supports_avx512_fp16()) {
 3072         return false;
 3073       }
 3074       break;
 3075     case Op_VectorLoadShuffle:
 3076     case Op_VectorRearrange:
 3077     case Op_MulReductionVI:
 3078       if (UseSSE < 4) { // requires at least SSE4
 3079         return false;
 3080       }
 3081       break;
 3082     case Op_IsInfiniteF:
 3083     case Op_IsInfiniteD:
 3084       if (!VM_Version::supports_avx512dq()) {
 3085         return false;
 3086       }
 3087       break;
 3088     case Op_SqrtVD:
 3089     case Op_SqrtVF:
 3090     case Op_VectorMaskCmp:
 3091     case Op_VectorCastB2X:
 3092     case Op_VectorCastS2X:
 3093     case Op_VectorCastI2X:
 3094     case Op_VectorCastL2X:
 3095     case Op_VectorCastF2X:
 3096     case Op_VectorCastD2X:
 3097     case Op_VectorUCastB2X:
 3098     case Op_VectorUCastS2X:
 3099     case Op_VectorUCastI2X:
 3100     case Op_VectorMaskCast:
 3101       if (UseAVX < 1) { // enabled for AVX only
 3102         return false;
 3103       }
 3104       break;
 3105     case Op_PopulateIndex:
 3106       if (UseAVX < 2) {
 3107         return false;
 3108       }
 3109       break;
 3110     case Op_RoundVF:
 3111       if (UseAVX < 2) { // enabled for AVX2 only
 3112         return false;
 3113       }
 3114       break;
 3115     case Op_RoundVD:
 3116       if (UseAVX < 3) {
 3117         return false;  // enabled for AVX3 only
 3118       }
 3119       break;
 3120     case Op_CompareAndSwapL:
 3121     case Op_CompareAndSwapP:
 3122       break;
    case Op_StrIndexOf:
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
 3133     case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
 3135         return false;
 3136       }
 3137       break;
 3138     case Op_MulVB:
 3139     case Op_LShiftVB:
 3140     case Op_RShiftVB:
 3141     case Op_URShiftVB:
 3142     case Op_VectorInsert:
 3143     case Op_VectorLoadMask:
 3144     case Op_VectorStoreMask:
 3145     case Op_VectorBlend:
 3146       if (UseSSE < 4) {
 3147         return false;
 3148       }
 3149       break;
 3150     case Op_MaxD:
 3151     case Op_MaxF:
 3152     case Op_MinD:
 3153     case Op_MinF:
 3154       if (UseAVX < 1) { // enabled for AVX only
 3155         return false;
 3156       }
 3157       break;
 3158     case Op_CacheWB:
 3159     case Op_CacheWBPreSync:
 3160     case Op_CacheWBPostSync:
 3161       if (!VM_Version::supports_data_cache_line_flush()) {
 3162         return false;
 3163       }
 3164       break;
 3165     case Op_ExtractB:
 3166     case Op_ExtractL:
 3167     case Op_ExtractI:
 3168     case Op_RoundDoubleMode:
 3169       if (UseSSE < 4) {
 3170         return false;
 3171       }
 3172       break;
 3173     case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
 3175         return false; // 128bit vroundpd is not available
 3176       }
 3177       break;
 3178     case Op_LoadVectorGather:
 3179     case Op_LoadVectorGatherMasked:
 3180       if (UseAVX < 2) {
 3181         return false;
 3182       }
 3183       break;
 3184     case Op_FmaF:
 3185     case Op_FmaD:
 3186     case Op_FmaVD:
 3187     case Op_FmaVF:
 3188       if (!UseFMA) {
 3189         return false;
 3190       }
 3191       break;
 3192     case Op_MacroLogicV:
 3193       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3194         return false;
 3195       }
 3196       break;
 3197 
 3198     case Op_VectorCmpMasked:
 3199     case Op_VectorMaskGen:
 3200       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3201         return false;
 3202       }
 3203       break;
 3204     case Op_VectorMaskFirstTrue:
 3205     case Op_VectorMaskLastTrue:
 3206     case Op_VectorMaskTrueCount:
 3207     case Op_VectorMaskToLong:
 3208       if (UseAVX < 1) {
 3209          return false;
 3210       }
 3211       break;
 3212     case Op_RoundF:
 3213     case Op_RoundD:
 3214       break;
 3215     case Op_CopySignD:
 3216     case Op_CopySignF:
 3217       if (UseAVX < 3)  {
 3218         return false;
 3219       }
 3220       if (!VM_Version::supports_avx512vl()) {
 3221         return false;
 3222       }
 3223       break;
 3224     case Op_CompressBits:
 3225     case Op_ExpandBits:
 3226       if (!VM_Version::supports_bmi2()) {
 3227         return false;
 3228       }
 3229       break;
 3230     case Op_CompressM:
 3231       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3232         return false;
 3233       }
 3234       break;
 3235     case Op_ConvF2HF:
 3236     case Op_ConvHF2F:
 3237       if (!VM_Version::supports_float16()) {
 3238         return false;
 3239       }
 3240       break;
 3241     case Op_VectorCastF2HF:
 3242     case Op_VectorCastHF2F:
 3243       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3244         return false;
 3245       }
 3246       break;
 3247   }
 3248   return true;  // Match rules are supported by default.
 3249 }
 3250 
 3251 //------------------------------------------------------------------------
 3252 
 3253 static inline bool is_pop_count_instr_target(BasicType bt) {
 3254   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3255          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3256 }
 3257 
 3258 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3259   return match_rule_supported_vector(opcode, vlen, bt);
 3260 }
 3261 
 3262 // Identify extra cases that we might want to provide match rules for vector nodes and
 3263 // other intrinsics guarded with vector length (vlen) and element type (bt).
 3264 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3265   if (!match_rule_supported(opcode)) {
 3266     return false;
 3267   }
 3268   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3269   //   * SSE2 supports 128bit vectors for all types;
 3270   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3271   //   * AVX2 supports 256bit vectors for all types;
 3272   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3273   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3274   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3275   // And MaxVectorSize is taken into account as well.
 3276   if (!vector_size_supported(bt, vlen)) {
 3277     return false;
 3278   }
 3279   // Special cases which require vector length follow:
 3280   //   * implementation limitations
 3281   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3282   //   * 128bit vroundpd instruction is present only in AVX1
 3283   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3284   switch (opcode) {
 3285     case Op_MaxVHF:
 3286     case Op_MinVHF:
 3287       if (!VM_Version::supports_avx512bw()) {
 3288         return false;
 3289       }
 3290     case Op_AddVHF:
 3291     case Op_DivVHF:
 3292     case Op_FmaVHF:
 3293     case Op_MulVHF:
 3294     case Op_SubVHF:
 3295     case Op_SqrtVHF:
 3296       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3297         return false;
 3298       }
 3299       if (!VM_Version::supports_avx512_fp16()) {
 3300         return false;
 3301       }
 3302       break;
 3303     case Op_AbsVF:
 3304     case Op_NegVF:
      if ((vlen == 16) && !VM_Version::supports_avx512dq()) {
 3306         return false; // 512bit vandps and vxorps are not available
 3307       }
 3308       break;
 3309     case Op_AbsVD:
 3310     case Op_NegVD:
      if ((vlen == 8) && !VM_Version::supports_avx512dq()) {
 3312         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3313       }
 3314       break;
 3315     case Op_RotateRightV:
 3316     case Op_RotateLeftV:
 3317       if (bt != T_INT && bt != T_LONG) {
 3318         return false;
 3319       } // fallthrough
 3320     case Op_MacroLogicV:
 3321       if (!VM_Version::supports_evex() ||
 3322           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3323         return false;
 3324       }
 3325       break;
 3326     case Op_ClearArray:
 3327     case Op_VectorMaskGen:
 3328     case Op_VectorCmpMasked:
 3329       if (!VM_Version::supports_avx512bw()) {
 3330         return false;
 3331       }
 3332       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3333         return false;
 3334       }
 3335       break;
 3336     case Op_LoadVectorMasked:
 3337     case Op_StoreVectorMasked:
 3338       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3339         return false;
 3340       }
 3341       break;
 3342     case Op_UMinV:
 3343     case Op_UMaxV:
 3344       if (UseAVX == 0) {
 3345         return false;
 3346       }
 3347       break;
 3348     case Op_UMinReductionV:
 3349     case Op_UMaxReductionV:
 3350       if (UseAVX == 0) {
 3351         return false;
 3352       }
 3353       if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
 3354         return false;
 3355       }
 3356       if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
 3357         return false;
 3358       }
 3359       break;
 3360     case Op_MaxV:
 3361     case Op_MinV:
 3362       if (UseSSE < 4 && is_integral_type(bt)) {
 3363         return false;
 3364       }
 3365       if ((bt == T_FLOAT || bt == T_DOUBLE)) {
 3366           // Float/Double intrinsics are enabled for AVX family currently.
 3367           if (UseAVX == 0) {
 3368             return false;
 3369           }
 3370           if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
 3371             return false;
 3372           }
 3373       }
 3374       break;
 3375     case Op_CallLeafVector:
 3376       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3377         return false;
 3378       }
 3379       break;
 3380     case Op_AddReductionVI:
 3381       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3382         return false;
 3383       }
 3384       // fallthrough
 3385     case Op_AndReductionV:
 3386     case Op_OrReductionV:
 3387     case Op_XorReductionV:
 3388       if (is_subword_type(bt) && (UseSSE < 4)) {
 3389         return false;
 3390       }
 3391       break;
 3392     case Op_MinReductionV:
 3393     case Op_MaxReductionV:
 3394       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3395         return false;
 3396       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3397         return false;
 3398       }
 3399       // Float/Double intrinsics enabled for AVX family.
 3400       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3401         return false;
 3402       }
 3403       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3404         return false;
 3405       }
 3406       break;
 3407     case Op_VectorBlend:
 3408       if (UseAVX == 0 && size_in_bits < 128) {
 3409         return false;
 3410       }
 3411       break;
 3412     case Op_VectorTest:
 3413       if (UseSSE < 4) {
 3414         return false; // Implementation limitation
 3415       } else if (size_in_bits < 32) {
 3416         return false; // Implementation limitation
 3417       }
 3418       break;
 3419     case Op_VectorLoadShuffle:
 3420     case Op_VectorRearrange:
      if (vlen == 2) {
 3422         return false; // Implementation limitation due to how shuffle is loaded
 3423       } else if (size_in_bits == 256 && UseAVX < 2) {
 3424         return false; // Implementation limitation
 3425       }
 3426       break;
 3427     case Op_VectorLoadMask:
 3428     case Op_VectorMaskCast:
 3429       if (size_in_bits == 256 && UseAVX < 2) {
 3430         return false; // Implementation limitation
 3431       }
 3432       // fallthrough
 3433     case Op_VectorStoreMask:
 3434       if (vlen == 2) {
 3435         return false; // Implementation limitation
 3436       }
 3437       break;
 3438     case Op_PopulateIndex:
 3439       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3440         return false;
 3441       }
 3442       break;
 3443     case Op_VectorCastB2X:
 3444     case Op_VectorCastS2X:
 3445     case Op_VectorCastI2X:
 3446       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3447         return false;
 3448       }
 3449       break;
 3450     case Op_VectorCastL2X:
 3451       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3452         return false;
 3453       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3454         return false;
 3455       }
 3456       break;
 3457     case Op_VectorCastF2X: {
        // As per JLS section 5.1.3, narrowing conversions to sub-word types
        // happen after an intermediate conversion to integer, and the special
        // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
 3461         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3462         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3463           return false;
 3464         }
 3465       }
 3466       // fallthrough
 3467     case Op_VectorCastD2X:
 3468       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3469         return false;
 3470       }
 3471       break;
 3472     case Op_VectorCastF2HF:
 3473     case Op_VectorCastHF2F:
 3474       if (!VM_Version::supports_f16c() &&
 3475          ((!VM_Version::supports_evex() ||
 3476          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3477         return false;
 3478       }
 3479       break;
 3480     case Op_RoundVD:
 3481       if (!VM_Version::supports_avx512dq()) {
 3482         return false;
 3483       }
 3484       break;
 3485     case Op_MulReductionVI:
 3486       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3487         return false;
 3488       }
 3489       break;
 3490     case Op_LoadVectorGatherMasked:
 3491       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3492         return false;
 3493       }
 3494       if (is_subword_type(bt) &&
 3495          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3496           (size_in_bits < 64)                                      ||
 3497           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3498         return false;
 3499       }
 3500       break;
 3501     case Op_StoreVectorScatterMasked:
 3502     case Op_StoreVectorScatter:
 3503       if (is_subword_type(bt)) {
 3504         return false;
 3505       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3506         return false;
 3507       }
 3508       // fallthrough
 3509     case Op_LoadVectorGather:
 3510       if (!is_subword_type(bt) && size_in_bits == 64) {
 3511         return false;
 3512       }
 3513       if (is_subword_type(bt) && size_in_bits < 64) {
 3514         return false;
 3515       }
 3516       break;
 3517     case Op_SaturatingAddV:
 3518     case Op_SaturatingSubV:
 3519       if (UseAVX < 1) {
 3520         return false; // Implementation limitation
 3521       }
 3522       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3523         return false;
 3524       }
 3525       break;
 3526     case Op_SelectFromTwoVector:
 3527        if (size_in_bits < 128) {
 3528          return false;
 3529        }
 3530        if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3531          return false;
 3532        }
 3533        if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3534          return false;
 3535        }
 3536        if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3537          return false;
 3538        }
 3539        if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
 3540          return false;
 3541        }
 3542        break;
 3543     case Op_MaskAll:
 3544       if (!VM_Version::supports_evex()) {
 3545         return false;
 3546       }
 3547       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3548         return false;
 3549       }
 3550       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3551         return false;
 3552       }
 3553       break;
 3554     case Op_VectorMaskCmp:
 3555       if (vlen < 2 || size_in_bits < 32) {
 3556         return false;
 3557       }
 3558       break;
 3559     case Op_CompressM:
 3560       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3561         return false;
 3562       }
 3563       break;
 3564     case Op_CompressV:
 3565     case Op_ExpandV:
 3566       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3567         return false;
 3568       }
      if (size_in_bits < 128) {
        return false;
      }
      // fallthrough
 3572     case Op_VectorLongToMask:
 3573       if (UseAVX < 1) {
 3574         return false;
 3575       }
 3576       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3577         return false;
 3578       }
 3579       break;
 3580     case Op_SignumVD:
 3581     case Op_SignumVF:
 3582       if (UseAVX < 1) {
 3583         return false;
 3584       }
 3585       break;
 3586     case Op_PopCountVI:
 3587     case Op_PopCountVL: {
 3588         if (!is_pop_count_instr_target(bt) &&
 3589             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3590           return false;
 3591         }
 3592       }
 3593       break;
 3594     case Op_ReverseV:
 3595     case Op_ReverseBytesV:
 3596       if (UseAVX < 2) {
 3597         return false;
 3598       }
 3599       break;
 3600     case Op_CountTrailingZerosV:
 3601     case Op_CountLeadingZerosV:
 3602       if (UseAVX < 2) {
 3603         return false;
 3604       }
 3605       break;
 3606   }
 3607   return true;  // Per default match rules are supported.
 3608 }
 3609 
 3610 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most unary/binary/ternary masked operations
  // share the IR nodes of their non-masked counterparts, with the mask edge
  // being the differentiator.
  // This routine does a strict check on the existence of masked operation
  // patterns by returning a default false value for all opcodes apart from the
  // ones whose masked instruction patterns are defined in this file.
 3617   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3618     return false;
 3619   }
 3620 
 3621   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3622   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3623     return false;
 3624   }
  switch (opcode) {
 3626     // Unary masked operations
 3627     case Op_AbsVB:
 3628     case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      } // fallthrough
 3632     case Op_AbsVI:
 3633     case Op_AbsVL:
 3634       return true;
 3635 
 3636     // Ternary masked operations
 3637     case Op_FmaVF:
 3638     case Op_FmaVD:
 3639       return true;
 3640 
 3641     case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
 3643         return false;
 3644       }
 3645       return true;
 3646 
 3647     // Binary masked operations
 3648     case Op_AddVB:
 3649     case Op_AddVS:
 3650     case Op_SubVB:
 3651     case Op_SubVS:
 3652     case Op_MulVS:
 3653     case Op_LShiftVS:
 3654     case Op_RShiftVS:
 3655     case Op_URShiftVS:
 3656       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3657       if (!VM_Version::supports_avx512bw()) {
 3658         return false;  // Implementation limitation
 3659       }
 3660       return true;
 3661 
 3662     case Op_MulVL:
 3663       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3664       if (!VM_Version::supports_avx512dq()) {
 3665         return false;  // Implementation limitation
 3666       }
 3667       return true;
 3668 
 3669     case Op_AndV:
 3670     case Op_OrV:
 3671     case Op_XorV:
 3672     case Op_RotateRightV:
 3673     case Op_RotateLeftV:
 3674       if (bt != T_INT && bt != T_LONG) {
 3675         return false; // Implementation limitation
 3676       }
 3677       return true;
 3678 
 3679     case Op_VectorLoadMask:
 3680       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3681       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3682         return false;
 3683       }
 3684       return true;
 3685 
 3686     case Op_AddVI:
 3687     case Op_AddVL:
 3688     case Op_AddVF:
 3689     case Op_AddVD:
 3690     case Op_SubVI:
 3691     case Op_SubVL:
 3692     case Op_SubVF:
 3693     case Op_SubVD:
 3694     case Op_MulVI:
 3695     case Op_MulVF:
 3696     case Op_MulVD:
 3697     case Op_DivVF:
 3698     case Op_DivVD:
 3699     case Op_SqrtVF:
 3700     case Op_SqrtVD:
 3701     case Op_LShiftVI:
 3702     case Op_LShiftVL:
 3703     case Op_RShiftVI:
 3704     case Op_RShiftVL:
 3705     case Op_URShiftVI:
 3706     case Op_URShiftVL:
 3707     case Op_LoadVectorMasked:
 3708     case Op_StoreVectorMasked:
 3709     case Op_LoadVectorGatherMasked:
 3710     case Op_StoreVectorScatterMasked:
 3711       return true;
 3712 
 3713     case Op_UMinV:
 3714     case Op_UMaxV:
 3715       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3716         return false;
 3717       } // fallthrough
 3718     case Op_MaxV:
 3719     case Op_MinV:
 3720       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3721         return false; // Implementation limitation
 3722       }
 3723       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3724         return false; // Implementation limitation
 3725       }
 3726       return true;
 3727     case Op_SaturatingAddV:
 3728     case Op_SaturatingSubV:
 3729       if (!is_subword_type(bt)) {
 3730         return false;
 3731       }
 3732       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3733         return false; // Implementation limitation
 3734       }
 3735       return true;
 3736 
 3737     case Op_VectorMaskCmp:
 3738       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3739         return false; // Implementation limitation
 3740       }
 3741       return true;
 3742 
 3743     case Op_VectorRearrange:
 3744       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3745         return false; // Implementation limitation
 3746       }
 3747       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3748         return false; // Implementation limitation
 3749       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3750         return false; // Implementation limitation
 3751       }
 3752       return true;
 3753 
 3754     // Binary Logical operations
 3755     case Op_AndVMask:
 3756     case Op_OrVMask:
 3757     case Op_XorVMask:
 3758       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3759         return false; // Implementation limitation
 3760       }
 3761       return true;
 3762 
 3763     case Op_PopCountVI:
 3764     case Op_PopCountVL:
 3765       if (!is_pop_count_instr_target(bt)) {
 3766         return false;
 3767       }
 3768       return true;
 3769 
 3770     case Op_MaskAll:
 3771       return true;
 3772 
 3773     case Op_CountLeadingZerosV:
 3774       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3775         return true;
      } // fallthrough
 3777     default:
 3778       return false;
 3779   }
 3780 }
 3781 
 3782 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3783   return false;
 3784 }
 3785 
 3786 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3787 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3788   switch (elem_bt) {
 3789     case T_BYTE:  return false;
 3790     case T_SHORT: return !VM_Version::supports_avx512bw();
 3791     case T_INT:   return !VM_Version::supports_avx();
 3792     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3793     default:
 3794       ShouldNotReachHere();
 3795       return false;
 3796   }
 3797 }
 3798 
 3799 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3800   // Prefer predicate if the mask type is "TypeVectMask".
 3801   return vt->isa_vectmask() != nullptr;
 3802 }
 3803 
 3804 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3805   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3806   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3807   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3808       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3809     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3810     return new legVecZOper();
 3811   }
 3812   if (legacy) {
 3813     switch (ideal_reg) {
 3814       case Op_VecS: return new legVecSOper();
 3815       case Op_VecD: return new legVecDOper();
 3816       case Op_VecX: return new legVecXOper();
 3817       case Op_VecY: return new legVecYOper();
 3818       case Op_VecZ: return new legVecZOper();
 3819     }
 3820   } else {
 3821     switch (ideal_reg) {
 3822       case Op_VecS: return new vecSOper();
 3823       case Op_VecD: return new vecDOper();
 3824       case Op_VecX: return new vecXOper();
 3825       case Op_VecY: return new vecYOper();
 3826       case Op_VecZ: return new vecZOper();
 3827     }
 3828   }
 3829   ShouldNotReachHere();
 3830   return nullptr;
 3831 }
 3832 
 3833 bool Matcher::is_reg2reg_move(MachNode* m) {
 3834   switch (m->rule()) {
 3835     case MoveVec2Leg_rule:
 3836     case MoveLeg2Vec_rule:
 3837     case MoveF2VL_rule:
 3838     case MoveF2LEG_rule:
 3839     case MoveVL2F_rule:
 3840     case MoveLEG2F_rule:
 3841     case MoveD2VL_rule:
 3842     case MoveD2LEG_rule:
 3843     case MoveVL2D_rule:
 3844     case MoveLEG2D_rule:
 3845       return true;
 3846     default:
 3847       return false;
 3848   }
 3849 }
 3850 
 3851 bool Matcher::is_generic_vector(MachOper* opnd) {
 3852   switch (opnd->opcode()) {
 3853     case VEC:
 3854     case LEGVEC:
 3855       return true;
 3856     default:
 3857       return false;
 3858   }
 3859 }
 3860 
 3861 //------------------------------------------------------------------------
 3862 
 3863 const RegMask* Matcher::predicate_reg_mask(void) {
 3864   return &_VECTMASK_REG_mask;
 3865 }
 3866 
 3867 // Max vector size in bytes. 0 if not supported.
 3868 int Matcher::vector_width_in_bytes(BasicType bt) {
 3869   assert(is_java_primitive(bt), "only primitive type vectors");
 3870   // SSE2 supports 128bit vectors for all types.
 3871   // AVX2 supports 256bit vectors for all types.
  // AVX-512/EVEX supports 512bit vectors for all types.
 3873   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
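  // For illustration: UseAVX == 2 gives (1 << 2) * 8 = 32 bytes (256-bit),
  // and UseAVX == 3 gives (1 << 3) * 8 = 64 bytes (512-bit).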
 3874   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3875   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3876     size = (UseAVX > 2) ? 64 : 32;
 3877   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3878     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3879   // Use flag to limit vector size.
  size = MIN2(size, (int)MaxVectorSize);
 3881   // Minimum 2 values in vector (or 4 for bytes).
 3882   switch (bt) {
 3883   case T_DOUBLE:
 3884   case T_LONG:
 3885     if (size < 16) return 0;
 3886     break;
 3887   case T_FLOAT:
 3888   case T_INT:
 3889     if (size < 8) return 0;
 3890     break;
  case T_BOOLEAN:
  case T_CHAR:
  case T_BYTE:
  case T_SHORT:
    if (size < 4) return 0;
    break;
 3903   default:
 3904     ShouldNotReachHere();
 3905   }
 3906   return size;
 3907 }
 3908 
 3909 // Limits on vector size (number of elements) loaded into vector.
 3910 int Matcher::max_vector_size(const BasicType bt) {
 3911   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3912 }
 3913 int Matcher::min_vector_size(const BasicType bt) {
 3914   int max_size = max_vector_size(bt);
 3915   // Min size which can be loaded into vector is 4 bytes.
 3916   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
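  // For illustration: T_BYTE needs at least 4 elements (4 bytes), while
  // T_INT needs at least 2 elements (8 bytes).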
 3917   // Support for calling svml double64 vectors
 3918   if (bt == T_DOUBLE) {
 3919     size = 1;
 3920   }
  return MIN2(size, max_size);
 3922 }
 3923 
 3924 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3925   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3926   // by default on Cascade Lake
 3927   if (VM_Version::is_default_intel_cascade_lake()) {
 3928     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3929   }
 3930   return Matcher::max_vector_size(bt);
 3931 }
 3932 
 3933 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3934   return -1;
 3935 }
 3936 
 3937 // Vector ideal reg corresponding to specified size in bytes
 3938 uint Matcher::vector_ideal_reg(int size) {
 3939   assert(MaxVectorSize >= size, "");
 3940   switch(size) {
 3941     case  4: return Op_VecS;
 3942     case  8: return Op_VecD;
 3943     case 16: return Op_VecX;
 3944     case 32: return Op_VecY;
 3945     case 64: return Op_VecZ;
 3946   }
 3947   ShouldNotReachHere();
 3948   return 0;
 3949 }
 3950 
 3951 // Check for shift by small constant as well
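// For illustration: for a positive int index i into a long-element array
// on LP64, the address expression contains (LShiftX (ConvI2L i) 3);
// cloning the shift (and eliding the ConvI2L) lets the matcher fold it
// into a scaled addressing mode such as [base + i*8 + disp].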
 3952 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3953   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3954       shift->in(2)->get_int() <= 3 &&
 3955       // Are there other uses besides address expressions?
 3956       !matcher->is_visited(shift)) {
 3957     address_visited.set(shift->_idx); // Flag as address_visited
 3958     mstack.push(shift->in(2), Matcher::Visit);
 3959     Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses the
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
 3963     if (conv->Opcode() == Op_ConvI2L &&
 3964         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3965         // Are there other uses besides address expressions?
 3966         !matcher->is_visited(conv)) {
 3967       address_visited.set(conv->_idx); // Flag as address_visited
 3968       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3969     } else {
 3970       mstack.push(conv, Matcher::Pre_Visit);
 3971     }
 3972     return true;
 3973   }
 3974   return false;
 3975 }
 3976 
 3977 // This function identifies sub-graphs in which a 'load' node is
 3978 // input to two different nodes, and such that it can be matched
 3979 // with BMI instructions like blsi, blsr, etc.
 3980 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
 3981 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3982 // refers to the same node.
 3983 //
 3984 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3985 // This is a temporary solution until we make DAGs expressible in ADL.
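// For illustration, the concrete idioms matched below in is_bmi_pattern are:
//   a[i] & (0 - a[i])  -> blsi   (extract lowest set bit)
//   a[i] & (a[i] - 1)  -> blsr   (reset lowest set bit)
//   a[i] ^ (a[i] - 1)  -> blsmsk (mask up to lowest set bit)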
 3986 template<typename ConType>
 3987 class FusedPatternMatcher {
 3988   Node* _op1_node;
 3989   Node* _mop_node;
 3990   int _con_op;
 3991 
 3992   static int match_next(Node* n, int next_op, int next_op_idx) {
 3993     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3994       return -1;
 3995     }
 3996 
 3997     if (next_op_idx == -1) { // n is commutative, try rotations
 3998       if (n->in(1)->Opcode() == next_op) {
 3999         return 1;
 4000       } else if (n->in(2)->Opcode() == next_op) {
 4001         return 2;
 4002       }
 4003     } else {
 4004       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 4005       if (n->in(next_op_idx)->Opcode() == next_op) {
 4006         return next_op_idx;
 4007       }
 4008     }
 4009     return -1;
 4010   }
 4011 
 4012  public:
 4013   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 4014     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 4015 
 4016   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 4017              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4018              typename ConType::NativeType con_value) {
 4019     if (_op1_node->Opcode() != op1) {
 4020       return false;
 4021     }
 4022     if (_mop_node->outcnt() > 2) {
 4023       return false;
 4024     }
 4025     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4026     if (op1_op2_idx == -1) {
 4027       return false;
 4028     }
 4029     // Memory operation must be the other edge
 4030     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 4031 
 4032     // Check that the mop node is really what we want
 4033     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4034       Node* op2_node = _op1_node->in(op1_op2_idx);
 4035       if (op2_node->outcnt() > 1) {
 4036         return false;
 4037       }
 4038       assert(op2_node->Opcode() == op2, "Should be");
 4039       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4040       if (op2_con_idx == -1) {
 4041         return false;
 4042       }
 4043       // Memory operation must be the other edge
 4044       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4045       // Check that the memory operation is the same node
 4046       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4047         // Now check the constant
 4048         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4049         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4050           return true;
 4051         }
 4052       }
 4053     }
 4054     return false;
 4055   }
 4056 };
 4057 
 4058 static bool is_bmi_pattern(Node* n, Node* m) {
 4059   assert(UseBMI1Instructions, "sanity");
 4060   if (n != nullptr && m != nullptr) {
 4061     if (m->Opcode() == Op_LoadI) {
 4062       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4063       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4064              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4065              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4066     } else if (m->Opcode() == Op_LoadL) {
 4067       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4068       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4069              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4070              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4071     }
 4072   }
 4073   return false;
 4074 }
 4075 
 4076 // Should the matcher clone input 'm' of node 'n'?
 4077 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4078   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4079   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
 4080     mstack.push(m, Visit);
 4081     return true;
 4082   }
 4083   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4084     mstack.push(m, Visit);           // m = ShiftCntV
 4085     return true;
 4086   }
 4087   if (is_encode_and_store_pattern(n, m)) {
 4088     mstack.push(m, Visit);
 4089     return true;
 4090   }
 4091   return false;
 4092 }
 4093 
 4094 // Should the Matcher clone shifts on addressing modes, expecting them
 4095 // to be subsumed into complex addressing expressions or compute them
 4096 // into registers?
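// For illustration: (AddP (AddP base (LShiftX idx 3)) con32) can be
// subsumed into a single addressing expression [base + idx*8 + disp32],
// so both the shift and the constant offset are cloned rather than
// computed into registers.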
 4097 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4098   Node *off = m->in(AddPNode::Offset);
 4099   if (off->is_Con()) {
 4100     address_visited.test_set(m->_idx); // Flag as address_visited
 4101     Node *adr = m->in(AddPNode::Address);
 4102 
 4103     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4104     // AtomicAdd is not an addressing expression.
 4105     // Cheap to find it by looking for screwy base.
 4106     if (adr->is_AddP() &&
 4107         !adr->in(AddPNode::Base)->is_top() &&
 4108         !adr->in(AddPNode::Offset)->is_Con() &&
 4109         off->get_long() == (int) (off->get_long()) && // immL32
 4110         // Are there other uses besides address expressions?
 4111         !is_visited(adr)) {
 4112       address_visited.set(adr->_idx); // Flag as address_visited
 4113       Node *shift = adr->in(AddPNode::Offset);
 4114       if (!clone_shift(shift, this, mstack, address_visited)) {
 4115         mstack.push(shift, Pre_Visit);
 4116       }
 4117       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4118       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4119     } else {
 4120       mstack.push(adr, Pre_Visit);
 4121     }
 4122 
 4123     // Clone X+offset as it also folds into most addressing expressions
 4124     mstack.push(off, Visit);
 4125     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4126     return true;
 4127   } else if (clone_shift(off, this, mstack, address_visited)) {
 4128     address_visited.test_set(m->_idx); // Flag as address_visited
 4129     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4130     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4131     return true;
 4132   }
 4133   return false;
 4134 }
 4135 
 4136 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4137   switch (bt) {
 4138     case BoolTest::eq:
 4139       return Assembler::eq;
 4140     case BoolTest::ne:
 4141       return Assembler::neq;
 4142     case BoolTest::le:
 4143     case BoolTest::ule:
 4144       return Assembler::le;
 4145     case BoolTest::ge:
 4146     case BoolTest::uge:
 4147       return Assembler::nlt;
 4148     case BoolTest::lt:
 4149     case BoolTest::ult:
 4150       return Assembler::lt;
 4151     case BoolTest::gt:
 4152     case BoolTest::ugt:
 4153       return Assembler::nle;
 4154     default : ShouldNotReachHere(); return Assembler::_false;
 4155   }
 4156 }
 4157 
 4158 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4159   switch (bt) {
 4160   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4161   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4162   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4163   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4164   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4165   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4166   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4167   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4168   }
 4169 }
 4170 
 4171 // Helper methods for MachSpillCopyNode::implementation().
 4172 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4173                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4174   assert(ireg == Op_VecS || // 32bit vector
 4175          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4176           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4177          "no non-adjacent vector moves" );
 4178   if (masm) {
 4179     switch (ireg) {
 4180     case Op_VecS: // copy whole register
 4181     case Op_VecD:
 4182     case Op_VecX:
 4183       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4184         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4185       } else {
 4186         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
 4188       break;
 4189     case Op_VecY:
 4190       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4191         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4192       } else {
 4193         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
 4195       break;
 4196     case Op_VecZ:
 4197       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4198       break;
 4199     default:
 4200       ShouldNotReachHere();
 4201     }
 4202 #ifndef PRODUCT
 4203   } else {
 4204     switch (ireg) {
 4205     case Op_VecS:
 4206     case Op_VecD:
 4207     case Op_VecX:
 4208       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4209       break;
 4210     case Op_VecY:
 4211     case Op_VecZ:
 4212       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4213       break;
 4214     default:
 4215       ShouldNotReachHere();
 4216     }
 4217 #endif
 4218   }
 4219 }
 4220 
 4221 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4222                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4223   if (masm) {
 4224     if (is_load) {
 4225       switch (ireg) {
 4226       case Op_VecS:
 4227         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4228         break;
 4229       case Op_VecD:
 4230         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4231         break;
 4232       case Op_VecX:
 4233         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4234           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4235         } else {
 4236           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4238         }
 4239         break;
 4240       case Op_VecY:
 4241         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4242           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4243         } else {
 4244           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4246         }
 4247         break;
 4248       case Op_VecZ:
 4249         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4250         break;
 4251       default:
 4252         ShouldNotReachHere();
 4253       }
 4254     } else { // store
 4255       switch (ireg) {
 4256       case Op_VecS:
 4257         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4258         break;
 4259       case Op_VecD:
 4260         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4261         break;
 4262       case Op_VecX:
 4263         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4264           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4265         }
 4266         else {
 4267           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4268         }
 4269         break;
 4270       case Op_VecY:
 4271         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4272           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4273         }
 4274         else {
 4275           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4276         }
 4277         break;
 4278       case Op_VecZ:
 4279         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4280         break;
 4281       default:
 4282         ShouldNotReachHere();
 4283       }
 4284     }
 4285 #ifndef PRODUCT
 4286   } else {
 4287     if (is_load) {
 4288       switch (ireg) {
 4289       case Op_VecS:
 4290         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4291         break;
 4292       case Op_VecD:
 4293         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4294         break;
 4295        case Op_VecX:
 4296         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4297         break;
 4298       case Op_VecY:
 4299       case Op_VecZ:
 4300         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4301         break;
 4302       default:
 4303         ShouldNotReachHere();
 4304       }
 4305     } else { // store
 4306       switch (ireg) {
 4307       case Op_VecS:
 4308         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4309         break;
 4310       case Op_VecD:
 4311         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4312         break;
 4313        case Op_VecX:
 4314         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4315         break;
 4316       case Op_VecY:
 4317       case Op_VecZ:
 4318         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4319         break;
 4320       default:
 4321         ShouldNotReachHere();
 4322       }
 4323     }
 4324 #endif
 4325   }
 4326 }
 4327 
 4328 template <class T>
 4329 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4330   int size = type2aelembytes(bt) * len;
 4331   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4332   for (int i = 0; i < len; i++) {
 4333     int offset = i * type2aelembytes(bt);
 4334     switch (bt) {
 4335       case T_BYTE: val->at(i) = con; break;
 4336       case T_SHORT: {
 4337         jshort c = con;
 4338         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4339         break;
 4340       }
 4341       case T_INT: {
 4342         jint c = con;
 4343         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4344         break;
 4345       }
 4346       case T_LONG: {
 4347         jlong c = con;
 4348         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4349         break;
 4350       }
 4351       case T_FLOAT: {
 4352         jfloat c = con;
 4353         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4354         break;
 4355       }
 4356       case T_DOUBLE: {
 4357         jdouble c = con;
 4358         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4359         break;
 4360       }
 4361       default: assert(false, "%s", type2name(bt));
 4362     }
 4363   }
 4364   return val;
 4365 }
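// For illustration (little-endian x86): vreplicate_imm<jint>(T_INT, 7, 4)
// returns a 16-byte array holding the byte pattern 07 00 00 00 repeated
// four times, ready to be emitted as a replicated vector constant.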
 4366 
 4367 static inline jlong high_bit_set(BasicType bt) {
 4368   switch (bt) {
 4369     case T_BYTE:  return 0x8080808080808080;
 4370     case T_SHORT: return 0x8000800080008000;
 4371     case T_INT:   return 0x8000000080000000;
 4372     case T_LONG:  return 0x8000000000000000;
 4373     default:
 4374       ShouldNotReachHere();
 4375       return 0;
 4376   }
 4377 }
 4378 
 4379 #ifndef PRODUCT
 4380   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4381     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4382   }
 4383 #endif
 4384 
 4385   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4386     __ nop(_count);
 4387   }
 4388 
 4389   uint MachNopNode::size(PhaseRegAlloc*) const {
 4390     return _count;
 4391   }
 4392 
 4393 #ifndef PRODUCT
 4394   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4395     st->print("# breakpoint");
 4396   }
 4397 #endif
 4398 
 4399   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4400     __ int3();
 4401   }
 4402 
 4403   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4404     return MachNode::size(ra_);
 4405   }
 4406 
 4407 %}
 4408 
 4409 //----------ENCODING BLOCK-----------------------------------------------------
 4410 // This block specifies the encoding classes used by the compiler to
 4411 // output byte streams.  Encoding classes are parameterized macros
 4412 // used by Machine Instruction Nodes in order to generate the bit
 4413 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
 4417 // which returns its register number when queried.  CONST_INTER causes
 4418 // an operand to generate a function which returns the value of the
 4419 // constant when queried.  MEMORY_INTER causes an operand to generate
 4420 // four functions which return the Base Register, the Index Register,
 4421 // the Scale Value, and the Offset Value of the operand when queried.
 4422 // COND_INTER causes an operand to generate six functions which return
 4423 // the encoding code (ie - encoding bits for the instruction)
 4424 // associated with each basic boolean condition for a conditional
 4425 // instruction.
 4426 //
// Instructions specify two basic values for encoding.  (A function is
// also available to check whether a constant displacement is an oop.)
// They use the ins_encode keyword to specify their encoding
 4430 // classes (which must be a sequence of enc_class names, and their
 4431 // parameters, specified in the encoding block), and they use the
 4432 // opcode keyword to specify, in order, their primary, secondary, and
 4433 // tertiary opcode.  Only the opcode sections which a particular
 4434 // instruction needs for encoding need to be specified.
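//
// For illustration (an invented example; the enc_class names below are
// real, but no instruct in this excerpt uses exactly this combination),
// an instruction body might specify:
//   ins_encode(clear_avx, Java_Static_Call(meth));
// with the opcode keyword supplying its primary/secondary/tertiary bytes.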
 4435 encode %{
 4436   enc_class cdql_enc(no_rax_rdx_RegI div)
 4437   %{
 4438     // Full implementation of Java idiv and irem; checks for
 4439     // special case as described in JVM spec., p.243 & p.271.
 4440     //
 4441     //         normal case                           special case
 4442     //
 4443     // input : rax: dividend                         min_int
 4444     //         reg: divisor                          -1
 4445     //
 4446     // output: rax: quotient  (= rax idiv reg)       min_int
 4447     //         rdx: remainder (= rax irem reg)       0
 4448     //
    //  Code sequence:
 4450     //
 4451     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4452     //    5:   75 07/08                jne    e <normal>
 4453     //    7:   33 d2                   xor    %edx,%edx
 4454     //  [div >= 8 -> offset + 1]
 4455     //  [REX_B]
 4456     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4457     //    c:   74 03/04                je     11 <done>
 4458     // 000000000000000e <normal>:
 4459     //    e:   99                      cltd
 4460     //  [div >= 8 -> offset + 1]
 4461     //  [REX_B]
 4462     //    f:   f7 f9                   idiv   $div
 4463     // 0000000000000011 <done>:
 4464     Label normal;
 4465     Label done;
 4466 
 4467     // cmp    $0x80000000,%eax
 4468     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4469 
 4470     // jne    e <normal>
 4471     __ jccb(Assembler::notEqual, normal);
 4472 
 4473     // xor    %edx,%edx
 4474     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4475 
    // cmp    $0xffffffffffffffff,$div
 4477     __ cmpl($div$$Register, -1);
 4478 
 4479     // je     11 <done>
 4480     __ jccb(Assembler::equal, done);
 4481 
 4482     // <normal>
 4483     // cltd
 4484     __ bind(normal);
 4485     __ cdql();
 4486 
 4487     // idivl
 4488     // <done>
 4489     __ idivl($div$$Register);
 4490     __ bind(done);
 4491   %}
 4492 
 4493   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4494   %{
 4495     // Full implementation of Java ldiv and lrem; checks for
 4496     // special case as described in JVM spec., p.243 & p.271.
 4497     //
 4498     //         normal case                           special case
 4499     //
 4500     // input : rax: dividend                         min_long
 4501     //         reg: divisor                          -1
 4502     //
 4503     // output: rax: quotient  (= rax idiv reg)       min_long
 4504     //         rdx: remainder (= rax irem reg)       0
 4505     //
    //  Code sequence:
 4507     //
 4508     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4509     //    7:   00 00 80
 4510     //    a:   48 39 d0                cmp    %rdx,%rax
 4511     //    d:   75 08                   jne    17 <normal>
 4512     //    f:   33 d2                   xor    %edx,%edx
 4513     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4514     //   15:   74 05                   je     1c <done>
 4515     // 0000000000000017 <normal>:
 4516     //   17:   48 99                   cqto
 4517     //   19:   48 f7 f9                idiv   $div
 4518     // 000000000000001c <done>:
 4519     Label normal;
 4520     Label done;
 4521 
 4522     // mov    $0x8000000000000000,%rdx
 4523     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4524 
 4525     // cmp    %rdx,%rax
 4526     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4527 
 4528     // jne    17 <normal>
 4529     __ jccb(Assembler::notEqual, normal);
 4530 
 4531     // xor    %edx,%edx
 4532     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4533 
 4534     // cmp    $0xffffffffffffffff,$div
 4535     __ cmpq($div$$Register, -1);
 4536 
    // je     1c <done>
 4538     __ jccb(Assembler::equal, done);
 4539 
 4540     // <normal>
 4541     // cqto
 4542     __ bind(normal);
 4543     __ cdqq();
 4544 
    // idivq
 4546     // <done>
 4547     __ idivq($div$$Register);
 4548     __ bind(done);
 4549   %}
 4550 
 4551   enc_class clear_avx %{
 4552     DEBUG_ONLY(int off0 = __ offset());
 4553     if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when the current compiled code uses
      // wide vectors, to avoid the AVX <-> SSE transition penalty during calls.
 4557       __ vzeroupper();
 4558     }
 4559     DEBUG_ONLY(int off1 = __ offset());
 4560     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4561   %}
 4562 
 4563   enc_class Java_To_Runtime(method meth) %{
 4564     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4565     __ call(r10);
 4566     __ post_call_nop();
 4567   %}
 4568 
 4569   enc_class Java_Static_Call(method meth)
 4570   %{
 4571     // JAVA STATIC CALL
 4572     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4573     // determine who we intended to call.
 4574     if (!_method) {
 4575       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4576     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4577       // The NOP here is purely to ensure that eliding a call to
 4578       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4579       __ addr_nop_5();
 4580       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4581     } else {
 4582       int method_index = resolved_method_index(masm);
 4583       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4584                                                   : static_call_Relocation::spec(method_index);
 4585       address mark = __ pc();
 4586       int call_offset = __ offset();
 4587       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4588       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4589         // Calls of the same statically bound method can share
 4590         // a stub to the interpreter.
 4591         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4592       } else {
 4593         // Emit stubs for static call.
 4594         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4595         __ clear_inst_mark();
 4596         if (stub == nullptr) {
 4597           ciEnv::current()->record_failure("CodeCache is full");
 4598           return;
 4599         }
 4600       }
 4601     }
 4602     __ post_call_nop();
 4603   %}
 4604 
 4605   enc_class Java_Dynamic_Call(method meth) %{
 4606     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4607     __ post_call_nop();
 4608   %}
 4609 
 4610   enc_class call_epilog %{
 4611     if (VerifyStackAtCalls) {
 4612       // Check that stack depth is unchanged: find majik cookie on stack
 4613       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4614       Label L;
 4615       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4616       __ jccb(Assembler::equal, L);
 4617       // Die if stack mismatch
 4618       __ int3();
 4619       __ bind(L);
 4620     }
 4621     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
 4622       // The last return value is not set by the callee but used to pass the null marker to compiled code.
      // Search for the corresponding projection, get the register, and emit code that initializes it.
 4624       uint con = (tf()->range_cc()->cnt() - 1);
 4625       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4626         ProjNode* proj = fast_out(i)->as_Proj();
 4627         if (proj->_con == con) {
 4628           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4629           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4630           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
 4631           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 4632           __ testq(rax, rax);
 4633           __ setb(Assembler::notZero, toReg);
 4634           __ movzbl(toReg, toReg);
 4635           if (reg->is_stack()) {
 4636             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4637             __ movq(Address(rsp, st_off), toReg);
 4638           }
 4639           break;
 4640         }
 4641       }
 4642       if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // Rax either contains an oop (if the inline type is returned buffered) or
        // a pointer to the corresponding InlineKlass with the lowest bit set to 1.
        // Zero rax when the lowest bit is set, so that C2 can use the oop after
        // null checking.
 4647         // rax &= (rax & 1) - 1
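        // For illustration: if rax holds klass_ptr|1, then (rax & 1) - 1 == 0 and
        // the AND clears rax; if rax holds an oop (lowest bit 0), then
        // (rax & 1) - 1 == -1 (all ones) and the AND leaves rax unchanged.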
 4648         __ movptr(rscratch1, rax);
 4649         __ andptr(rscratch1, 0x1);
 4650         __ subptr(rscratch1, 0x1);
 4651         __ andptr(rax, rscratch1);
 4652       }
 4653     }
 4654   %}
 4655 
 4656 %}
 4657 
 4658 //----------FRAME--------------------------------------------------------------
 4659 // Definition of frame structure and management information.
 4660 //
 4661 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4662 //                             |   (to get allocators register number
 4663 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4664 //  r   CALLER     |        |
 4665 //  o     |        +--------+      pad to even-align allocators stack-slot
 4666 //  w     V        |  pad0  |        numbers; owned by CALLER
 4667 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4668 //  h     ^        |   in   |  5
 4669 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4670 //  |     |        |        |  3
 4671 //  |     |        +--------+
 4672 //  V     |        | old out|      Empty on Intel, window on Sparc
 4673 //        |    old |preserve|      Must be even aligned.
 4674 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4675 //        |        |   in   |  3   area for Intel ret address
 4676 //     Owned by    |preserve|      Empty on Sparc.
 4677 //       SELF      +--------+
 4678 //        |        |  pad2  |  2   pad to align old SP
 4679 //        |        +--------+  1
 4680 //        |        | locks  |  0
 4681 //        |        +--------+----> OptoReg::stack0(), even aligned
 4682 //        |        |  pad1  | 11   pad to align new SP
 4683 //        |        +--------+
 4684 //        |        |        | 10
 4685 //        |        | spills |  9   spills
 4686 //        V        |        |  8   (pad0 slot for callee)
 4687 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4688 //        ^        |  out   |  7
 4689 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4690 //     Owned by    +--------+
 4691 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4692 //        |    new |preserve|      Must be even-aligned.
 4693 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4694 //        |        |        |
 4695 //
 4696 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4697 //         known from SELF's arguments and the Java calling convention.
 4698 //         Region 6-7 is determined per call site.
 4699 // Note 2: If the calling convention leaves holes in the incoming argument
 4700 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4701 //         are owned by the CALLEE.  Holes should not be necessary in the
 4702 //         incoming area, as the Java calling convention is completely under
 4703 //         the control of the AD file.  Doubles can be sorted and packed to
 4704 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4705 //         varargs C calling conventions.
 4706 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4707 //         even aligned with pad0 as needed.
 4708 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4709 //         region 6-11 is even aligned; it may be padded out more so that
 4710 //         the region from SP to FP meets the minimum stack alignment.
 4711 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4712 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4713 //         SP meets the minimum alignment.
 4714 
 4715 frame
 4716 %{
 4717   // These three registers define part of the calling convention
 4718   // between compiled code and the interpreter.
 4719   inline_cache_reg(RAX);                // Inline Cache Register
 4720 
 4721   // Optional: name the operand used by cisc-spilling to access
 4722   // [stack_pointer + offset]
 4723   cisc_spilling_operand_name(indOffset32);
 4724 
 4725   // Number of stack slots consumed by locking an object
 4726   sync_stack_slots(2);
 4727 
 4728   // Compiled code's Frame Pointer
 4729   frame_pointer(RSP);
 4730 
 4731   // Stack alignment requirement
 4732   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4733 
 4734   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4735   // for calls to C.  Supports the var-args backing area for register parms.
 4736   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4737 
 4738   // The after-PROLOG location of the return address.  Location of
 4739   // return address specifies a type (REG or STACK) and a number
 4740   // representing the register number (i.e. - use a register name) or
 4741   // stack slot.
 4742   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4743   // Otherwise, it is above the locks and verification slot and alignment word
 4744   return_addr(STACK - 2 +
 4745               align_up((Compile::current()->in_preserve_stack_slots() +
 4746                         Compile::current()->fixed_slots()),
 4747                        stack_alignment_in_slots()));
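  // For illustration (values here are illustrative, not fixed): with
  // in_preserve_stack_slots() == 4, fixed_slots() == 0, and 16-byte stack
  // alignment (4 slots), this is STACK - 2 + align_up(4, 4) = STACK + 2.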
 4748 
 4749   // Location of compiled Java return values.  Same as C for now.
 4750   return_value
 4751   %{
 4752     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4753            "only return normal values");
 4754 
 4755     static const int lo[Op_RegL + 1] = {
 4756       0,
 4757       0,
 4758       RAX_num,  // Op_RegN
 4759       RAX_num,  // Op_RegI
 4760       RAX_num,  // Op_RegP
 4761       XMM0_num, // Op_RegF
 4762       XMM0_num, // Op_RegD
 4763       RAX_num   // Op_RegL
 4764     };
 4765     static const int hi[Op_RegL + 1] = {
 4766       0,
 4767       0,
 4768       OptoReg::Bad, // Op_RegN
 4769       OptoReg::Bad, // Op_RegI
 4770       RAX_H_num,    // Op_RegP
 4771       OptoReg::Bad, // Op_RegF
 4772       XMM0b_num,    // Op_RegD
 4773       RAX_H_num     // Op_RegL
 4774     };
 4775     // Excluded flags and vector registers.
 4776     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4777     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4778   %}
 4779 %}
 4780 
 4781 //----------ATTRIBUTES---------------------------------------------------------
 4782 //----------Operand Attributes-------------------------------------------------
 4783 op_attrib op_cost(0);        // Required cost attribute
 4784 
 4785 //----------Instruction Attributes---------------------------------------------
 4786 ins_attrib ins_cost(100);       // Required cost attribute
 4787 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4788 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4789                                 // a non-matching short branch variant
 4790                                 // of some long branch?
 4791 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4792                                 // be a power of 2) specifies the
 4793                                 // alignment that some part of the
 4794                                 // instruction (not necessarily the
 4795                                 // start) requires.  If > 1, a
 4796                                 // compute_padding() function must be
 4797                                 // provided for the instruction
 4798 
 4799 // Whether this node is expanded during code emission into a sequence of
 4800 // instructions and the first instruction can perform an implicit null check.
 4801 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4802 
 4803 //----------OPERANDS-----------------------------------------------------------
 4804 // Operand definitions must precede instruction definitions for correct parsing
 4805 // in the ADLC because operands constitute user defined types which are used in
 4806 // instruction definitions.
 4807 
 4808 //----------Simple Operands----------------------------------------------------
 4809 // Immediate Operands
 4810 // Integer Immediate
 4811 operand immI()
 4812 %{
 4813   match(ConI);
 4814 
 4815   op_cost(10);
 4816   format %{ %}
 4817   interface(CONST_INTER);
 4818 %}
 4819 
 4820 // Constant for test vs zero
 4821 operand immI_0()
 4822 %{
 4823   predicate(n->get_int() == 0);
 4824   match(ConI);
 4825 
 4826   op_cost(0);
 4827   format %{ %}
 4828   interface(CONST_INTER);
 4829 %}
 4830 
 4831 // Constant for increment
 4832 operand immI_1()
 4833 %{
 4834   predicate(n->get_int() == 1);
 4835   match(ConI);
 4836 
 4837   op_cost(0);
 4838   format %{ %}
 4839   interface(CONST_INTER);
 4840 %}
 4841 
 4842 // Constant for decrement
 4843 operand immI_M1()
 4844 %{
 4845   predicate(n->get_int() == -1);
 4846   match(ConI);
 4847 
 4848   op_cost(0);
 4849   format %{ %}
 4850   interface(CONST_INTER);
 4851 %}
 4852 
 4853 operand immI_2()
 4854 %{
 4855   predicate(n->get_int() == 2);
 4856   match(ConI);
 4857 
 4858   op_cost(0);
 4859   format %{ %}
 4860   interface(CONST_INTER);
 4861 %}
 4862 
 4863 operand immI_4()
 4864 %{
 4865   predicate(n->get_int() == 4);
 4866   match(ConI);
 4867 
 4868   op_cost(0);
 4869   format %{ %}
 4870   interface(CONST_INTER);
 4871 %}
 4872 
 4873 operand immI_8()
 4874 %{
 4875   predicate(n->get_int() == 8);
 4876   match(ConI);
 4877 
 4878   op_cost(0);
 4879   format %{ %}
 4880   interface(CONST_INTER);
 4881 %}
 4882 
 4883 // Valid scale values for addressing modes
 4884 operand immI2()
 4885 %{
 4886   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4887   match(ConI);
 4888 
 4889   format %{ %}
 4890   interface(CONST_INTER);
 4891 %}
 4892 
 4893 operand immU7()
 4894 %{
 4895   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4896   match(ConI);
 4897 
 4898   op_cost(5);
 4899   format %{ %}
 4900   interface(CONST_INTER);
 4901 %}
 4902 
 4903 operand immI8()
 4904 %{
 4905   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4906   match(ConI);
 4907 
 4908   op_cost(5);
 4909   format %{ %}
 4910   interface(CONST_INTER);
 4911 %}
 4912 
 4913 operand immU8()
 4914 %{
 4915   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4916   match(ConI);
 4917 
 4918   op_cost(5);
 4919   format %{ %}
 4920   interface(CONST_INTER);
 4921 %}
 4922 
 4923 operand immI16()
 4924 %{
 4925   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4926   match(ConI);
 4927 
 4928   op_cost(10);
 4929   format %{ %}
 4930   interface(CONST_INTER);
 4931 %}
 4932 
 4933 // Int Immediate non-negative
 4934 operand immU31()
 4935 %{
 4936   predicate(n->get_int() >= 0);
 4937   match(ConI);
 4938 
 4939   op_cost(0);
 4940   format %{ %}
 4941   interface(CONST_INTER);
 4942 %}
 4943 
 4944 // Pointer Immediate
 4945 operand immP()
 4946 %{
 4947   match(ConP);
 4948 
 4949   op_cost(10);
 4950   format %{ %}
 4951   interface(CONST_INTER);
 4952 %}
 4953 
 4954 // Null Pointer Immediate
 4955 operand immP0()
 4956 %{
 4957   predicate(n->get_ptr() == 0);
 4958   match(ConP);
 4959 
 4960   op_cost(5);
 4961   format %{ %}
 4962   interface(CONST_INTER);
 4963 %}
 4964 
 4965 // Pointer Immediate
 4966 operand immN() %{
 4967   match(ConN);
 4968 
 4969   op_cost(10);
 4970   format %{ %}
 4971   interface(CONST_INTER);
 4972 %}
 4973 
 4974 operand immNKlass() %{
 4975   match(ConNKlass);
 4976 
 4977   op_cost(10);
 4978   format %{ %}
 4979   interface(CONST_INTER);
 4980 %}
 4981 
 4982 // Null Pointer Immediate
 4983 operand immN0() %{
 4984   predicate(n->get_narrowcon() == 0);
 4985   match(ConN);
 4986 
 4987   op_cost(5);
 4988   format %{ %}
 4989   interface(CONST_INTER);
 4990 %}
 4991 
 4992 operand immP31()
 4993 %{
 4994   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 4995             && (n->get_ptr() >> 31) == 0);
 4996   match(ConP);
 4997 
 4998   op_cost(5);
 4999   format %{ %}
 5000   interface(CONST_INTER);
 5001 %}
 5002 
 5003 
 5004 // Long Immediate
 5005 operand immL()
 5006 %{
 5007   match(ConL);
 5008 
 5009   op_cost(20);
 5010   format %{ %}
 5011   interface(CONST_INTER);
 5012 %}
 5013 
 5014 // Long Immediate 8-bit
 5015 operand immL8()
 5016 %{
 5017   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 5018   match(ConL);
 5019 
 5020   op_cost(5);
 5021   format %{ %}
 5022   interface(CONST_INTER);
 5023 %}
 5024 
 5025 // Long Immediate 32-bit unsigned
 5026 operand immUL32()
 5027 %{
 5028   predicate(n->get_long() == (unsigned int) (n->get_long()));
 5029   match(ConL);
 5030 
 5031   op_cost(10);
 5032   format %{ %}
 5033   interface(CONST_INTER);
 5034 %}
 5035 
 5036 // Long Immediate 32-bit signed
 5037 operand immL32()
 5038 %{
 5039   predicate(n->get_long() == (int) (n->get_long()));
 5040   match(ConL);
 5041 
 5042   op_cost(15);
 5043   format %{ %}
 5044   interface(CONST_INTER);
 5045 %}
 5046 
 5047 operand immL_Pow2()
 5048 %{
 5049   predicate(is_power_of_2((julong)n->get_long()));
 5050   match(ConL);
 5051 
 5052   op_cost(15);
 5053   format %{ %}
 5054   interface(CONST_INTER);
 5055 %}
 5056 
 5057 operand immL_NotPow2()
 5058 %{
 5059   predicate(is_power_of_2((julong)~n->get_long()));
 5060   match(ConL);
 5061 
 5062   op_cost(15);
 5063   format %{ %}
 5064   interface(CONST_INTER);
 5065 %}
 5066 
 5067 // Long Immediate zero
 5068 operand immL0()
 5069 %{
 5070   predicate(n->get_long() == 0L);
 5071   match(ConL);
 5072 
 5073   op_cost(10);
 5074   format %{ %}
 5075   interface(CONST_INTER);
 5076 %}
 5077 
 5078 // Constant for increment
 5079 operand immL1()
 5080 %{
 5081   predicate(n->get_long() == 1);
 5082   match(ConL);
 5083 
 5084   format %{ %}
 5085   interface(CONST_INTER);
 5086 %}
 5087 
 5088 // Constant for decrement
 5089 operand immL_M1()
 5090 %{
 5091   predicate(n->get_long() == -1);
 5092   match(ConL);
 5093 
 5094   format %{ %}
 5095   interface(CONST_INTER);
 5096 %}
 5097 
 5098 // Long Immediate: low 32-bit mask
 5099 operand immL_32bits()
 5100 %{
 5101   predicate(n->get_long() == 0xFFFFFFFFL);
 5102   match(ConL);
 5103   op_cost(20);
 5104 
 5105   format %{ %}
 5106   interface(CONST_INTER);
 5107 %}
 5108 
 5109 // Int Immediate: 2^n-1, positive
 5110 operand immI_Pow2M1()
 5111 %{
 5112   predicate((n->get_int() > 0)
 5113             && is_power_of_2((juint)n->get_int() + 1));
 5114   match(ConI);
 5115 
 5116   op_cost(20);
 5117   format %{ %}
 5118   interface(CONST_INTER);
 5119 %}
 5120 
 5121 // Float Immediate zero
 5122 operand immF0()
 5123 %{
 5124   predicate(jint_cast(n->getf()) == 0);
 5125   match(ConF);
 5126 
 5127   op_cost(5);
 5128   format %{ %}
 5129   interface(CONST_INTER);
 5130 %}
 5131 
 5132 // Float Immediate
 5133 operand immF()
 5134 %{
 5135   match(ConF);
 5136 
 5137   op_cost(15);
 5138   format %{ %}
 5139   interface(CONST_INTER);
 5140 %}
 5141 
 5142 // Half Float Immediate
 5143 operand immH()
 5144 %{
 5145   match(ConH);
 5146 
 5147   op_cost(15);
 5148   format %{ %}
 5149   interface(CONST_INTER);
 5150 %}
 5151 
 5152 // Double Immediate zero
 5153 operand immD0()
 5154 %{
 5155   predicate(jlong_cast(n->getd()) == 0);
 5156   match(ConD);
 5157 
 5158   op_cost(5);
 5159   format %{ %}
 5160   interface(CONST_INTER);
 5161 %}
 5162 
 5163 // Double Immediate
 5164 operand immD()
 5165 %{
 5166   match(ConD);
 5167 
 5168   op_cost(15);
 5169   format %{ %}
 5170   interface(CONST_INTER);
 5171 %}
 5172 
 5173 // Immediates for special shifts (sign extend)
 5174 
 5175 // Constants for increment
 5176 operand immI_16()
 5177 %{
 5178   predicate(n->get_int() == 16);
 5179   match(ConI);
 5180 
 5181   format %{ %}
 5182   interface(CONST_INTER);
 5183 %}
 5184 
 5185 operand immI_24()
 5186 %{
 5187   predicate(n->get_int() == 24);
 5188   match(ConI);
 5189 
 5190   format %{ %}
 5191   interface(CONST_INTER);
 5192 %}
 5193 
 5194 // Constant for byte-wide masking
 5195 operand immI_255()
 5196 %{
 5197   predicate(n->get_int() == 255);
 5198   match(ConI);
 5199 
 5200   format %{ %}
 5201   interface(CONST_INTER);
 5202 %}
 5203 
 5204 // Constant for short-wide masking
 5205 operand immI_65535()
 5206 %{
 5207   predicate(n->get_int() == 65535);
 5208   match(ConI);
 5209 
 5210   format %{ %}
 5211   interface(CONST_INTER);
 5212 %}
 5213 
 5214 // Constant for byte-wide masking
 5215 operand immL_255()
 5216 %{
 5217   predicate(n->get_long() == 255);
 5218   match(ConL);
 5219 
 5220   format %{ %}
 5221   interface(CONST_INTER);
 5222 %}
 5223 
 5224 // Constant for short-wide masking
 5225 operand immL_65535()
 5226 %{
 5227   predicate(n->get_long() == 65535);
 5228   match(ConL);
 5229 
 5230   format %{ %}
 5231   interface(CONST_INTER);
 5232 %}
 5233 
 5234 // AOT Runtime Constants Address
 5235 operand immAOTRuntimeConstantsAddress()
 5236 %{
 5237   // Check if the address is in the range of AOT Runtime Constants
 5238   predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
 5239   match(ConP);
 5240 
 5241   op_cost(0);
 5242   format %{ %}
 5243   interface(CONST_INTER);
 5244 %}
 5245 
 5246 operand kReg()
 5247 %{
 5248   constraint(ALLOC_IN_RC(vectmask_reg));
 5249   match(RegVectMask);
 5250   format %{%}
 5251   interface(REG_INTER);
 5252 %}
 5253 
 5254 // Register Operands
 5255 // Integer Register
 5256 operand rRegI()
 5257 %{
 5258   constraint(ALLOC_IN_RC(int_reg));
 5259   match(RegI);
 5260 
 5261   match(rax_RegI);
 5262   match(rbx_RegI);
 5263   match(rcx_RegI);
 5264   match(rdx_RegI);
 5265   match(rdi_RegI);
 5266 
 5267   format %{ %}
 5268   interface(REG_INTER);
 5269 %}
 5270 
 5271 // Special Registers
 5272 operand rax_RegI()
 5273 %{
 5274   constraint(ALLOC_IN_RC(int_rax_reg));
 5275   match(RegI);
 5276   match(rRegI);
 5277 
 5278   format %{ "RAX" %}
 5279   interface(REG_INTER);
 5280 %}
 5281 
 5282 // Special Registers
 5283 operand rbx_RegI()
 5284 %{
 5285   constraint(ALLOC_IN_RC(int_rbx_reg));
 5286   match(RegI);
 5287   match(rRegI);
 5288 
 5289   format %{ "RBX" %}
 5290   interface(REG_INTER);
 5291 %}
 5292 
 5293 operand rcx_RegI()
 5294 %{
 5295   constraint(ALLOC_IN_RC(int_rcx_reg));
 5296   match(RegI);
 5297   match(rRegI);
 5298 
 5299   format %{ "RCX" %}
 5300   interface(REG_INTER);
 5301 %}
 5302 
 5303 operand rdx_RegI()
 5304 %{
 5305   constraint(ALLOC_IN_RC(int_rdx_reg));
 5306   match(RegI);
 5307   match(rRegI);
 5308 
 5309   format %{ "RDX" %}
 5310   interface(REG_INTER);
 5311 %}
 5312 
 5313 operand rdi_RegI()
 5314 %{
 5315   constraint(ALLOC_IN_RC(int_rdi_reg));
 5316   match(RegI);
 5317   match(rRegI);
 5318 
 5319   format %{ "RDI" %}
 5320   interface(REG_INTER);
 5321 %}
 5322 
 5323 operand no_rax_rdx_RegI()
 5324 %{
 5325   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5326   match(RegI);
 5327   match(rbx_RegI);
 5328   match(rcx_RegI);
 5329   match(rdi_RegI);
 5330 
 5331   format %{ %}
 5332   interface(REG_INTER);
 5333 %}
 5334 
 5335 operand no_rbp_r13_RegI()
 5336 %{
 5337   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5338   match(RegI);
 5339   match(rRegI);
 5340   match(rax_RegI);
 5341   match(rbx_RegI);
 5342   match(rcx_RegI);
 5343   match(rdx_RegI);
 5344   match(rdi_RegI);
 5345 
 5346   format %{ %}
 5347   interface(REG_INTER);
 5348 %}
 5349 
 5350 // Pointer Register
 5351 operand any_RegP()
 5352 %{
 5353   constraint(ALLOC_IN_RC(any_reg));
 5354   match(RegP);
 5355   match(rax_RegP);
 5356   match(rbx_RegP);
 5357   match(rdi_RegP);
 5358   match(rsi_RegP);
 5359   match(rbp_RegP);
 5360   match(r15_RegP);
 5361   match(rRegP);
 5362 
 5363   format %{ %}
 5364   interface(REG_INTER);
 5365 %}
 5366 
 5367 operand rRegP()
 5368 %{
 5369   constraint(ALLOC_IN_RC(ptr_reg));
 5370   match(RegP);
 5371   match(rax_RegP);
 5372   match(rbx_RegP);
 5373   match(rdi_RegP);
 5374   match(rsi_RegP);
 5375   match(rbp_RegP);  // See Q&A below about
 5376   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5377 
 5378   format %{ %}
 5379   interface(REG_INTER);
 5380 %}
 5381 
 5382 operand rRegN() %{
 5383   constraint(ALLOC_IN_RC(int_reg));
 5384   match(RegN);
 5385 
 5386   format %{ %}
 5387   interface(REG_INTER);
 5388 %}
 5389 
 5390 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 5391 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 5392 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
 5393 // The output of an instruction is controlled by the allocator, which respects
 5394 // register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be chosen
// by the allocator for that output.
// The same logic applies to rbp_RegP being a match for rRegP: if
// PreserveFramePointer == true, RBP is used as a proper frame pointer and is
// not included in ptr_reg. As a result, RBP never appears in an instruction's
// output either.
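//
// For illustration only (not an actual rule in this file), in a sketch like
//
//   instruct examplePtrMove(rRegP dst, rRegP src)
//   %{
//     match(Set dst src);
//     ...
//   %}
//
// the match rules above allow r15 (or RBP) to arrive as $src, but $dst is
// drawn from ptr_reg's mask, so the allocator never hands out r15 (or RBP
// when PreserveFramePointer is on) as the destination.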
 5400 
 5401 // This operand is not allowed to use RBP even if
 5402 // RBP is not used to hold the frame pointer.
 5403 operand no_rbp_RegP()
 5404 %{
 5405   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5406   match(RegP);
 5407   match(rbx_RegP);
 5408   match(rsi_RegP);
 5409   match(rdi_RegP);
 5410 
 5411   format %{ %}
 5412   interface(REG_INTER);
 5413 %}
 5414 
 5415 // Special Registers
 5416 // Return a pointer value
 5417 operand rax_RegP()
 5418 %{
 5419   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5420   match(RegP);
 5421   match(rRegP);
 5422 
 5423   format %{ %}
 5424   interface(REG_INTER);
 5425 %}
 5426 
 5427 // Special Registers
 5428 // Return a compressed pointer value
 5429 operand rax_RegN()
 5430 %{
 5431   constraint(ALLOC_IN_RC(int_rax_reg));
 5432   match(RegN);
 5433   match(rRegN);
 5434 
 5435   format %{ %}
 5436   interface(REG_INTER);
 5437 %}
 5438 
 5439 // Used in AtomicAdd
 5440 operand rbx_RegP()
 5441 %{
 5442   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5443   match(RegP);
 5444   match(rRegP);
 5445 
 5446   format %{ %}
 5447   interface(REG_INTER);
 5448 %}
 5449 
 5450 operand rsi_RegP()
 5451 %{
 5452   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5453   match(RegP);
 5454   match(rRegP);
 5455 
 5456   format %{ %}
 5457   interface(REG_INTER);
 5458 %}
 5459 
 5460 operand rbp_RegP()
 5461 %{
 5462   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5463   match(RegP);
 5464   match(rRegP);
 5465 
 5466   format %{ %}
 5467   interface(REG_INTER);
 5468 %}
 5469 
 5470 // Used in rep stosq
 5471 operand rdi_RegP()
 5472 %{
 5473   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5474   match(RegP);
 5475   match(rRegP);
 5476 
 5477   format %{ %}
 5478   interface(REG_INTER);
 5479 %}
 5480 
 5481 operand r15_RegP()
 5482 %{
 5483   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5484   match(RegP);
 5485   match(rRegP);
 5486 
 5487   format %{ %}
 5488   interface(REG_INTER);
 5489 %}
 5490 
 5491 operand rRegL()
 5492 %{
 5493   constraint(ALLOC_IN_RC(long_reg));
 5494   match(RegL);
 5495   match(rax_RegL);
 5496   match(rdx_RegL);
 5497 
 5498   format %{ %}
 5499   interface(REG_INTER);
 5500 %}
 5501 
 5502 // Special Registers
 5503 operand no_rax_rdx_RegL()
 5504 %{
 5505   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5506   match(RegL);
 5507   match(rRegL);
 5508 
 5509   format %{ %}
 5510   interface(REG_INTER);
 5511 %}
 5512 
 5513 operand rax_RegL()
 5514 %{
 5515   constraint(ALLOC_IN_RC(long_rax_reg));
 5516   match(RegL);
 5517   match(rRegL);
 5518 
 5519   format %{ "RAX" %}
 5520   interface(REG_INTER);
 5521 %}
 5522 
 5523 operand rcx_RegL()
 5524 %{
 5525   constraint(ALLOC_IN_RC(long_rcx_reg));
 5526   match(RegL);
 5527   match(rRegL);
 5528 
 5529   format %{ %}
 5530   interface(REG_INTER);
 5531 %}
 5532 
 5533 operand rdx_RegL()
 5534 %{
 5535   constraint(ALLOC_IN_RC(long_rdx_reg));
 5536   match(RegL);
 5537   match(rRegL);
 5538 
 5539   format %{ %}
 5540   interface(REG_INTER);
 5541 %}
 5542 
 5543 operand r11_RegL()
 5544 %{
 5545   constraint(ALLOC_IN_RC(long_r11_reg));
 5546   match(RegL);
 5547   match(rRegL);
 5548 
 5549   format %{ %}
 5550   interface(REG_INTER);
 5551 %}
 5552 
 5553 operand no_rbp_r13_RegL()
 5554 %{
 5555   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5556   match(RegL);
 5557   match(rRegL);
 5558   match(rax_RegL);
 5559   match(rcx_RegL);
 5560   match(rdx_RegL);
 5561 
 5562   format %{ %}
 5563   interface(REG_INTER);
 5564 %}
 5565 
 5566 // Flags register, used as output of compare instructions
 5567 operand rFlagsReg()
 5568 %{
 5569   constraint(ALLOC_IN_RC(int_flags));
 5570   match(RegFlags);
 5571 
 5572   format %{ "RFLAGS" %}
 5573   interface(REG_INTER);
 5574 %}
 5575 
 5576 // Flags register, used as output of FLOATING POINT compare instructions
 5577 operand rFlagsRegU()
 5578 %{
 5579   constraint(ALLOC_IN_RC(int_flags));
 5580   match(RegFlags);
 5581 
 5582   format %{ "RFLAGS_U" %}
 5583   interface(REG_INTER);
 5584 %}
 5585 
 5586 operand rFlagsRegUCF() %{
 5587   constraint(ALLOC_IN_RC(int_flags));
 5588   match(RegFlags);
 5589   predicate(!UseAPX || !VM_Version::supports_avx10_2());
 5590 
 5591   format %{ "RFLAGS_U_CF" %}
 5592   interface(REG_INTER);
 5593 %}
 5594 
 5595 operand rFlagsRegUCFE() %{
 5596   constraint(ALLOC_IN_RC(int_flags));
 5597   match(RegFlags);
 5598   predicate(UseAPX && VM_Version::supports_avx10_2());
 5599 
 5600   format %{ "RFLAGS_U_CFE" %}
 5601   interface(REG_INTER);
 5602 %}
 5603 
 5604 // Float register operands
 5605 operand regF() %{
 5606    constraint(ALLOC_IN_RC(float_reg));
 5607    match(RegF);
 5608 
 5609    format %{ %}
 5610    interface(REG_INTER);
 5611 %}
 5612 
 5613 // Float register operands
 5614 operand legRegF() %{
 5615    constraint(ALLOC_IN_RC(float_reg_legacy));
 5616    match(RegF);
 5617 
 5618    format %{ %}
 5619    interface(REG_INTER);
 5620 %}
 5621 
 5622 // Float register operands
 5623 operand vlRegF() %{
 5624    constraint(ALLOC_IN_RC(float_reg_vl));
 5625    match(RegF);
 5626 
 5627    format %{ %}
 5628    interface(REG_INTER);
 5629 %}
 5630 
 5631 // Double register operands
 5632 operand regD() %{
 5633    constraint(ALLOC_IN_RC(double_reg));
 5634    match(RegD);
 5635 
 5636    format %{ %}
 5637    interface(REG_INTER);
 5638 %}
 5639 
 5640 // Double register operands
 5641 operand legRegD() %{
 5642    constraint(ALLOC_IN_RC(double_reg_legacy));
 5643    match(RegD);
 5644 
 5645    format %{ %}
 5646    interface(REG_INTER);
 5647 %}
 5648 
 5649 // Double register operands
 5650 operand vlRegD() %{
 5651    constraint(ALLOC_IN_RC(double_reg_vl));
 5652    match(RegD);
 5653 
 5654    format %{ %}
 5655    interface(REG_INTER);
 5656 %}
 5657 
 5658 //----------Memory Operands----------------------------------------------------
 5659 // Direct Memory Operand
 5660 // operand direct(immP addr)
 5661 // %{
 5662 //   match(addr);
 5663 
 5664 //   format %{ "[$addr]" %}
 5665 //   interface(MEMORY_INTER) %{
 5666 //     base(0xFFFFFFFF);
 5667 //     index(0x4);
 5668 //     scale(0x0);
 5669 //     disp($addr);
 5670 //   %}
 5671 // %}
 5672 
 5673 // Indirect Memory Operand
 5674 operand indirect(any_RegP reg)
 5675 %{
 5676   constraint(ALLOC_IN_RC(ptr_reg));
 5677   match(reg);
 5678 
 5679   format %{ "[$reg]" %}
 5680   interface(MEMORY_INTER) %{
 5681     base($reg);
 5682     index(0x4);
 5683     scale(0x0);
 5684     disp(0x0);
 5685   %}
 5686 %}
 5687 
 5688 // Indirect Memory Plus Short Offset Operand
 5689 operand indOffset8(any_RegP reg, immL8 off)
 5690 %{
 5691   constraint(ALLOC_IN_RC(ptr_reg));
 5692   match(AddP reg off);
 5693 
 5694   format %{ "[$reg + $off (8-bit)]" %}
 5695   interface(MEMORY_INTER) %{
 5696     base($reg);
 5697     index(0x4);
 5698     scale(0x0);
 5699     disp($off);
 5700   %}
 5701 %}
 5702 
 5703 // Indirect Memory Plus Long Offset Operand
 5704 operand indOffset32(any_RegP reg, immL32 off)
 5705 %{
 5706   constraint(ALLOC_IN_RC(ptr_reg));
 5707   match(AddP reg off);
 5708 
 5709   format %{ "[$reg + $off (32-bit)]" %}
 5710   interface(MEMORY_INTER) %{
 5711     base($reg);
 5712     index(0x4);
 5713     scale(0x0);
 5714     disp($off);
 5715   %}
 5716 %}
 5717 
 5718 // Indirect Memory Plus Index Register Plus Offset Operand
 5719 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5720 %{
 5721   constraint(ALLOC_IN_RC(ptr_reg));
 5722   match(AddP (AddP reg lreg) off);
 5723 
 5724   op_cost(10);
 5725   format %{"[$reg + $off + $lreg]" %}
 5726   interface(MEMORY_INTER) %{
 5727     base($reg);
 5728     index($lreg);
 5729     scale(0x0);
 5730     disp($off);
 5731   %}
 5732 %}
 5733 
// Indirect Memory Plus Index Register Operand
 5735 operand indIndex(any_RegP reg, rRegL lreg)
 5736 %{
 5737   constraint(ALLOC_IN_RC(ptr_reg));
 5738   match(AddP reg lreg);
 5739 
 5740   op_cost(10);
 5741   format %{"[$reg + $lreg]" %}
 5742   interface(MEMORY_INTER) %{
 5743     base($reg);
 5744     index($lreg);
 5745     scale(0x0);
 5746     disp(0x0);
 5747   %}
 5748 %}
 5749 
 5750 // Indirect Memory Times Scale Plus Index Register
 5751 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5752 %{
 5753   constraint(ALLOC_IN_RC(ptr_reg));
 5754   match(AddP reg (LShiftL lreg scale));
 5755 
 5756   op_cost(10);
 5757   format %{"[$reg + $lreg << $scale]" %}
 5758   interface(MEMORY_INTER) %{
 5759     base($reg);
 5760     index($lreg);
 5761     scale($scale);
 5762     disp(0x0);
 5763   %}
 5764 %}
 5765 
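// The predicate requires the ConvI2L'd index to be provably non-negative (the
// low bound of its long type is >= 0), so sign extension and zero extension
// agree and the 32-bit index register can be folded directly into the 64-bit
// addressing mode.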
 5766 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5767 %{
 5768   constraint(ALLOC_IN_RC(ptr_reg));
 5769   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5770   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5771 
 5772   op_cost(10);
 5773   format %{"[$reg + pos $idx << $scale]" %}
 5774   interface(MEMORY_INTER) %{
 5775     base($reg);
 5776     index($idx);
 5777     scale($scale);
 5778     disp(0x0);
 5779   %}
 5780 %}
 5781 
 5782 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5783 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5784 %{
 5785   constraint(ALLOC_IN_RC(ptr_reg));
 5786   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5787 
 5788   op_cost(10);
 5789   format %{"[$reg + $off + $lreg << $scale]" %}
 5790   interface(MEMORY_INTER) %{
 5791     base($reg);
 5792     index($lreg);
 5793     scale($scale);
 5794     disp($off);
 5795   %}
 5796 %}
 5797 
 5798 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5799 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5800 %{
 5801   constraint(ALLOC_IN_RC(ptr_reg));
 5802   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5803   match(AddP (AddP reg (ConvI2L idx)) off);
 5804 
 5805   op_cost(10);
 5806   format %{"[$reg + $off + $idx]" %}
 5807   interface(MEMORY_INTER) %{
 5808     base($reg);
 5809     index($idx);
 5810     scale(0x0);
 5811     disp($off);
 5812   %}
 5813 %}
 5814 
 5815 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5816 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5817 %{
 5818   constraint(ALLOC_IN_RC(ptr_reg));
 5819   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5820   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5821 
 5822   op_cost(10);
 5823   format %{"[$reg + $off + $idx << $scale]" %}
 5824   interface(MEMORY_INTER) %{
 5825     base($reg);
 5826     index($idx);
 5827     scale($scale);
 5828     disp($off);
 5829   %}
 5830 %}
 5831 
 5832 // Indirect Narrow Oop Operand
 5833 operand indCompressedOop(rRegN reg) %{
 5834   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5835   constraint(ALLOC_IN_RC(ptr_reg));
 5836   match(DecodeN reg);
 5837 
 5838   op_cost(10);
 5839   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5840   interface(MEMORY_INTER) %{
 5841     base(0xc); // R12
 5842     index($reg);
 5843     scale(0x3);
 5844     disp(0x0);
 5845   %}
 5846 %}
 5847 
 5848 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base, so we can't free r12 even when CompressedOops::base() == nullptr.
 5851 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5852   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5853   constraint(ALLOC_IN_RC(ptr_reg));
 5854   match(AddP (DecodeN reg) off);
 5855 
 5856   op_cost(10);
 5857   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5858   interface(MEMORY_INTER) %{
 5859     base(0xc); // R12
 5860     index($reg);
 5861     scale(0x3);
 5862     disp($off);
 5863   %}
 5864 %}
 5865 
 5866 // Indirect Memory Operand
 5867 operand indirectNarrow(rRegN reg)
 5868 %{
 5869   predicate(CompressedOops::shift() == 0);
 5870   constraint(ALLOC_IN_RC(ptr_reg));
 5871   match(DecodeN reg);
 5872 
 5873   format %{ "[$reg]" %}
 5874   interface(MEMORY_INTER) %{
 5875     base($reg);
 5876     index(0x4);
 5877     scale(0x0);
 5878     disp(0x0);
 5879   %}
 5880 %}
 5881 
 5882 // Indirect Memory Plus Short Offset Operand
 5883 operand indOffset8Narrow(rRegN reg, immL8 off)
 5884 %{
 5885   predicate(CompressedOops::shift() == 0);
 5886   constraint(ALLOC_IN_RC(ptr_reg));
 5887   match(AddP (DecodeN reg) off);
 5888 
 5889   format %{ "[$reg + $off (8-bit)]" %}
 5890   interface(MEMORY_INTER) %{
 5891     base($reg);
 5892     index(0x4);
 5893     scale(0x0);
 5894     disp($off);
 5895   %}
 5896 %}
 5897 
 5898 // Indirect Memory Plus Long Offset Operand
 5899 operand indOffset32Narrow(rRegN reg, immL32 off)
 5900 %{
 5901   predicate(CompressedOops::shift() == 0);
 5902   constraint(ALLOC_IN_RC(ptr_reg));
 5903   match(AddP (DecodeN reg) off);
 5904 
 5905   format %{ "[$reg + $off (32-bit)]" %}
 5906   interface(MEMORY_INTER) %{
 5907     base($reg);
 5908     index(0x4);
 5909     scale(0x0);
 5910     disp($off);
 5911   %}
 5912 %}
 5913 
 5914 // Indirect Memory Plus Index Register Plus Offset Operand
 5915 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5916 %{
 5917   predicate(CompressedOops::shift() == 0);
 5918   constraint(ALLOC_IN_RC(ptr_reg));
 5919   match(AddP (AddP (DecodeN reg) lreg) off);
 5920 
 5921   op_cost(10);
 5922   format %{"[$reg + $off + $lreg]" %}
 5923   interface(MEMORY_INTER) %{
 5924     base($reg);
 5925     index($lreg);
 5926     scale(0x0);
 5927     disp($off);
 5928   %}
 5929 %}
 5930 
// Indirect Memory Plus Index Register Operand
 5932 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5933 %{
 5934   predicate(CompressedOops::shift() == 0);
 5935   constraint(ALLOC_IN_RC(ptr_reg));
 5936   match(AddP (DecodeN reg) lreg);
 5937 
 5938   op_cost(10);
 5939   format %{"[$reg + $lreg]" %}
 5940   interface(MEMORY_INTER) %{
 5941     base($reg);
 5942     index($lreg);
 5943     scale(0x0);
 5944     disp(0x0);
 5945   %}
 5946 %}
 5947 
 5948 // Indirect Memory Times Scale Plus Index Register
 5949 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5950 %{
 5951   predicate(CompressedOops::shift() == 0);
 5952   constraint(ALLOC_IN_RC(ptr_reg));
 5953   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5954 
 5955   op_cost(10);
 5956   format %{"[$reg + $lreg << $scale]" %}
 5957   interface(MEMORY_INTER) %{
 5958     base($reg);
 5959     index($lreg);
 5960     scale($scale);
 5961     disp(0x0);
 5962   %}
 5963 %}
 5964 
 5965 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5966 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5967 %{
 5968   predicate(CompressedOops::shift() == 0);
 5969   constraint(ALLOC_IN_RC(ptr_reg));
 5970   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5971 
 5972   op_cost(10);
 5973   format %{"[$reg + $off + $lreg << $scale]" %}
 5974   interface(MEMORY_INTER) %{
 5975     base($reg);
 5976     index($lreg);
 5977     scale($scale);
 5978     disp($off);
 5979   %}
 5980 %}
 5981 
// Indirect Memory Plus Positive Index Register Plus Offset Operand
 5983 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5984 %{
 5985   constraint(ALLOC_IN_RC(ptr_reg));
 5986   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5987   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5988 
 5989   op_cost(10);
 5990   format %{"[$reg + $off + $idx]" %}
 5991   interface(MEMORY_INTER) %{
 5992     base($reg);
 5993     index($idx);
 5994     scale(0x0);
 5995     disp($off);
 5996   %}
 5997 %}
 5998 
 5999 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 6000 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 6001 %{
 6002   constraint(ALLOC_IN_RC(ptr_reg));
 6003   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 6004   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 6005 
 6006   op_cost(10);
 6007   format %{"[$reg + $off + $idx << $scale]" %}
 6008   interface(MEMORY_INTER) %{
 6009     base($reg);
 6010     index($idx);
 6011     scale($scale);
 6012     disp($off);
 6013   %}
 6014 %}
 6015 
 6016 //----------Special Memory Operands--------------------------------------------
 6017 // Stack Slot Operand - This operand is used for loading and storing temporary
 6018 //                      values on the stack where a match requires a value to
 6019 //                      flow through memory.
 6020 operand stackSlotP(sRegP reg)
 6021 %{
 6022   constraint(ALLOC_IN_RC(stack_slots));
 6023   // No match rule because this operand is only generated in matching
 6024 
 6025   format %{ "[$reg]" %}
 6026   interface(MEMORY_INTER) %{
 6027     base(0x4);   // RSP
 6028     index(0x4);  // No Index
 6029     scale(0x0);  // No Scale
 6030     disp($reg);  // Stack Offset
 6031   %}
 6032 %}
 6033 
 6034 operand stackSlotI(sRegI reg)
 6035 %{
 6036   constraint(ALLOC_IN_RC(stack_slots));
 6037   // No match rule because this operand is only generated in matching
 6038 
 6039   format %{ "[$reg]" %}
 6040   interface(MEMORY_INTER) %{
 6041     base(0x4);   // RSP
 6042     index(0x4);  // No Index
 6043     scale(0x0);  // No Scale
 6044     disp($reg);  // Stack Offset
 6045   %}
 6046 %}
 6047 
 6048 operand stackSlotF(sRegF reg)
 6049 %{
 6050   constraint(ALLOC_IN_RC(stack_slots));
 6051   // No match rule because this operand is only generated in matching
 6052 
 6053   format %{ "[$reg]" %}
 6054   interface(MEMORY_INTER) %{
 6055     base(0x4);   // RSP
 6056     index(0x4);  // No Index
 6057     scale(0x0);  // No Scale
 6058     disp($reg);  // Stack Offset
 6059   %}
 6060 %}
 6061 
 6062 operand stackSlotD(sRegD reg)
 6063 %{
 6064   constraint(ALLOC_IN_RC(stack_slots));
 6065   // No match rule because this operand is only generated in matching
 6066 
 6067   format %{ "[$reg]" %}
 6068   interface(MEMORY_INTER) %{
 6069     base(0x4);   // RSP
 6070     index(0x4);  // No Index
 6071     scale(0x0);  // No Scale
 6072     disp($reg);  // Stack Offset
 6073   %}
 6074 %}
 6075 operand stackSlotL(sRegL reg)
 6076 %{
 6077   constraint(ALLOC_IN_RC(stack_slots));
 6078   // No match rule because this operand is only generated in matching
 6079 
 6080   format %{ "[$reg]" %}
 6081   interface(MEMORY_INTER) %{
 6082     base(0x4);   // RSP
 6083     index(0x4);  // No Index
 6084     scale(0x0);  // No Scale
 6085     disp($reg);  // Stack Offset
 6086   %}
 6087 %}
 6088 
 6089 //----------Conditional Branch Operands----------------------------------------
 6090 // Comparison Op  - This is the operation of the comparison, and is limited to
 6091 //                  the following set of codes:
 6092 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6093 //
 6094 // Other attributes of the comparison, such as unsignedness, are specified
 6095 // by the comparison instruction that sets a condition code flags register.
 6096 // That result is represented by a flags operand whose subtype is appropriate
 6097 // to the unsignedness (etc.) of the comparison.
 6098 //
 6099 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6100 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6101 // by matching a specific subtype of Bool operand below, such as cmpOpU.
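//
// The hexadecimal value attached to each condition below is the x86 condition
// code (the "tttn" field) encoded directly into Jcc, SETcc and CMOVcc
// instructions; e.g. 0x4 yields JE/SETE/CMOVE and 0x2 yields JB/SETB/CMOVB.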
 6102 
 6103 // Comparison Code
 6104 operand cmpOp()
 6105 %{
 6106   match(Bool);
 6107 
 6108   format %{ "" %}
 6109   interface(COND_INTER) %{
 6110     equal(0x4, "e");
 6111     not_equal(0x5, "ne");
 6112     less(0xc, "l");
 6113     greater_equal(0xd, "ge");
 6114     less_equal(0xe, "le");
 6115     greater(0xf, "g");
 6116     overflow(0x0, "o");
 6117     no_overflow(0x1, "no");
 6118   %}
 6119 %}
 6120 
 6121 // Comparison Code, unsigned compare.  Used by FP also, with
 6122 // C2 (unordered) turned into GT or LT already.  The other bits
 6123 // C0 and C3 are turned into Carry & Zero flags.
 6124 operand cmpOpU()
 6125 %{
 6126   match(Bool);
 6127 
 6128   format %{ "" %}
 6129   interface(COND_INTER) %{
 6130     equal(0x4, "e");
 6131     not_equal(0x5, "ne");
 6132     less(0x2, "b");
 6133     greater_equal(0x3, "ae");
 6134     less_equal(0x6, "be");
 6135     greater(0x7, "a");
 6136     overflow(0x0, "o");
 6137     no_overflow(0x1, "no");
 6138   %}
 6139 %}
 6140 
 6141 
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
 6145 operand cmpOpUCF() %{
 6146   match(Bool);
 6147   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6148             (n->as_Bool()->_test._test == BoolTest::lt ||
 6149              n->as_Bool()->_test._test == BoolTest::ge ||
 6150              n->as_Bool()->_test._test == BoolTest::le ||
 6151              n->as_Bool()->_test._test == BoolTest::gt ||
 6152              n->in(1)->in(1) == n->in(1)->in(2)));
 6153   format %{ "" %}
 6154   interface(COND_INTER) %{
 6155     equal(0xb, "np");
 6156     not_equal(0xa, "p");
 6157     less(0x2, "b");
 6158     greater_equal(0x3, "ae");
 6159     less_equal(0x6, "be");
 6160     greater(0x7, "a");
 6161     overflow(0x0, "o");
 6162     no_overflow(0x1, "no");
 6163   %}
 6164 %}
 6165 
 6166 
 6167 // Floating comparisons that can be fixed up with extra conditional jumps
 6168 operand cmpOpUCF2() %{
 6169   match(Bool);
 6170   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6171             (n->as_Bool()->_test._test == BoolTest::ne ||
 6172              n->as_Bool()->_test._test == BoolTest::eq) &&
 6173             n->in(1)->in(1) != n->in(1)->in(2));
 6174   format %{ "" %}
 6175   interface(COND_INTER) %{
 6176     equal(0x4, "e");
 6177     not_equal(0x5, "ne");
 6178     less(0x2, "b");
 6179     greater_equal(0x3, "ae");
 6180     less_equal(0x6, "be");
 6181     greater(0x7, "a");
 6182     overflow(0x0, "o");
 6183     no_overflow(0x1, "no");
 6184   %}
 6185 %}
 6186 
 6187 
// Floating point comparisons that set condition flags to test more directly.
// Unsigned tests are used for the G (>) and GE (>=) conditions, while signed
// tests are used for the L (<) and LE (<=) conditions. It's important to
// convert the latter conditions to ones that use unsigned tests before they
// are passed into an instruction, because the preceding comparison might be
// based on a three-way comparison (CmpF3 or CmpD3) that also assigns
// unordered outcomes to -1.
 6194 operand cmpOpUCFE()
 6195 %{
 6196   match(Bool);
 6197   predicate((UseAPX && VM_Version::supports_avx10_2()) &&
 6198             (n->as_Bool()->_test._test == BoolTest::ne ||
 6199              n->as_Bool()->_test._test == BoolTest::eq ||
 6200              n->as_Bool()->_test._test == BoolTest::lt ||
 6201              n->as_Bool()->_test._test == BoolTest::ge ||
 6202              n->as_Bool()->_test._test == BoolTest::le ||
 6203              n->as_Bool()->_test._test == BoolTest::gt));
 6204 
 6205   format %{ "" %}
 6206   interface(COND_INTER) %{
 6207     equal(0x4, "e");
 6208     not_equal(0x5, "ne");
 6209     less(0x2, "b");
 6210     greater_equal(0x3, "ae");
 6211     less_equal(0x6, "be");
 6212     greater(0x7, "a");
 6213     overflow(0x0, "o");
 6214     no_overflow(0x1, "no");
 6215   %}
 6216 %}
 6217 
// Operands for bound floating-point register arguments
 6219 operand rxmm0() %{
 6220   constraint(ALLOC_IN_RC(xmm0_reg));
 6221   match(VecX);
 6222   format%{%}
 6223   interface(REG_INTER);
 6224 %}
 6225 
 6226 // Vectors
 6227 
 6228 // Dummy generic vector class. Should be used for all vector operands.
 6229 // Replaced with vec[SDXYZ] during post-selection pass.
 6230 operand vec() %{
 6231   constraint(ALLOC_IN_RC(dynamic));
 6232   match(VecX);
 6233   match(VecY);
 6234   match(VecZ);
 6235   match(VecS);
 6236   match(VecD);
 6237 
 6238   format %{ %}
 6239   interface(REG_INTER);
 6240 %}
 6241 
 6242 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6243 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6244 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6245 // runtime code generation via reg_class_dynamic.
 6246 operand legVec() %{
 6247   constraint(ALLOC_IN_RC(dynamic));
 6248   match(VecX);
 6249   match(VecY);
 6250   match(VecZ);
 6251   match(VecS);
 6252   match(VecD);
 6253 
 6254   format %{ %}
 6255   interface(REG_INTER);
 6256 %}
 6257 
 6258 // Replaces vec during post-selection cleanup. See above.
 6259 operand vecS() %{
 6260   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6261   match(VecS);
 6262 
 6263   format %{ %}
 6264   interface(REG_INTER);
 6265 %}
 6266 
 6267 // Replaces legVec during post-selection cleanup. See above.
 6268 operand legVecS() %{
 6269   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6270   match(VecS);
 6271 
 6272   format %{ %}
 6273   interface(REG_INTER);
 6274 %}
 6275 
 6276 // Replaces vec during post-selection cleanup. See above.
 6277 operand vecD() %{
 6278   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6279   match(VecD);
 6280 
 6281   format %{ %}
 6282   interface(REG_INTER);
 6283 %}
 6284 
 6285 // Replaces legVec during post-selection cleanup. See above.
 6286 operand legVecD() %{
 6287   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6288   match(VecD);
 6289 
 6290   format %{ %}
 6291   interface(REG_INTER);
 6292 %}
 6293 
 6294 // Replaces vec during post-selection cleanup. See above.
 6295 operand vecX() %{
 6296   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6297   match(VecX);
 6298 
 6299   format %{ %}
 6300   interface(REG_INTER);
 6301 %}
 6302 
 6303 // Replaces legVec during post-selection cleanup. See above.
 6304 operand legVecX() %{
 6305   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6306   match(VecX);
 6307 
 6308   format %{ %}
 6309   interface(REG_INTER);
 6310 %}
 6311 
 6312 // Replaces vec during post-selection cleanup. See above.
 6313 operand vecY() %{
 6314   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6315   match(VecY);
 6316 
 6317   format %{ %}
 6318   interface(REG_INTER);
 6319 %}
 6320 
 6321 // Replaces legVec during post-selection cleanup. See above.
 6322 operand legVecY() %{
 6323   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6324   match(VecY);
 6325 
 6326   format %{ %}
 6327   interface(REG_INTER);
 6328 %}
 6329 
 6330 // Replaces vec during post-selection cleanup. See above.
 6331 operand vecZ() %{
 6332   constraint(ALLOC_IN_RC(vectorz_reg));
 6333   match(VecZ);
 6334 
 6335   format %{ %}
 6336   interface(REG_INTER);
 6337 %}
 6338 
 6339 // Replaces legVec during post-selection cleanup. See above.
 6340 operand legVecZ() %{
 6341   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6342   match(VecZ);
 6343 
 6344   format %{ %}
 6345   interface(REG_INTER);
 6346 %}
 6347 
 6348 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
 6350 // instruction definitions by not requiring the AD writer to specify separate
 6351 // instructions for every form of operand when the instruction accepts
 6352 // multiple operand types with the same basic encoding and format.  The classic
 6353 // case of this is memory operands.
 6354 
 6355 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6356                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6357                indCompressedOop, indCompressedOopOffset,
 6358                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6359                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6360                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
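
// An instruction rule that declares a `memory` operand is matched against
// every addressing form listed above; e.g. the loadI rule later in this file
// covers [base], [base + disp], [base + index << scale + disp] and the
// narrow-oop variants with a single definition.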
 6361 
 6362 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 6364 pipeline %{
 6365 
 6366 //----------ATTRIBUTES---------------------------------------------------------
 6367 attributes %{
  variable_size_instructions;        // Variable-sized instructions
 6369   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 6371   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6372   instruction_fetch_units = 1;       // of 16 bytes
 6373 %}
 6374 
 6375 //----------RESOURCES----------------------------------------------------------
 6376 // Resources are the functional units available to the machine
 6377 
 6378 // Generic P2/P3 pipeline
 6379 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6380 // 3 instructions decoded per cycle.
 6381 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops; only ALU0 handles mul instructions.
 6383 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6384            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6385            BR, FPU,
 6386            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6387 
 6388 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6389 // Pipeline Description specifies the stages in the machine's pipeline
 6390 
 6391 // Generic P2/P3 pipeline
 6392 pipe_desc(S0, S1, S2, S3, S4, S5);
 6393 
 6394 //----------PIPELINE CLASSES---------------------------------------------------
 6395 // Pipeline Classes describe the stages in which input and output are
 6396 // referenced by the hardware pipeline.
 6397 
 6398 // Naming convention: ialu or fpu
 6399 // Then: _reg
 6400 // Then: _reg if there is a 2nd register
 6401 // Then: _long if it's a pair of instructions implementing a long
 6402 // Then: _fat if it requires the big decoder
 6403 //   Or: _mem if it requires the big decoder and a memory unit.
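//
// For example, ialu_reg_mem below is an integer ALU operation with a register
// destination and a memory source; per this convention it needs the big
// decoder (D0) and a MEM unit in addition to an ALU.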
 6404 
 6405 // Integer ALU reg operation
 6406 pipe_class ialu_reg(rRegI dst)
 6407 %{
 6408     single_instruction;
 6409     dst    : S4(write);
 6410     dst    : S3(read);
 6411     DECODE : S0;        // any decoder
 6412     ALU    : S3;        // any alu
 6413 %}
 6414 
 6415 // Long ALU reg operation
 6416 pipe_class ialu_reg_long(rRegL dst)
 6417 %{
 6418     instruction_count(2);
 6419     dst    : S4(write);
 6420     dst    : S3(read);
 6421     DECODE : S0(2);     // any 2 decoders
 6422     ALU    : S3(2);     // both alus
 6423 %}
 6424 
 6425 // Integer ALU reg operation using big decoder
 6426 pipe_class ialu_reg_fat(rRegI dst)
 6427 %{
 6428     single_instruction;
 6429     dst    : S4(write);
 6430     dst    : S3(read);
 6431     D0     : S0;        // big decoder only
 6432     ALU    : S3;        // any alu
 6433 %}
 6434 
 6435 // Integer ALU reg-reg operation
 6436 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6437 %{
 6438     single_instruction;
 6439     dst    : S4(write);
 6440     src    : S3(read);
 6441     DECODE : S0;        // any decoder
 6442     ALU    : S3;        // any alu
 6443 %}
 6444 
// Integer ALU reg-reg operation using big decoder
 6446 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6447 %{
 6448     single_instruction;
 6449     dst    : S4(write);
 6450     src    : S3(read);
 6451     D0     : S0;        // big decoder only
 6452     ALU    : S3;        // any alu
 6453 %}
 6454 
 6455 // Integer ALU reg-mem operation
 6456 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6457 %{
 6458     single_instruction;
 6459     dst    : S5(write);
 6460     mem    : S3(read);
 6461     D0     : S0;        // big decoder only
 6462     ALU    : S4;        // any alu
 6463     MEM    : S3;        // any mem
 6464 %}
 6465 
 6466 // Integer mem operation (prefetch)
 6467 pipe_class ialu_mem(memory mem)
 6468 %{
 6469     single_instruction;
 6470     mem    : S3(read);
 6471     D0     : S0;        // big decoder only
 6472     MEM    : S3;        // any mem
 6473 %}
 6474 
 6475 // Integer Store to Memory
 6476 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6477 %{
 6478     single_instruction;
 6479     mem    : S3(read);
 6480     src    : S5(read);
 6481     D0     : S0;        // big decoder only
 6482     ALU    : S4;        // any alu
 6483     MEM    : S3;
 6484 %}
 6485 
 6486 // // Long Store to Memory
 6487 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6488 // %{
 6489 //     instruction_count(2);
 6490 //     mem    : S3(read);
 6491 //     src    : S5(read);
 6492 //     D0     : S0(2);          // big decoder only; twice
 6493 //     ALU    : S4(2);     // any 2 alus
 6494 //     MEM    : S3(2);  // Both mems
 6495 // %}
 6496 
 6497 // Integer Store to Memory
 6498 pipe_class ialu_mem_imm(memory mem)
 6499 %{
 6500     single_instruction;
 6501     mem    : S3(read);
 6502     D0     : S0;        // big decoder only
 6503     ALU    : S4;        // any alu
 6504     MEM    : S3;
 6505 %}
 6506 
 6507 // Integer ALU0 reg-reg operation
 6508 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6509 %{
 6510     single_instruction;
 6511     dst    : S4(write);
 6512     src    : S3(read);
 6513     D0     : S0;        // Big decoder only
 6514     ALU0   : S3;        // only alu0
 6515 %}
 6516 
 6517 // Integer ALU0 reg-mem operation
 6518 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6519 %{
 6520     single_instruction;
 6521     dst    : S5(write);
 6522     mem    : S3(read);
 6523     D0     : S0;        // big decoder only
 6524     ALU0   : S4;        // ALU0 only
 6525     MEM    : S3;        // any mem
 6526 %}
 6527 
 6528 // Integer ALU reg-reg operation
 6529 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6530 %{
 6531     single_instruction;
 6532     cr     : S4(write);
 6533     src1   : S3(read);
 6534     src2   : S3(read);
 6535     DECODE : S0;        // any decoder
 6536     ALU    : S3;        // any alu
 6537 %}
 6538 
 6539 // Integer ALU reg-imm operation
 6540 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6541 %{
 6542     single_instruction;
 6543     cr     : S4(write);
 6544     src1   : S3(read);
 6545     DECODE : S0;        // any decoder
 6546     ALU    : S3;        // any alu
 6547 %}
 6548 
 6549 // Integer ALU reg-mem operation
 6550 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6551 %{
 6552     single_instruction;
 6553     cr     : S4(write);
 6554     src1   : S3(read);
 6555     src2   : S3(read);
 6556     D0     : S0;        // big decoder only
 6557     ALU    : S4;        // any alu
 6558     MEM    : S3;
 6559 %}
 6560 
 6561 // Conditional move reg-reg
 6562 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6563 %{
 6564     instruction_count(4);
 6565     y      : S4(read);
 6566     q      : S3(read);
 6567     p      : S3(read);
    DECODE : S0(4);     // any 4 decoders
 6569 %}
 6570 
 6571 // Conditional move reg-reg
 6572 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6573 %{
 6574     single_instruction;
 6575     dst    : S4(write);
 6576     src    : S3(read);
 6577     cr     : S3(read);
 6578     DECODE : S0;        // any decoder
 6579 %}
 6580 
 6581 // Conditional move reg-mem
 6582 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6583 %{
 6584     single_instruction;
 6585     dst    : S4(write);
 6586     src    : S3(read);
 6587     cr     : S3(read);
 6588     DECODE : S0;        // any decoder
 6589     MEM    : S3;
 6590 %}
 6591 
 6592 // Conditional move reg-reg long
 6593 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6594 %{
 6595     single_instruction;
 6596     dst    : S4(write);
 6597     src    : S3(read);
 6598     cr     : S3(read);
 6599     DECODE : S0(2);     // any 2 decoders
 6600 %}
 6601 
 6602 // Float reg-reg operation
 6603 pipe_class fpu_reg(regD dst)
 6604 %{
 6605     instruction_count(2);
 6606     dst    : S3(read);
 6607     DECODE : S0(2);     // any 2 decoders
 6608     FPU    : S3;
 6609 %}
 6610 
 6611 // Float reg-reg operation
 6612 pipe_class fpu_reg_reg(regD dst, regD src)
 6613 %{
 6614     instruction_count(2);
 6615     dst    : S4(write);
 6616     src    : S3(read);
 6617     DECODE : S0(2);     // any 2 decoders
 6618     FPU    : S3;
 6619 %}
 6620 
 6621 // Float reg-reg operation
 6622 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6623 %{
 6624     instruction_count(3);
 6625     dst    : S4(write);
 6626     src1   : S3(read);
 6627     src2   : S3(read);
 6628     DECODE : S0(3);     // any 3 decoders
 6629     FPU    : S3(2);
 6630 %}
 6631 
 6632 // Float reg-reg operation
 6633 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6634 %{
 6635     instruction_count(4);
 6636     dst    : S4(write);
 6637     src1   : S3(read);
 6638     src2   : S3(read);
 6639     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
 6641     FPU    : S3(2);
 6642 %}
 6643 
 6644 // Float reg-reg operation
 6645 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6646 %{
 6647     instruction_count(4);
 6648     dst    : S4(write);
 6649     src1   : S3(read);
 6650     src2   : S3(read);
 6651     src3   : S3(read);
 6652     DECODE : S1(3);     // any 3 decoders
 6653     D0     : S0;        // Big decoder only
 6654     FPU    : S3(2);
 6655     MEM    : S3;
 6656 %}
 6657 
 6658 // Float reg-mem operation
 6659 pipe_class fpu_reg_mem(regD dst, memory mem)
 6660 %{
 6661     instruction_count(2);
 6662     dst    : S5(write);
 6663     mem    : S3(read);
 6664     D0     : S0;        // big decoder only
 6665     DECODE : S1;        // any decoder for FPU POP
 6666     FPU    : S4;
 6667     MEM    : S3;        // any mem
 6668 %}
 6669 
 6670 // Float reg-mem operation
 6671 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6672 %{
 6673     instruction_count(3);
 6674     dst    : S5(write);
 6675     src1   : S3(read);
 6676     mem    : S3(read);
 6677     D0     : S0;        // big decoder only
 6678     DECODE : S1(2);     // any decoder for FPU POP
 6679     FPU    : S4;
 6680     MEM    : S3;        // any mem
 6681 %}
 6682 
 6683 // Float mem-reg operation
 6684 pipe_class fpu_mem_reg(memory mem, regD src)
 6685 %{
 6686     instruction_count(2);
 6687     src    : S5(read);
 6688     mem    : S3(read);
 6689     DECODE : S0;        // any decoder for FPU PUSH
 6690     D0     : S1;        // big decoder only
 6691     FPU    : S4;
 6692     MEM    : S3;        // any mem
 6693 %}
 6694 
 6695 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6696 %{
 6697     instruction_count(3);
 6698     src1   : S3(read);
 6699     src2   : S3(read);
 6700     mem    : S3(read);
 6701     DECODE : S0(2);     // any decoder for FPU PUSH
 6702     D0     : S1;        // big decoder only
 6703     FPU    : S4;
 6704     MEM    : S3;        // any mem
 6705 %}
 6706 
 6707 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6708 %{
 6709     instruction_count(3);
 6710     src1   : S3(read);
 6711     src2   : S3(read);
 6712     mem    : S4(read);
 6713     DECODE : S0;        // any decoder for FPU PUSH
 6714     D0     : S0(2);     // big decoder only
 6715     FPU    : S4;
 6716     MEM    : S3(2);     // any mem
 6717 %}
 6718 
 6719 pipe_class fpu_mem_mem(memory dst, memory src1)
 6720 %{
 6721     instruction_count(2);
 6722     src1   : S3(read);
 6723     dst    : S4(read);
 6724     D0     : S0(2);     // big decoder only
 6725     MEM    : S3(2);     // any mem
 6726 %}
 6727 
 6728 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6729 %{
 6730     instruction_count(3);
 6731     src1   : S3(read);
 6732     src2   : S3(read);
 6733     dst    : S4(read);
 6734     D0     : S0(3);     // big decoder only
 6735     FPU    : S4;
 6736     MEM    : S3(3);     // any mem
 6737 %}
 6738 
 6739 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6740 %{
 6741     instruction_count(3);
 6742     src1   : S4(read);
 6743     mem    : S4(read);
 6744     DECODE : S0;        // any decoder for FPU PUSH
 6745     D0     : S0(2);     // big decoder only
 6746     FPU    : S4;
 6747     MEM    : S3(2);     // any mem
 6748 %}
 6749 
 6750 // Float load constant
 6751 pipe_class fpu_reg_con(regD dst)
 6752 %{
 6753     instruction_count(2);
 6754     dst    : S5(write);
 6755     D0     : S0;        // big decoder only for the load
 6756     DECODE : S1;        // any decoder for FPU POP
 6757     FPU    : S4;
 6758     MEM    : S3;        // any mem
 6759 %}
 6760 
 6761 // Float load constant
 6762 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6763 %{
 6764     instruction_count(3);
 6765     dst    : S5(write);
 6766     src    : S3(read);
 6767     D0     : S0;        // big decoder only for the load
 6768     DECODE : S1(2);     // any decoder for FPU POP
 6769     FPU    : S4;
 6770     MEM    : S3;        // any mem
 6771 %}
 6772 
// Unconditional branch
 6774 pipe_class pipe_jmp(label labl)
 6775 %{
 6776     single_instruction;
 6777     BR   : S3;
 6778 %}
 6779 
 6780 // Conditional branch
 6781 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6782 %{
 6783     single_instruction;
 6784     cr    : S1(read);
 6785     BR    : S3;
 6786 %}
 6787 
 6788 // Allocation idiom
 6789 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6790 %{
 6791     instruction_count(1); force_serialization;
 6792     fixed_latency(6);
 6793     heap_ptr : S3(read);
 6794     DECODE   : S0(3);
 6795     D0       : S2;
 6796     MEM      : S3;
 6797     ALU      : S3(2);
 6798     dst      : S5(write);
 6799     BR       : S5;
 6800 %}
 6801 
 6802 // Generic big/slow expanded idiom
 6803 pipe_class pipe_slow()
 6804 %{
 6805     instruction_count(10); multiple_bundles; force_serialization;
 6806     fixed_latency(100);
 6807     D0  : S0(2);
 6808     MEM : S3(2);
 6809 %}
 6810 
 6811 // The real do-nothing guy
 6812 pipe_class empty()
 6813 %{
 6814     instruction_count(0);
 6815 %}
 6816 
 6817 // Define the class for the Nop node
 6818 define
 6819 %{
 6820    MachNop = empty;
 6821 %}
 6822 
 6823 %}
 6824 
 6825 //----------INSTRUCTIONS-------------------------------------------------------
 6826 //
 6827 // match      -- States which machine-independent subtree may be replaced
 6828 //               by this instruction.
 6829 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6830 //               selection to identify a minimum cost tree of machine
 6831 //               instructions that matches a tree of machine-independent
 6832 //               instructions.
 6833 // format     -- A string providing the disassembly for this instruction.
 6834 //               The value of an instruction's operand may be inserted
 6835 //               by referring to it with a '$' prefix.
 6836 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6837 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
 6839 //               indicate the type of machine instruction, while secondary
 6840 //               and tertiary are often used for prefix options or addressing
 6841 //               modes.
 6842 // ins_encode -- A list of encode classes with parameters. The encode class
 6843 //               name must have been defined in an 'enc_class' specification
 6844 //               in the encode section of the architecture description.
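//
// A purely illustrative sketch (not an actual rule in this file) showing the
// attributes above in context; real rules such as loadB below have the same
// shape:
//
//   instruct addI_rReg_example(rRegI dst, rRegI src, rFlagsReg cr)
//   %{
//     match(Set dst (AddI dst src));  // replaces a matching AddI subtree
//     effect(KILL cr);                // addl clobbers RFLAGS
//     ins_cost(150);                  // relative cost used during selection
//     format %{ "addl    $dst, $src" %}
//     ins_encode %{ __ addl($dst$$Register, $src$$Register); %}
//     ins_pipe(ialu_reg_reg);
//   %}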
 6845 
 6846 // ============================================================================
 6847 
 6848 instruct ShouldNotReachHere() %{
 6849   match(Halt);
 6850   format %{ "stop\t# ShouldNotReachHere" %}
 6851   ins_encode %{
 6852     if (is_reachable()) {
 6853       const char* str = __ code_string(_halt_reason);
 6854       __ stop(str);
 6855     }
 6856   %}
 6857   ins_pipe(pipe_slow);
 6858 %}
 6859 
 6860 // ============================================================================
 6861 
 6862 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6863 // Load Float
 6864 instruct MoveF2VL(vlRegF dst, regF src) %{
 6865   match(Set dst src);
 6866   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6867   ins_encode %{
 6868     ShouldNotReachHere();
 6869   %}
 6870   ins_pipe( fpu_reg_reg );
 6871 %}
 6872 
 6873 // Load Float
 6874 instruct MoveF2LEG(legRegF dst, regF src) %{
 6875   match(Set dst src);
 6876   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6877   ins_encode %{
 6878     ShouldNotReachHere();
 6879   %}
 6880   ins_pipe( fpu_reg_reg );
 6881 %}
 6882 
 6883 // Load Float
 6884 instruct MoveVL2F(regF dst, vlRegF src) %{
 6885   match(Set dst src);
 6886   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6887   ins_encode %{
 6888     ShouldNotReachHere();
 6889   %}
 6890   ins_pipe( fpu_reg_reg );
 6891 %}
 6892 
 6893 // Load Float
 6894 instruct MoveLEG2F(regF dst, legRegF src) %{
 6895   match(Set dst src);
 6896   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6897   ins_encode %{
 6898     ShouldNotReachHere();
 6899   %}
 6900   ins_pipe( fpu_reg_reg );
 6901 %}
 6902 
 6903 // Load Double
 6904 instruct MoveD2VL(vlRegD dst, regD src) %{
 6905   match(Set dst src);
 6906   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6907   ins_encode %{
 6908     ShouldNotReachHere();
 6909   %}
 6910   ins_pipe( fpu_reg_reg );
 6911 %}
 6912 
 6913 // Load Double
 6914 instruct MoveD2LEG(legRegD dst, regD src) %{
 6915   match(Set dst src);
 6916   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6917   ins_encode %{
 6918     ShouldNotReachHere();
 6919   %}
 6920   ins_pipe( fpu_reg_reg );
 6921 %}
 6922 
 6923 // Load Double
 6924 instruct MoveVL2D(regD dst, vlRegD src) %{
 6925   match(Set dst src);
 6926   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6927   ins_encode %{
 6928     ShouldNotReachHere();
 6929   %}
 6930   ins_pipe( fpu_reg_reg );
 6931 %}
 6932 
 6933 // Load Double
 6934 instruct MoveLEG2D(regD dst, legRegD src) %{
 6935   match(Set dst src);
 6936   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6937   ins_encode %{
 6938     ShouldNotReachHere();
 6939   %}
 6940   ins_pipe( fpu_reg_reg );
 6941 %}
 6942 
 6943 //----------Load/Store/Move Instructions---------------------------------------
 6944 //----------Load Instructions--------------------------------------------------
 6945 
 6946 // Load Byte (8 bit signed)
 6947 instruct loadB(rRegI dst, memory mem)
 6948 %{
 6949   match(Set dst (LoadB mem));
 6950 
 6951   ins_cost(125);
 6952   format %{ "movsbl  $dst, $mem\t# byte" %}
 6953 
 6954   ins_encode %{
 6955     __ movsbl($dst$$Register, $mem$$Address);
 6956   %}
 6957 
 6958   ins_pipe(ialu_reg_mem);
 6959 %}
 6960 
 6961 // Load Byte (8 bit signed) into Long Register
 6962 instruct loadB2L(rRegL dst, memory mem)
 6963 %{
 6964   match(Set dst (ConvI2L (LoadB mem)));
 6965 
 6966   ins_cost(125);
 6967   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6968 
 6969   ins_encode %{
 6970     __ movsbq($dst$$Register, $mem$$Address);
 6971   %}
 6972 
 6973   ins_pipe(ialu_reg_mem);
 6974 %}
 6975 
 6976 // Load Unsigned Byte (8 bit UNsigned)
 6977 instruct loadUB(rRegI dst, memory mem)
 6978 %{
 6979   match(Set dst (LoadUB mem));
 6980 
 6981   ins_cost(125);
 6982   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6983 
 6984   ins_encode %{
 6985     __ movzbl($dst$$Register, $mem$$Address);
 6986   %}
 6987 
 6988   ins_pipe(ialu_reg_mem);
 6989 %}
 6990 
 6991 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6992 instruct loadUB2L(rRegL dst, memory mem)
 6993 %{
 6994   match(Set dst (ConvI2L (LoadUB mem)));
 6995 
 6996   ins_cost(125);
 6997   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6998 
 6999   ins_encode %{
 7000     __ movzbq($dst$$Register, $mem$$Address);
 7001   %}
 7002 
 7003   ins_pipe(ialu_reg_mem);
 7004 %}
 7005 
 7006 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 7007 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7008   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 7009   effect(KILL cr);
 7010 
 7011   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 7012             "andl    $dst, right_n_bits($mask, 8)" %}
 7013   ins_encode %{
 7014     Register Rdst = $dst$$Register;
 7015     __ movzbq(Rdst, $mem$$Address);
 7016     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 7017   %}
 7018   ins_pipe(ialu_reg_mem);
 7019 %}
 7020 
 7021 // Load Short (16 bit signed)
 7022 instruct loadS(rRegI dst, memory mem)
 7023 %{
 7024   match(Set dst (LoadS mem));
 7025 
 7026   ins_cost(125);
 7027   format %{ "movswl $dst, $mem\t# short" %}
 7028 
 7029   ins_encode %{
 7030     __ movswl($dst$$Register, $mem$$Address);
 7031   %}
 7032 
 7033   ins_pipe(ialu_reg_mem);
 7034 %}
 7035 
 7036 // Load Short (16 bit signed) to Byte (8 bit signed)
 7037 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7038   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 7039 
 7040   ins_cost(125);
 7041   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 7042   ins_encode %{
 7043     __ movsbl($dst$$Register, $mem$$Address);
 7044   %}
 7045   ins_pipe(ialu_reg_mem);
 7046 %}
 7047 
 7048 // Load Short (16 bit signed) into Long Register
 7049 instruct loadS2L(rRegL dst, memory mem)
 7050 %{
 7051   match(Set dst (ConvI2L (LoadS mem)));
 7052 
 7053   ins_cost(125);
 7054   format %{ "movswq $dst, $mem\t# short -> long" %}
 7055 
 7056   ins_encode %{
 7057     __ movswq($dst$$Register, $mem$$Address);
 7058   %}
 7059 
 7060   ins_pipe(ialu_reg_mem);
 7061 %}
 7062 
 7063 // Load Unsigned Short/Char (16 bit UNsigned)
 7064 instruct loadUS(rRegI dst, memory mem)
 7065 %{
 7066   match(Set dst (LoadUS mem));
 7067 
 7068   ins_cost(125);
 7069   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 7070 
 7071   ins_encode %{
 7072     __ movzwl($dst$$Register, $mem$$Address);
 7073   %}
 7074 
 7075   ins_pipe(ialu_reg_mem);
 7076 %}
 7077 
 7078 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 7079 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7080   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 7081 
 7082   ins_cost(125);
 7083   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 7084   ins_encode %{
 7085     __ movsbl($dst$$Register, $mem$$Address);
 7086   %}
 7087   ins_pipe(ialu_reg_mem);
 7088 %}
 7089 
 7090 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 7091 instruct loadUS2L(rRegL dst, memory mem)
 7092 %{
 7093   match(Set dst (ConvI2L (LoadUS mem)));
 7094 
 7095   ins_cost(125);
 7096   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 7097 
 7098   ins_encode %{
 7099     __ movzwq($dst$$Register, $mem$$Address);
 7100   %}
 7101 
 7102   ins_pipe(ialu_reg_mem);
 7103 %}
 7104 
 7105 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 7106 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7107   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7108 
 7109   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7110   ins_encode %{
 7111     __ movzbq($dst$$Register, $mem$$Address);
 7112   %}
 7113   ins_pipe(ialu_reg_mem);
 7114 %}
 7115 
 7116 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 7117 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7118   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7119   effect(KILL cr);
 7120 
 7121   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7122             "andl    $dst, right_n_bits($mask, 16)" %}
 7123   ins_encode %{
 7124     Register Rdst = $dst$$Register;
 7125     __ movzwq(Rdst, $mem$$Address);
 7126     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7127   %}
 7128   ins_pipe(ialu_reg_mem);
 7129 %}
 7130 
 7131 // Load Integer
 7132 instruct loadI(rRegI dst, memory mem)
 7133 %{
 7134   match(Set dst (LoadI mem));
 7135 
 7136   ins_cost(125);
 7137   format %{ "movl    $dst, $mem\t# int" %}
 7138 
 7139   ins_encode %{
 7140     __ movl($dst$$Register, $mem$$Address);
 7141   %}
 7142 
 7143   ins_pipe(ialu_reg_mem);
 7144 %}
 7145 
 7146 // Load Integer (32 bit signed) to Byte (8 bit signed)
 7147 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7148   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7149 
 7150   ins_cost(125);
 7151   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7152   ins_encode %{
 7153     __ movsbl($dst$$Register, $mem$$Address);
 7154   %}
 7155   ins_pipe(ialu_reg_mem);
 7156 %}
 7157 
 7158 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 7159 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7160   match(Set dst (AndI (LoadI mem) mask));
 7161 
 7162   ins_cost(125);
 7163   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7164   ins_encode %{
 7165     __ movzbl($dst$$Register, $mem$$Address);
 7166   %}
 7167   ins_pipe(ialu_reg_mem);
 7168 %}
 7169 
 7170 // Load Integer (32 bit signed) to Short (16 bit signed)
 7171 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7172   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7173 
 7174   ins_cost(125);
 7175   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7176   ins_encode %{
 7177     __ movswl($dst$$Register, $mem$$Address);
 7178   %}
 7179   ins_pipe(ialu_reg_mem);
 7180 %}
 7181 
 7182 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 7183 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7184   match(Set dst (AndI (LoadI mem) mask));
 7185 
 7186   ins_cost(125);
 7187   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7188   ins_encode %{
 7189     __ movzwl($dst$$Register, $mem$$Address);
 7190   %}
 7191   ins_pipe(ialu_reg_mem);
 7192 %}
 7193 
 7194 // Load Integer into Long Register
 7195 instruct loadI2L(rRegL dst, memory mem)
 7196 %{
 7197   match(Set dst (ConvI2L (LoadI mem)));
 7198 
 7199   ins_cost(125);
 7200   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7201 
 7202   ins_encode %{
 7203     __ movslq($dst$$Register, $mem$$Address);
 7204   %}
 7205 
 7206   ins_pipe(ialu_reg_mem);
 7207 %}
 7208 
 7209 // Load Integer with mask 0xFF into Long Register
 7210 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7211   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7212 
 7213   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7214   ins_encode %{
 7215     __ movzbq($dst$$Register, $mem$$Address);
 7216   %}
 7217   ins_pipe(ialu_reg_mem);
 7218 %}
 7219 
 7220 // Load Integer with mask 0xFFFF into Long Register
 7221 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7222   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7223 
 7224   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7225   ins_encode %{
 7226     __ movzwq($dst$$Register, $mem$$Address);
 7227   %}
 7228   ins_pipe(ialu_reg_mem);
 7229 %}
 7230 
 7231 // Load Integer with a 31-bit mask into Long Register
 7232 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7233   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7234   effect(KILL cr);
 7235 
 7236   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7237             "andl    $dst, $mask" %}
 7238   ins_encode %{
 7239     Register Rdst = $dst$$Register;
 7240     __ movl(Rdst, $mem$$Address);
 7241     __ andl(Rdst, $mask$$constant);
 7242   %}
 7243   ins_pipe(ialu_reg_mem);
 7244 %}
 7245 
 7246 // Load Unsigned Integer into Long Register
 7247 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7248 %{
 7249   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7250 
 7251   ins_cost(125);
 7252   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7253 
 7254   ins_encode %{
 7255     __ movl($dst$$Register, $mem$$Address);
 7256   %}
 7257 
 7258   ins_pipe(ialu_reg_mem);
 7259 %}
 7260 
 7261 // Load Long
 7262 instruct loadL(rRegL dst, memory mem)
 7263 %{
 7264   match(Set dst (LoadL mem));
 7265 
 7266   ins_cost(125);
 7267   format %{ "movq    $dst, $mem\t# long" %}
 7268 
 7269   ins_encode %{
 7270     __ movq($dst$$Register, $mem$$Address);
 7271   %}
 7272 
 7273   ins_pipe(ialu_reg_mem); // XXX
 7274 %}
 7275 
 7276 // Load Range
 7277 instruct loadRange(rRegI dst, memory mem)
 7278 %{
 7279   match(Set dst (LoadRange mem));
 7280 
 7281   ins_cost(125); // XXX
 7282   format %{ "movl    $dst, $mem\t# range" %}
 7283   ins_encode %{
 7284     __ movl($dst$$Register, $mem$$Address);
 7285   %}
 7286   ins_pipe(ialu_reg_mem);
 7287 %}
 7288 
 7289 // Load Pointer
 7290 instruct loadP(rRegP dst, memory mem)
 7291 %{
 7292   match(Set dst (LoadP mem));
 7293   predicate(n->as_Load()->barrier_data() == 0);
 7294 
 7295   ins_cost(125); // XXX
 7296   format %{ "movq    $dst, $mem\t# ptr" %}
 7297   ins_encode %{
 7298     __ movq($dst$$Register, $mem$$Address);
 7299   %}
 7300   ins_pipe(ialu_reg_mem); // XXX
 7301 %}
 7302 
 7303 // Load Compressed Pointer
 7304 instruct loadN(rRegN dst, memory mem)
 7305 %{
 7306    predicate(n->as_Load()->barrier_data() == 0);
 7307    match(Set dst (LoadN mem));
 7308 
 7309    ins_cost(125); // XXX
 7310    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7311    ins_encode %{
 7312      __ movl($dst$$Register, $mem$$Address);
 7313    %}
 7314    ins_pipe(ialu_reg_mem); // XXX
 7315 %}
 7316 
 7317 
 7318 // Load Klass Pointer
 7319 instruct loadKlass(rRegP dst, memory mem)
 7320 %{
 7321   match(Set dst (LoadKlass mem));
 7322 
 7323   ins_cost(125); // XXX
 7324   format %{ "movq    $dst, $mem\t# class" %}
 7325   ins_encode %{
 7326     __ movq($dst$$Register, $mem$$Address);
 7327   %}
 7328   ins_pipe(ialu_reg_mem); // XXX
 7329 %}
 7330 
 7331 // Load narrow Klass Pointer
 7332 instruct loadNKlass(rRegN dst, memory mem)
 7333 %{
 7334   predicate(!UseCompactObjectHeaders);
 7335   match(Set dst (LoadNKlass mem));
 7336 
 7337   ins_cost(125); // XXX
 7338   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7339   ins_encode %{
 7340     __ movl($dst$$Register, $mem$$Address);
 7341   %}
 7342   ins_pipe(ialu_reg_mem); // XXX
 7343 %}
 7344 
 7345 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7346 %{
 7347   predicate(UseCompactObjectHeaders);
 7348   match(Set dst (LoadNKlass mem));
 7349   effect(KILL cr);
 7350   ins_cost(125);
 7351   format %{
 7352     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7353     "shrl    $dst, markWord::klass_shift_at_offset"
 7354   %}
 7355   ins_encode %{
 7356     if (UseAPX) {
 7357       __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
 7358     }
 7359     else {
 7360       __ movl($dst$$Register, $mem$$Address);
 7361       __ shrl($dst$$Register, markWord::klass_shift_at_offset);
 7362     }
 7363   %}
 7364   ins_pipe(ialu_reg_mem);
 7365 %}
 7366 
 7367 // Load Float
 7368 instruct loadF(regF dst, memory mem)
 7369 %{
 7370   match(Set dst (LoadF mem));
 7371 
 7372   ins_cost(145); // XXX
 7373   format %{ "movss   $dst, $mem\t# float" %}
 7374   ins_encode %{
 7375     __ movflt($dst$$XMMRegister, $mem$$Address);
 7376   %}
 7377   ins_pipe(pipe_slow); // XXX
 7378 %}
 7379 
 7380 // Load Double
 7381 instruct loadD_partial(regD dst, memory mem)
 7382 %{
 7383   predicate(!UseXmmLoadAndClearUpper);
 7384   match(Set dst (LoadD mem));
 7385 
 7386   ins_cost(145); // XXX
 7387   format %{ "movlpd  $dst, $mem\t# double" %}
 7388   ins_encode %{
 7389     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7390   %}
 7391   ins_pipe(pipe_slow); // XXX
 7392 %}
 7393 
 7394 instruct loadD(regD dst, memory mem)
 7395 %{
 7396   predicate(UseXmmLoadAndClearUpper);
 7397   match(Set dst (LoadD mem));
 7398 
 7399   ins_cost(145); // XXX
 7400   format %{ "movsd   $dst, $mem\t# double" %}
 7401   ins_encode %{
 7402     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7403   %}
 7404   ins_pipe(pipe_slow); // XXX
 7405 %}
 7406 
 7407 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
 7408 %{
 7409   match(Set dst con);
 7410 
 7411   format %{ "leaq  $dst, $con\t# AOT Runtime Constants Address" %}
 7412 
 7413   ins_encode %{
 7414     __ load_aotrc_address($dst$$Register, (address)$con$$constant);
 7415   %}
 7416 
 7417   ins_pipe(ialu_reg_fat);
 7418 %}
 7419 
 7420 // max = java.lang.Math.max(float a, float b)
 7421 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
 7422   predicate(VM_Version::supports_avx10_2());
 7423   match(Set dst (MaxF a b));
 7424   format %{ "maxF $dst, $a, $b" %}
 7425   ins_encode %{
 7426     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7427   %}
 7428   ins_pipe( pipe_slow );
 7429 %}
 7430 
 7431 // max = java.lang.Math.max(float a, float b)
 7432 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7433   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7434   match(Set dst (MaxF a b));
 7435   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7436   format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7437   ins_encode %{
 7438     __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7439   %}
 7440   ins_pipe( pipe_slow );
 7441 %}
 7442 
 7443 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7444   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7445   match(Set dst (MaxF a b));
 7446   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7447 
 7448   format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
 7449   ins_encode %{
 7450     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7451                     false /*min*/, true /*single*/);
 7452   %}
 7453   ins_pipe( pipe_slow );
 7454 %}
 7455 
 7456 // max = java.lang.Math.max(double a, double b)
 7457 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
 7458   predicate(VM_Version::supports_avx10_2());
 7459   match(Set dst (MaxD a b));
 7460   format %{ "maxD $dst, $a, $b" %}
 7461   ins_encode %{
 7462     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7463   %}
 7464   ins_pipe( pipe_slow );
 7465 %}
 7466 
 7467 // max = java.lang.Math.max(double a, double b)
 7468 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7469   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7470   match(Set dst (MaxD a b));
 7471   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7472   format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7473   ins_encode %{
 7474     __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7475   %}
 7476   ins_pipe( pipe_slow );
 7477 %}
 7478 
 7479 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7480   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7481   match(Set dst (MaxD a b));
 7482   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7483 
 7484   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7485   ins_encode %{
 7486     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7487                     false /*min*/, false /*single*/);
 7488   %}
 7489   ins_pipe( pipe_slow );
 7490 %}
 7491 
// min = java.lang.Math.min(float a, float b)
 7493 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
 7494   predicate(VM_Version::supports_avx10_2());
 7495   match(Set dst (MinF a b));
 7496   format %{ "minF $dst, $a, $b" %}
 7497   ins_encode %{
 7498     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7499   %}
 7500   ins_pipe( pipe_slow );
 7501 %}
 7502 
 7503 // min = java.lang.Math.min(float a, float b)
 7504 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7505   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7506   match(Set dst (MinF a b));
 7507   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7508   format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7509   ins_encode %{
 7510     __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7511   %}
 7512   ins_pipe( pipe_slow );
 7513 %}
 7514 
 7515 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7516   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7517   match(Set dst (MinF a b));
 7518   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7519 
 7520   format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7521   ins_encode %{
 7522     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7523                     true /*min*/, true /*single*/);
 7524   %}
 7525   ins_pipe( pipe_slow );
 7526 %}
 7527 
// min = java.lang.Math.min(double a, double b)
 7529 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
 7530   predicate(VM_Version::supports_avx10_2());
 7531   match(Set dst (MinD a b));
 7532   format %{ "minD $dst, $a, $b" %}
 7533   ins_encode %{
 7534     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7535   %}
 7536   ins_pipe( pipe_slow );
 7537 %}
 7538 
 7539 // min = java.lang.Math.min(double a, double b)
 7540 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7541   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7542   match(Set dst (MinD a b));
 7543   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7544     format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7545   ins_encode %{
 7546     __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7547   %}
 7548   ins_pipe( pipe_slow );
 7549 %}
 7550 
 7551 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7552   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7553   match(Set dst (MinD a b));
 7554   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7555 
 7556   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7557   ins_encode %{
 7558     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7559                     true /*min*/, false /*single*/);
 7560   %}
 7561   ins_pipe( pipe_slow );
 7562 %}
 7563 
 7564 // Load Effective Address
 7565 instruct leaP8(rRegP dst, indOffset8 mem)
 7566 %{
 7567   match(Set dst mem);
 7568 
 7569   ins_cost(110); // XXX
 7570   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7571   ins_encode %{
 7572     __ leaq($dst$$Register, $mem$$Address);
 7573   %}
 7574   ins_pipe(ialu_reg_reg_fat);
 7575 %}
 7576 
 7577 instruct leaP32(rRegP dst, indOffset32 mem)
 7578 %{
 7579   match(Set dst mem);
 7580 
 7581   ins_cost(110);
 7582   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7583   ins_encode %{
 7584     __ leaq($dst$$Register, $mem$$Address);
 7585   %}
 7586   ins_pipe(ialu_reg_reg_fat);
 7587 %}
 7588 
 7589 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7590 %{
 7591   match(Set dst mem);
 7592 
 7593   ins_cost(110);
 7594   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7595   ins_encode %{
 7596     __ leaq($dst$$Register, $mem$$Address);
 7597   %}
 7598   ins_pipe(ialu_reg_reg_fat);
 7599 %}
 7600 
 7601 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7602 %{
 7603   match(Set dst mem);
 7604 
 7605   ins_cost(110);
 7606   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7607   ins_encode %{
 7608     __ leaq($dst$$Register, $mem$$Address);
 7609   %}
 7610   ins_pipe(ialu_reg_reg_fat);
 7611 %}
 7612 
 7613 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7614 %{
 7615   match(Set dst mem);
 7616 
 7617   ins_cost(110);
 7618   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7619   ins_encode %{
 7620     __ leaq($dst$$Register, $mem$$Address);
 7621   %}
 7622   ins_pipe(ialu_reg_reg_fat);
 7623 %}
 7624 
 7625 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7626 %{
 7627   match(Set dst mem);
 7628 
 7629   ins_cost(110);
 7630   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7631   ins_encode %{
 7632     __ leaq($dst$$Register, $mem$$Address);
 7633   %}
 7634   ins_pipe(ialu_reg_reg_fat);
 7635 %}
 7636 
 7637 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7638 %{
 7639   match(Set dst mem);
 7640 
 7641   ins_cost(110);
 7642   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7643   ins_encode %{
 7644     __ leaq($dst$$Register, $mem$$Address);
 7645   %}
 7646   ins_pipe(ialu_reg_reg_fat);
 7647 %}
 7648 
 7649 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7650 %{
 7651   match(Set dst mem);
 7652 
 7653   ins_cost(110);
 7654   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7655   ins_encode %{
 7656     __ leaq($dst$$Register, $mem$$Address);
 7657   %}
 7658   ins_pipe(ialu_reg_reg_fat);
 7659 %}
 7660 
// Load Effective Address which uses a narrow (32-bit) oop
 7662 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7663 %{
 7664   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7665   match(Set dst mem);
 7666 
 7667   ins_cost(110);
 7668   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7669   ins_encode %{
 7670     __ leaq($dst$$Register, $mem$$Address);
 7671   %}
 7672   ins_pipe(ialu_reg_reg_fat);
 7673 %}
 7674 
 7675 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7676 %{
 7677   predicate(CompressedOops::shift() == 0);
 7678   match(Set dst mem);
 7679 
 7680   ins_cost(110); // XXX
 7681   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7682   ins_encode %{
 7683     __ leaq($dst$$Register, $mem$$Address);
 7684   %}
 7685   ins_pipe(ialu_reg_reg_fat);
 7686 %}
 7687 
 7688 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7689 %{
 7690   predicate(CompressedOops::shift() == 0);
 7691   match(Set dst mem);
 7692 
 7693   ins_cost(110);
 7694   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7695   ins_encode %{
 7696     __ leaq($dst$$Register, $mem$$Address);
 7697   %}
 7698   ins_pipe(ialu_reg_reg_fat);
 7699 %}
 7700 
 7701 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7702 %{
 7703   predicate(CompressedOops::shift() == 0);
 7704   match(Set dst mem);
 7705 
 7706   ins_cost(110);
 7707   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7708   ins_encode %{
 7709     __ leaq($dst$$Register, $mem$$Address);
 7710   %}
 7711   ins_pipe(ialu_reg_reg_fat);
 7712 %}
 7713 
 7714 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7715 %{
 7716   predicate(CompressedOops::shift() == 0);
 7717   match(Set dst mem);
 7718 
 7719   ins_cost(110);
 7720   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7721   ins_encode %{
 7722     __ leaq($dst$$Register, $mem$$Address);
 7723   %}
 7724   ins_pipe(ialu_reg_reg_fat);
 7725 %}
 7726 
 7727 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7728 %{
 7729   predicate(CompressedOops::shift() == 0);
 7730   match(Set dst mem);
 7731 
 7732   ins_cost(110);
 7733   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7734   ins_encode %{
 7735     __ leaq($dst$$Register, $mem$$Address);
 7736   %}
 7737   ins_pipe(ialu_reg_reg_fat);
 7738 %}
 7739 
 7740 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7741 %{
 7742   predicate(CompressedOops::shift() == 0);
 7743   match(Set dst mem);
 7744 
 7745   ins_cost(110);
 7746   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7747   ins_encode %{
 7748     __ leaq($dst$$Register, $mem$$Address);
 7749   %}
 7750   ins_pipe(ialu_reg_reg_fat);
 7751 %}
 7752 
 7753 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7754 %{
 7755   predicate(CompressedOops::shift() == 0);
 7756   match(Set dst mem);
 7757 
 7758   ins_cost(110);
 7759   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7760   ins_encode %{
 7761     __ leaq($dst$$Register, $mem$$Address);
 7762   %}
 7763   ins_pipe(ialu_reg_reg_fat);
 7764 %}
 7765 
 7766 instruct loadConI(rRegI dst, immI src)
 7767 %{
 7768   match(Set dst src);
 7769 
 7770   format %{ "movl    $dst, $src\t# int" %}
 7771   ins_encode %{
 7772     __ movl($dst$$Register, $src$$constant);
 7773   %}
 7774   ins_pipe(ialu_reg_fat); // XXX
 7775 %}
 7776 
 7777 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7778 %{
 7779   match(Set dst src);
 7780   effect(KILL cr);
 7781 
 7782   ins_cost(50);
 7783   format %{ "xorl    $dst, $dst\t# int" %}
 7784   ins_encode %{
 7785     __ xorl($dst$$Register, $dst$$Register);
 7786   %}
 7787   ins_pipe(ialu_reg);
 7788 %}
 7789 
 7790 instruct loadConL(rRegL dst, immL src)
 7791 %{
 7792   match(Set dst src);
 7793 
 7794   ins_cost(150);
 7795   format %{ "movq    $dst, $src\t# long" %}
 7796   ins_encode %{
 7797     __ mov64($dst$$Register, $src$$constant);
 7798   %}
 7799   ins_pipe(ialu_reg);
 7800 %}
 7801 
 7802 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7803 %{
 7804   match(Set dst src);
 7805   effect(KILL cr);
 7806 
 7807   ins_cost(50);
 7808   format %{ "xorl    $dst, $dst\t# long" %}
 7809   ins_encode %{
 7810     __ xorl($dst$$Register, $dst$$Register);
 7811   %}
 7812   ins_pipe(ialu_reg); // XXX
 7813 %}
 7814 
 7815 instruct loadConUL32(rRegL dst, immUL32 src)
 7816 %{
 7817   match(Set dst src);
 7818 
 7819   ins_cost(60);
 7820   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7821   ins_encode %{
 7822     __ movl($dst$$Register, $src$$constant);
 7823   %}
 7824   ins_pipe(ialu_reg);
 7825 %}
 7826 
 7827 instruct loadConL32(rRegL dst, immL32 src)
 7828 %{
 7829   match(Set dst src);
 7830 
 7831   ins_cost(70);
 7832   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7833   ins_encode %{
 7834     __ movq($dst$$Register, $src$$constant);
 7835   %}
 7836   ins_pipe(ialu_reg);
 7837 %}
 7838 
 7839 instruct loadConP(rRegP dst, immP con) %{
 7840   match(Set dst con);
 7841 
 7842   format %{ "movq    $dst, $con\t# ptr" %}
 7843   ins_encode %{
 7844     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7845   %}
 7846   ins_pipe(ialu_reg_fat); // XXX
 7847 %}
 7848 
 7849 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7850 %{
 7851   match(Set dst src);
 7852   effect(KILL cr);
 7853 
 7854   ins_cost(50);
 7855   format %{ "xorl    $dst, $dst\t# ptr" %}
 7856   ins_encode %{
 7857     __ xorl($dst$$Register, $dst$$Register);
 7858   %}
 7859   ins_pipe(ialu_reg);
 7860 %}
 7861 
 7862 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7863 %{
 7864   match(Set dst src);
 7865   effect(KILL cr);
 7866 
 7867   ins_cost(60);
 7868   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7869   ins_encode %{
 7870     __ movl($dst$$Register, $src$$constant);
 7871   %}
 7872   ins_pipe(ialu_reg);
 7873 %}
 7874 
 7875 instruct loadConF(regF dst, immF con) %{
 7876   match(Set dst con);
 7877   ins_cost(125);
 7878   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7879   ins_encode %{
 7880     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7881   %}
 7882   ins_pipe(pipe_slow);
 7883 %}
 7884 
 7885 instruct loadConH(regF dst, immH con) %{
 7886   match(Set dst con);
 7887   ins_cost(125);
 7888   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7889   ins_encode %{
 7890     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7891   %}
 7892   ins_pipe(pipe_slow);
 7893 %}
 7894 
 7895 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7896   match(Set dst src);
 7897   effect(KILL cr);
 7898   format %{ "xorq    $dst, $src\t# compressed null pointer" %}
 7899   ins_encode %{
 7900     __ xorq($dst$$Register, $dst$$Register);
 7901   %}
 7902   ins_pipe(ialu_reg);
 7903 %}
 7904 
 7905 instruct loadConN(rRegN dst, immN src) %{
 7906   match(Set dst src);
 7907 
 7908   ins_cost(125);
 7909   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7910   ins_encode %{
 7911     address con = (address)$src$$constant;
 7912     if (con == nullptr) {
 7913       ShouldNotReachHere();
 7914     } else {
 7915       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7916     }
 7917   %}
 7918   ins_pipe(ialu_reg_fat); // XXX
 7919 %}
 7920 
 7921 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7922   match(Set dst src);
 7923 
 7924   ins_cost(125);
 7925   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7926   ins_encode %{
 7927     address con = (address)$src$$constant;
 7928     if (con == nullptr) {
 7929       ShouldNotReachHere();
 7930     } else {
 7931       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7932     }
 7933   %}
 7934   ins_pipe(ialu_reg_fat); // XXX
 7935 %}
 7936 
 7937 instruct loadConF0(regF dst, immF0 src)
 7938 %{
 7939   match(Set dst src);
 7940   ins_cost(100);
 7941 
 7942   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7943   ins_encode %{
 7944     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7945   %}
 7946   ins_pipe(pipe_slow);
 7947 %}
 7948 
// Use the same format since predicate() cannot be used here.
 7950 instruct loadConD(regD dst, immD con) %{
 7951   match(Set dst con);
 7952   ins_cost(125);
 7953   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7954   ins_encode %{
 7955     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7956   %}
 7957   ins_pipe(pipe_slow);
 7958 %}
 7959 
 7960 instruct loadConD0(regD dst, immD0 src)
 7961 %{
 7962   match(Set dst src);
 7963   ins_cost(100);
 7964 
 7965   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7966   ins_encode %{
 7967     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7968   %}
 7969   ins_pipe(pipe_slow);
 7970 %}
 7971 
 7972 instruct loadSSI(rRegI dst, stackSlotI src)
 7973 %{
 7974   match(Set dst src);
 7975 
 7976   ins_cost(125);
 7977   format %{ "movl    $dst, $src\t# int stk" %}
 7978   ins_encode %{
 7979     __ movl($dst$$Register, $src$$Address);
 7980   %}
 7981   ins_pipe(ialu_reg_mem);
 7982 %}
 7983 
 7984 instruct loadSSL(rRegL dst, stackSlotL src)
 7985 %{
 7986   match(Set dst src);
 7987 
 7988   ins_cost(125);
 7989   format %{ "movq    $dst, $src\t# long stk" %}
 7990   ins_encode %{
 7991     __ movq($dst$$Register, $src$$Address);
 7992   %}
 7993   ins_pipe(ialu_reg_mem);
 7994 %}
 7995 
 7996 instruct loadSSP(rRegP dst, stackSlotP src)
 7997 %{
 7998   match(Set dst src);
 7999 
 8000   ins_cost(125);
 8001   format %{ "movq    $dst, $src\t# ptr stk" %}
 8002   ins_encode %{
 8003     __ movq($dst$$Register, $src$$Address);
 8004   %}
 8005   ins_pipe(ialu_reg_mem);
 8006 %}
 8007 
 8008 instruct loadSSF(regF dst, stackSlotF src)
 8009 %{
 8010   match(Set dst src);
 8011 
 8012   ins_cost(125);
 8013   format %{ "movss   $dst, $src\t# float stk" %}
 8014   ins_encode %{
 8015     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 8016   %}
 8017   ins_pipe(pipe_slow); // XXX
 8018 %}
 8019 
// Use the same format since predicate() cannot be used here.
 8021 instruct loadSSD(regD dst, stackSlotD src)
 8022 %{
 8023   match(Set dst src);
 8024 
 8025   ins_cost(125);
 8026   format %{ "movsd   $dst, $src\t# double stk" %}
 8027   ins_encode  %{
 8028     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 8029   %}
 8030   ins_pipe(pipe_slow); // XXX
 8031 %}
 8032 
 8033 // Prefetch instructions for allocation.
 8034 // Must be safe to execute with invalid address (cannot fault).
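// The PREFETCH* family is architecturally a hint on x86: it never raises a
// page fault, even for unmapped addresses, which is what makes it legal to
// run ahead of the TLAB bump pointer. AllocatePrefetchInstr selects the
// flavor used by the rules below:
//   0 = prefetchnta, 1 = prefetcht0, 2 = prefetcht2, 3 = prefetchw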
 8035 
 8036 instruct prefetchAlloc( memory mem ) %{
 8037   predicate(AllocatePrefetchInstr==3);
 8038   match(PrefetchAllocation mem);
 8039   ins_cost(125);
 8040 
 8041   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 8042   ins_encode %{
 8043     __ prefetchw($mem$$Address);
 8044   %}
 8045   ins_pipe(ialu_mem);
 8046 %}
 8047 
 8048 instruct prefetchAllocNTA( memory mem ) %{
 8049   predicate(AllocatePrefetchInstr==0);
 8050   match(PrefetchAllocation mem);
 8051   ins_cost(125);
 8052 
 8053   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 8054   ins_encode %{
 8055     __ prefetchnta($mem$$Address);
 8056   %}
 8057   ins_pipe(ialu_mem);
 8058 %}
 8059 
 8060 instruct prefetchAllocT0( memory mem ) %{
 8061   predicate(AllocatePrefetchInstr==1);
 8062   match(PrefetchAllocation mem);
 8063   ins_cost(125);
 8064 
 8065   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 8066   ins_encode %{
 8067     __ prefetcht0($mem$$Address);
 8068   %}
 8069   ins_pipe(ialu_mem);
 8070 %}
 8071 
 8072 instruct prefetchAllocT2( memory mem ) %{
 8073   predicate(AllocatePrefetchInstr==2);
 8074   match(PrefetchAllocation mem);
 8075   ins_cost(125);
 8076 
 8077   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 8078   ins_encode %{
 8079     __ prefetcht2($mem$$Address);
 8080   %}
 8081   ins_pipe(ialu_mem);
 8082 %}
 8083 
 8084 //----------Store Instructions-------------------------------------------------
 8085 
 8086 // Store Byte
 8087 instruct storeB(memory mem, rRegI src)
 8088 %{
 8089   match(Set mem (StoreB mem src));
 8090 
 8091   ins_cost(125); // XXX
 8092   format %{ "movb    $mem, $src\t# byte" %}
 8093   ins_encode %{
 8094     __ movb($mem$$Address, $src$$Register);
 8095   %}
 8096   ins_pipe(ialu_mem_reg);
 8097 %}
 8098 
 8099 // Store Char/Short
 8100 instruct storeC(memory mem, rRegI src)
 8101 %{
 8102   match(Set mem (StoreC mem src));
 8103 
 8104   ins_cost(125); // XXX
 8105   format %{ "movw    $mem, $src\t# char/short" %}
 8106   ins_encode %{
 8107     __ movw($mem$$Address, $src$$Register);
 8108   %}
 8109   ins_pipe(ialu_mem_reg);
 8110 %}
 8111 
 8112 // Store Integer
 8113 instruct storeI(memory mem, rRegI src)
 8114 %{
 8115   match(Set mem (StoreI mem src));
 8116 
 8117   ins_cost(125); // XXX
 8118   format %{ "movl    $mem, $src\t# int" %}
 8119   ins_encode %{
 8120     __ movl($mem$$Address, $src$$Register);
 8121   %}
 8122   ins_pipe(ialu_mem_reg);
 8123 %}
 8124 
 8125 // Store Long
 8126 instruct storeL(memory mem, rRegL src)
 8127 %{
 8128   match(Set mem (StoreL mem src));
 8129 
 8130   ins_cost(125); // XXX
 8131   format %{ "movq    $mem, $src\t# long" %}
 8132   ins_encode %{
 8133     __ movq($mem$$Address, $src$$Register);
 8134   %}
 8135   ins_pipe(ialu_mem_reg); // XXX
 8136 %}
 8137 
 8138 // Store Pointer
 8139 instruct storeP(memory mem, any_RegP src)
 8140 %{
 8141   predicate(n->as_Store()->barrier_data() == 0);
 8142   match(Set mem (StoreP mem src));
 8143 
 8144   ins_cost(125); // XXX
 8145   format %{ "movq    $mem, $src\t# ptr" %}
 8146   ins_encode %{
 8147     __ movq($mem$$Address, $src$$Register);
 8148   %}
 8149   ins_pipe(ialu_mem_reg);
 8150 %}
 8151 
 8152 instruct storeImmP0(memory mem, immP0 zero)
 8153 %{
 8154   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8155   match(Set mem (StoreP mem zero));
 8156 
 8157   ins_cost(125); // XXX
 8158   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8159   ins_encode %{
 8160     __ movq($mem$$Address, r12);
 8161   %}
 8162   ins_pipe(ialu_mem_reg);
 8163 %}
 8164 
 8165 // Store Null Pointer, mark word, or other simple pointer constant.
 8166 instruct storeImmP(memory mem, immP31 src)
 8167 %{
 8168   predicate(n->as_Store()->barrier_data() == 0);
 8169   match(Set mem (StoreP mem src));
 8170 
 8171   ins_cost(150); // XXX
 8172   format %{ "movq    $mem, $src\t# ptr" %}
 8173   ins_encode %{
 8174     __ movq($mem$$Address, $src$$constant);
 8175   %}
 8176   ins_pipe(ialu_mem_imm);
 8177 %}
 8178 
 8179 // Store Compressed Pointer
 8180 instruct storeN(memory mem, rRegN src)
 8181 %{
 8182   predicate(n->as_Store()->barrier_data() == 0);
 8183   match(Set mem (StoreN mem src));
 8184 
 8185   ins_cost(125); // XXX
 8186   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8187   ins_encode %{
 8188     __ movl($mem$$Address, $src$$Register);
 8189   %}
 8190   ins_pipe(ialu_mem_reg);
 8191 %}
 8192 
 8193 instruct storeNKlass(memory mem, rRegN src)
 8194 %{
 8195   match(Set mem (StoreNKlass mem src));
 8196 
 8197   ins_cost(125); // XXX
 8198   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8199   ins_encode %{
 8200     __ movl($mem$$Address, $src$$Register);
 8201   %}
 8202   ins_pipe(ialu_mem_reg);
 8203 %}
 8204 
 8205 instruct storeImmN0(memory mem, immN0 zero)
 8206 %{
 8207   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8208   match(Set mem (StoreN mem zero));
 8209 
 8210   ins_cost(125); // XXX
 8211   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8212   ins_encode %{
 8213     __ movl($mem$$Address, r12);
 8214   %}
 8215   ins_pipe(ialu_mem_reg);
 8216 %}
 8217 
 8218 instruct storeImmN(memory mem, immN src)
 8219 %{
 8220   predicate(n->as_Store()->barrier_data() == 0);
 8221   match(Set mem (StoreN mem src));
 8222 
 8223   ins_cost(150); // XXX
 8224   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8225   ins_encode %{
 8226     address con = (address)$src$$constant;
 8227     if (con == nullptr) {
 8228       __ movl($mem$$Address, 0);
 8229     } else {
 8230       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8231     }
 8232   %}
 8233   ins_pipe(ialu_mem_imm);
 8234 %}
 8235 
 8236 instruct storeImmNKlass(memory mem, immNKlass src)
 8237 %{
 8238   match(Set mem (StoreNKlass mem src));
 8239 
 8240   ins_cost(150); // XXX
 8241   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8242   ins_encode %{
 8243     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8244   %}
 8245   ins_pipe(ialu_mem_imm);
 8246 %}
 8247 
 8248 // Store Integer Immediate
 8249 instruct storeImmI0(memory mem, immI_0 zero)
 8250 %{
 8251   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8252   match(Set mem (StoreI mem zero));
 8253 
 8254   ins_cost(125); // XXX
 8255   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8256   ins_encode %{
 8257     __ movl($mem$$Address, r12);
 8258   %}
 8259   ins_pipe(ialu_mem_reg);
 8260 %}
 8261 
 8262 instruct storeImmI(memory mem, immI src)
 8263 %{
 8264   match(Set mem (StoreI mem src));
 8265 
 8266   ins_cost(150);
 8267   format %{ "movl    $mem, $src\t# int" %}
 8268   ins_encode %{
 8269     __ movl($mem$$Address, $src$$constant);
 8270   %}
 8271   ins_pipe(ialu_mem_imm);
 8272 %}
 8273 
 8274 // Store Long Immediate
 8275 instruct storeImmL0(memory mem, immL0 zero)
 8276 %{
 8277   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8278   match(Set mem (StoreL mem zero));
 8279 
 8280   ins_cost(125); // XXX
 8281   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8282   ins_encode %{
 8283     __ movq($mem$$Address, r12);
 8284   %}
 8285   ins_pipe(ialu_mem_reg);
 8286 %}
 8287 
 8288 instruct storeImmL(memory mem, immL32 src)
 8289 %{
 8290   match(Set mem (StoreL mem src));
 8291 
 8292   ins_cost(150);
 8293   format %{ "movq    $mem, $src\t# long" %}
 8294   ins_encode %{
 8295     __ movq($mem$$Address, $src$$constant);
 8296   %}
 8297   ins_pipe(ialu_mem_imm);
 8298 %}
 8299 
 8300 // Store Short/Char Immediate
 8301 instruct storeImmC0(memory mem, immI_0 zero)
 8302 %{
 8303   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8304   match(Set mem (StoreC mem zero));
 8305 
 8306   ins_cost(125); // XXX
 8307   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8308   ins_encode %{
 8309     __ movw($mem$$Address, r12);
 8310   %}
 8311   ins_pipe(ialu_mem_reg);
 8312 %}
 8313 
 8314 instruct storeImmI16(memory mem, immI16 src)
 8315 %{
 8316   predicate(UseStoreImmI16);
 8317   match(Set mem (StoreC mem src));
 8318 
 8319   ins_cost(150);
 8320   format %{ "movw    $mem, $src\t# short/char" %}
 8321   ins_encode %{
 8322     __ movw($mem$$Address, $src$$constant);
 8323   %}
 8324   ins_pipe(ialu_mem_imm);
 8325 %}
 8326 
 8327 // Store Byte Immediate
 8328 instruct storeImmB0(memory mem, immI_0 zero)
 8329 %{
 8330   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8331   match(Set mem (StoreB mem zero));
 8332 
 8333   ins_cost(125); // XXX
 8334   format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8335   ins_encode %{
 8336     __ movb($mem$$Address, r12);
 8337   %}
 8338   ins_pipe(ialu_mem_reg);
 8339 %}
 8340 
 8341 instruct storeImmB(memory mem, immI8 src)
 8342 %{
 8343   match(Set mem (StoreB mem src));
 8344 
 8345   ins_cost(150); // XXX
 8346   format %{ "movb    $mem, $src\t# byte" %}
 8347   ins_encode %{
 8348     __ movb($mem$$Address, $src$$constant);
 8349   %}
 8350   ins_pipe(ialu_mem_imm);
 8351 %}
 8352 
 8353 // Store Float
 8354 instruct storeF(memory mem, regF src)
 8355 %{
 8356   match(Set mem (StoreF mem src));
 8357 
 8358   ins_cost(95); // XXX
 8359   format %{ "movss   $mem, $src\t# float" %}
 8360   ins_encode %{
 8361     __ movflt($mem$$Address, $src$$XMMRegister);
 8362   %}
 8363   ins_pipe(pipe_slow); // XXX
 8364 %}
 8365 
// Store immediate float value (faster than a store from an XMM register)
 8367 instruct storeF0(memory mem, immF0 zero)
 8368 %{
 8369   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8370   match(Set mem (StoreF mem zero));
 8371 
 8372   ins_cost(25); // XXX
 8373   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8374   ins_encode %{
 8375     __ movl($mem$$Address, r12);
 8376   %}
 8377   ins_pipe(ialu_mem_reg);
 8378 %}
 8379 
 8380 instruct storeF_imm(memory mem, immF src)
 8381 %{
 8382   match(Set mem (StoreF mem src));
 8383 
 8384   ins_cost(50);
 8385   format %{ "movl    $mem, $src\t# float" %}
 8386   ins_encode %{
 8387     __ movl($mem$$Address, jint_cast($src$$constant));
 8388   %}
 8389   ins_pipe(ialu_mem_imm);
 8390 %}
 8391 
 8392 // Store Double
 8393 instruct storeD(memory mem, regD src)
 8394 %{
 8395   match(Set mem (StoreD mem src));
 8396 
 8397   ins_cost(95); // XXX
 8398   format %{ "movsd   $mem, $src\t# double" %}
 8399   ins_encode %{
 8400     __ movdbl($mem$$Address, $src$$XMMRegister);
 8401   %}
 8402   ins_pipe(pipe_slow); // XXX
 8403 %}
 8404 
// Store immediate double 0.0 (faster than a store from an XMM register)
 8406 instruct storeD0_imm(memory mem, immD0 src)
 8407 %{
 8408   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8409   match(Set mem (StoreD mem src));
 8410 
 8411   ins_cost(50);
 8412   format %{ "movq    $mem, $src\t# double 0." %}
 8413   ins_encode %{
 8414     __ movq($mem$$Address, $src$$constant);
 8415   %}
 8416   ins_pipe(ialu_mem_imm);
 8417 %}
 8418 
 8419 instruct storeD0(memory mem, immD0 zero)
 8420 %{
 8421   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8422   match(Set mem (StoreD mem zero));
 8423 
 8424   ins_cost(25); // XXX
 8425   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8426   ins_encode %{
 8427     __ movq($mem$$Address, r12);
 8428   %}
 8429   ins_pipe(ialu_mem_reg);
 8430 %}
 8431 
 8432 instruct storeSSI(stackSlotI dst, rRegI src)
 8433 %{
 8434   match(Set dst src);
 8435 
 8436   ins_cost(100);
 8437   format %{ "movl    $dst, $src\t# int stk" %}
 8438   ins_encode %{
 8439     __ movl($dst$$Address, $src$$Register);
 8440   %}
 8441   ins_pipe( ialu_mem_reg );
 8442 %}
 8443 
 8444 instruct storeSSL(stackSlotL dst, rRegL src)
 8445 %{
 8446   match(Set dst src);
 8447 
 8448   ins_cost(100);
 8449   format %{ "movq    $dst, $src\t# long stk" %}
 8450   ins_encode %{
 8451     __ movq($dst$$Address, $src$$Register);
 8452   %}
 8453   ins_pipe(ialu_mem_reg);
 8454 %}
 8455 
 8456 instruct storeSSP(stackSlotP dst, rRegP src)
 8457 %{
 8458   match(Set dst src);
 8459 
 8460   ins_cost(100);
 8461   format %{ "movq    $dst, $src\t# ptr stk" %}
 8462   ins_encode %{
 8463     __ movq($dst$$Address, $src$$Register);
 8464   %}
 8465   ins_pipe(ialu_mem_reg);
 8466 %}
 8467 
 8468 instruct storeSSF(stackSlotF dst, regF src)
 8469 %{
 8470   match(Set dst src);
 8471 
 8472   ins_cost(95); // XXX
 8473   format %{ "movss   $dst, $src\t# float stk" %}
 8474   ins_encode %{
 8475     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8476   %}
 8477   ins_pipe(pipe_slow); // XXX
 8478 %}
 8479 
 8480 instruct storeSSD(stackSlotD dst, regD src)
 8481 %{
 8482   match(Set dst src);
 8483 
 8484   ins_cost(95); // XXX
 8485   format %{ "movsd   $dst, $src\t# double stk" %}
 8486   ins_encode %{
 8487     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8488   %}
 8489   ins_pipe(pipe_slow); // XXX
 8490 %}
 8491 
 8492 instruct cacheWB(indirect addr)
 8493 %{
 8494   predicate(VM_Version::supports_data_cache_line_flush());
 8495   match(CacheWB addr);
 8496 
 8497   ins_cost(100);
 8498   format %{"cache wb $addr" %}
 8499   ins_encode %{
 8500     assert($addr->index_position() < 0, "should be");
 8501     assert($addr$$disp == 0, "should be");
 8502     __ cache_wb(Address($addr$$base$$Register, 0));
 8503   %}
 8504   ins_pipe(pipe_slow); // XXX
 8505 %}
 8506 
 8507 instruct cacheWBPreSync()
 8508 %{
 8509   predicate(VM_Version::supports_data_cache_line_flush());
 8510   match(CacheWBPreSync);
 8511 
 8512   ins_cost(100);
 8513   format %{"cache wb presync" %}
 8514   ins_encode %{
 8515     __ cache_wbsync(true);
 8516   %}
 8517   ins_pipe(pipe_slow); // XXX
 8518 %}
 8519 
 8520 instruct cacheWBPostSync()
 8521 %{
 8522   predicate(VM_Version::supports_data_cache_line_flush());
 8523   match(CacheWBPostSync);
 8524 
 8525   ins_cost(100);
 8526   format %{"cache wb postsync" %}
 8527   ins_encode %{
 8528     __ cache_wbsync(false);
 8529   %}
 8530   ins_pipe(pipe_slow); // XXX
 8531 %}
 8532 
 8533 //----------BSWAP Instructions-------------------------------------------------
 8534 instruct bytes_reverse_int(rRegI dst) %{
 8535   match(Set dst (ReverseBytesI dst));
 8536 
 8537   format %{ "bswapl  $dst" %}
 8538   ins_encode %{
 8539     __ bswapl($dst$$Register);
 8540   %}
 8541   ins_pipe( ialu_reg );
 8542 %}
 8543 
 8544 instruct bytes_reverse_long(rRegL dst) %{
 8545   match(Set dst (ReverseBytesL dst));
 8546 
 8547   format %{ "bswapq  $dst" %}
 8548   ins_encode %{
 8549     __ bswapq($dst$$Register);
 8550   %}
 8551   ins_pipe( ialu_reg);
 8552 %}
 8553 
 8554 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8555   match(Set dst (ReverseBytesUS dst));
 8556   effect(KILL cr);
 8557 
 8558   format %{ "bswapl  $dst\n\t"
 8559             "shrl    $dst,16\n\t" %}
 8560   ins_encode %{
 8561     __ bswapl($dst$$Register);
 8562     __ shrl($dst$$Register, 16);
 8563   %}
 8564   ins_pipe( ialu_reg );
 8565 %}
 8566 
 8567 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8568   match(Set dst (ReverseBytesS dst));
 8569   effect(KILL cr);
 8570 
 8571   format %{ "bswapl  $dst\n\t"
 8572             "sar     $dst,16\n\t" %}
 8573   ins_encode %{
 8574     __ bswapl($dst$$Register);
 8575     __ sarl($dst$$Register, 16);
 8576   %}
 8577   ins_pipe( ialu_reg );
 8578 %}
 8579 
 8580 //---------- Zeros Count Instructions ------------------------------------------
 8581 
 8582 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8583   predicate(UseCountLeadingZerosInstruction);
 8584   match(Set dst (CountLeadingZerosI src));
 8585   effect(KILL cr);
 8586 
 8587   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8588   ins_encode %{
 8589     __ lzcntl($dst$$Register, $src$$Register);
 8590   %}
 8591   ins_pipe(ialu_reg);
 8592 %}
 8593 
 8594 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8595   predicate(UseCountLeadingZerosInstruction);
 8596   match(Set dst (CountLeadingZerosI (LoadI src)));
 8597   effect(KILL cr);
 8598   ins_cost(175);
 8599   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8600   ins_encode %{
 8601     __ lzcntl($dst$$Register, $src$$Address);
 8602   %}
 8603   ins_pipe(ialu_reg_mem);
 8604 %}
 8605 
 8606 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8607   predicate(!UseCountLeadingZerosInstruction);
 8608   match(Set dst (CountLeadingZerosI src));
 8609   effect(KILL cr);
 8610 
 8611   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8612             "jnz     skip\n\t"
 8613             "movl    $dst, -1\n"
 8614       "skip:\n\t"
 8615             "negl    $dst\n\t"
 8616             "addl    $dst, 31" %}
 8617   ins_encode %{
 8618     Register Rdst = $dst$$Register;
 8619     Register Rsrc = $src$$Register;
 8620     Label skip;
 8621     __ bsrl(Rdst, Rsrc);
 8622     __ jccb(Assembler::notZero, skip);
 8623     __ movl(Rdst, -1);
 8624     __ bind(skip);
 8625     __ negl(Rdst);
 8626     __ addl(Rdst, BitsPerInt - 1);
 8627   %}
 8628   ins_pipe(ialu_reg);
 8629 %}
 8630 
 8631 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8632   predicate(UseCountLeadingZerosInstruction);
 8633   match(Set dst (CountLeadingZerosL src));
 8634   effect(KILL cr);
 8635 
 8636   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8637   ins_encode %{
 8638     __ lzcntq($dst$$Register, $src$$Register);
 8639   %}
 8640   ins_pipe(ialu_reg);
 8641 %}
 8642 
 8643 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8644   predicate(UseCountLeadingZerosInstruction);
 8645   match(Set dst (CountLeadingZerosL (LoadL src)));
 8646   effect(KILL cr);
 8647   ins_cost(175);
 8648   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8649   ins_encode %{
 8650     __ lzcntq($dst$$Register, $src$$Address);
 8651   %}
 8652   ins_pipe(ialu_reg_mem);
 8653 %}
 8654 
 8655 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8656   predicate(!UseCountLeadingZerosInstruction);
 8657   match(Set dst (CountLeadingZerosL src));
 8658   effect(KILL cr);
 8659 
 8660   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8661             "jnz     skip\n\t"
 8662             "movl    $dst, -1\n"
 8663       "skip:\n\t"
 8664             "negl    $dst\n\t"
 8665             "addl    $dst, 63" %}
 8666   ins_encode %{
 8667     Register Rdst = $dst$$Register;
 8668     Register Rsrc = $src$$Register;
 8669     Label skip;
 8670     __ bsrq(Rdst, Rsrc);
 8671     __ jccb(Assembler::notZero, skip);
 8672     __ movl(Rdst, -1);
 8673     __ bind(skip);
 8674     __ negl(Rdst);
 8675     __ addl(Rdst, BitsPerLong - 1);
 8676   %}
 8677   ins_pipe(ialu_reg);
 8678 %}
 8679 
 8680 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8681   predicate(UseCountTrailingZerosInstruction);
 8682   match(Set dst (CountTrailingZerosI src));
 8683   effect(KILL cr);
 8684 
 8685   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8686   ins_encode %{
 8687     __ tzcntl($dst$$Register, $src$$Register);
 8688   %}
 8689   ins_pipe(ialu_reg);
 8690 %}
 8691 
 8692 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8693   predicate(UseCountTrailingZerosInstruction);
 8694   match(Set dst (CountTrailingZerosI (LoadI src)));
 8695   effect(KILL cr);
 8696   ins_cost(175);
 8697   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8698   ins_encode %{
 8699     __ tzcntl($dst$$Register, $src$$Address);
 8700   %}
 8701   ins_pipe(ialu_reg_mem);
 8702 %}
 8703 
 8704 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8705   predicate(!UseCountTrailingZerosInstruction);
 8706   match(Set dst (CountTrailingZerosI src));
 8707   effect(KILL cr);
 8708 
 8709   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8710             "jnz     done\n\t"
 8711             "movl    $dst, 32\n"
 8712       "done:" %}
 8713   ins_encode %{
 8714     Register Rdst = $dst$$Register;
 8715     Label done;
 8716     __ bsfl(Rdst, $src$$Register);
 8717     __ jccb(Assembler::notZero, done);
 8718     __ movl(Rdst, BitsPerInt);
 8719     __ bind(done);
 8720   %}
 8721   ins_pipe(ialu_reg);
 8722 %}
 8723 
 8724 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8725   predicate(UseCountTrailingZerosInstruction);
 8726   match(Set dst (CountTrailingZerosL src));
 8727   effect(KILL cr);
 8728 
 8729   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8730   ins_encode %{
 8731     __ tzcntq($dst$$Register, $src$$Register);
 8732   %}
 8733   ins_pipe(ialu_reg);
 8734 %}
 8735 
 8736 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8737   predicate(UseCountTrailingZerosInstruction);
 8738   match(Set dst (CountTrailingZerosL (LoadL src)));
 8739   effect(KILL cr);
 8740   ins_cost(175);
 8741   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8742   ins_encode %{
 8743     __ tzcntq($dst$$Register, $src$$Address);
 8744   %}
 8745   ins_pipe(ialu_reg_mem);
 8746 %}
 8747 
 8748 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8749   predicate(!UseCountTrailingZerosInstruction);
 8750   match(Set dst (CountTrailingZerosL src));
 8751   effect(KILL cr);
 8752 
 8753   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8754             "jnz     done\n\t"
 8755             "movl    $dst, 64\n"
 8756       "done:" %}
 8757   ins_encode %{
 8758     Register Rdst = $dst$$Register;
 8759     Label done;
 8760     __ bsfq(Rdst, $src$$Register);
 8761     __ jccb(Assembler::notZero, done);
 8762     __ movl(Rdst, BitsPerLong);
 8763     __ bind(done);
 8764   %}
 8765   ins_pipe(ialu_reg);
 8766 %}
 8767 
 8768 //--------------- Reverse Operation Instructions ----------------
 8769 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8770   predicate(!VM_Version::supports_gfni());
 8771   match(Set dst (ReverseI src));
 8772   effect(TEMP dst, TEMP rtmp, KILL cr);
 8773   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8774   ins_encode %{
 8775     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8776   %}
 8777   ins_pipe( ialu_reg );
 8778 %}
 8779 
 8780 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8781   predicate(VM_Version::supports_gfni());
 8782   match(Set dst (ReverseI src));
 8783   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8784   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8785   ins_encode %{
 8786     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8787   %}
 8788   ins_pipe( ialu_reg );
 8789 %}
 8790 
 8791 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8792   predicate(!VM_Version::supports_gfni());
 8793   match(Set dst (ReverseL src));
 8794   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8795   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8796   ins_encode %{
 8797     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8798   %}
 8799   ins_pipe( ialu_reg );
 8800 %}
 8801 
 8802 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8803   predicate(VM_Version::supports_gfni());
 8804   match(Set dst (ReverseL src));
 8805   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8806   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8807   ins_encode %{
 8808     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8809   %}
 8810   ins_pipe( ialu_reg );
 8811 %}
 8812 
 8813 //---------- Population Count Instructions -------------------------------------
 8814 
 8815 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8816   predicate(UsePopCountInstruction);
 8817   match(Set dst (PopCountI src));
 8818   effect(KILL cr);
 8819 
 8820   format %{ "popcnt  $dst, $src" %}
 8821   ins_encode %{
 8822     __ popcntl($dst$$Register, $src$$Register);
 8823   %}
 8824   ins_pipe(ialu_reg);
 8825 %}
 8826 
 8827 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8828   predicate(UsePopCountInstruction);
 8829   match(Set dst (PopCountI (LoadI mem)));
 8830   effect(KILL cr);
 8831 
 8832   format %{ "popcnt  $dst, $mem" %}
 8833   ins_encode %{
 8834     __ popcntl($dst$$Register, $mem$$Address);
 8835   %}
 8836   ins_pipe(ialu_reg);
 8837 %}
 8838 
 8839 // Note: Long.bitCount(long) returns an int.
 8840 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8841   predicate(UsePopCountInstruction);
 8842   match(Set dst (PopCountL src));
 8843   effect(KILL cr);
 8844 
 8845   format %{ "popcnt  $dst, $src" %}
 8846   ins_encode %{
 8847     __ popcntq($dst$$Register, $src$$Register);
 8848   %}
 8849   ins_pipe(ialu_reg);
 8850 %}
 8851 
 8852 // Note: Long.bitCount(long) returns an int.
 8853 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8854   predicate(UsePopCountInstruction);
 8855   match(Set dst (PopCountL (LoadL mem)));
 8856   effect(KILL cr);
 8857 
 8858   format %{ "popcnt  $dst, $mem" %}
 8859   ins_encode %{
 8860     __ popcntq($dst$$Register, $mem$$Address);
 8861   %}
 8862   ins_pipe(ialu_reg);
 8863 %}
 8864 
 8865 
 8866 //----------MemBar Instructions-----------------------------------------------
 8867 // Memory barrier flavors
 8868 
 8869 instruct membar_acquire()
 8870 %{
 8871   match(MemBarAcquire);
 8872   match(LoadFence);
 8873   ins_cost(0);
 8874 
 8875   size(0);
 8876   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8877   ins_encode();
 8878   ins_pipe(empty);
 8879 %}
 8880 
 8881 instruct membar_acquire_lock()
 8882 %{
 8883   match(MemBarAcquireLock);
 8884   ins_cost(0);
 8885 
 8886   size(0);
 8887   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8888   ins_encode();
 8889   ins_pipe(empty);
 8890 %}
 8891 
 8892 instruct membar_release()
 8893 %{
 8894   match(MemBarRelease);
 8895   match(StoreFence);
 8896   ins_cost(0);
 8897 
 8898   size(0);
 8899   format %{ "MEMBAR-release ! (empty encoding)" %}
 8900   ins_encode();
 8901   ins_pipe(empty);
 8902 %}
 8903 
 8904 instruct membar_release_lock()
 8905 %{
 8906   match(MemBarReleaseLock);
 8907   ins_cost(0);
 8908 
 8909   size(0);
 8910   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8911   ins_encode();
 8912   ins_pipe(empty);
 8913 %}
 8914 
 8915 instruct membar_volatile(rFlagsReg cr) %{
 8916   match(MemBarVolatile);
 8917   effect(KILL cr);
 8918   ins_cost(400);
 8919 
 8920   format %{
 8921     $$template
 8922     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8923   %}
 8924   ins_encode %{
 8925     __ membar(Assembler::StoreLoad);
 8926   %}
 8927   ins_pipe(pipe_slow);
 8928 %}
 8929 
 8930 instruct unnecessary_membar_volatile()
 8931 %{
 8932   match(MemBarVolatile);
 8933   predicate(Matcher::post_store_load_barrier(n));
 8934   ins_cost(0);
 8935 
 8936   size(0);
 8937   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8938   ins_encode();
 8939   ins_pipe(empty);
 8940 %}
 8941 
 8942 instruct membar_storestore() %{
 8943   match(MemBarStoreStore);
 8944   match(StoreStoreFence);
 8945   ins_cost(0);
 8946 
 8947   size(0);
 8948   format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
 8950   ins_pipe(empty);
 8951 %}
 8952 
 8953 //----------Move Instructions--------------------------------------------------
 8954 
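// The cast rules below are reinterpreting moves between register classes;
// the "$dst$$reg != $src$$reg" guards let them encode to nothing when the
// register allocator assigns both operands to the same physical register.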
 8955 instruct castX2P(rRegP dst, rRegL src)
 8956 %{
 8957   match(Set dst (CastX2P src));
 8958 
 8959   format %{ "movq    $dst, $src\t# long->ptr" %}
 8960   ins_encode %{
 8961     if ($dst$$reg != $src$$reg) {
 8962       __ movptr($dst$$Register, $src$$Register);
 8963     }
 8964   %}
 8965   ins_pipe(ialu_reg_reg); // XXX
 8966 %}
 8967 
 8968 instruct castI2N(rRegN dst, rRegI src)
 8969 %{
 8970   match(Set dst (CastI2N src));
 8971 
 8972   format %{ "movq    $dst, $src\t# int -> narrow ptr" %}
 8973   ins_encode %{
 8974     if ($dst$$reg != $src$$reg) {
 8975       __ movl($dst$$Register, $src$$Register);
 8976     }
 8977   %}
 8978   ins_pipe(ialu_reg_reg); // XXX
 8979 %}
 8980 
 8981 instruct castN2X(rRegL dst, rRegN src)
 8982 %{
 8983   match(Set dst (CastP2X src));
 8984 
 8985   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8986   ins_encode %{
 8987     if ($dst$$reg != $src$$reg) {
 8988       __ movptr($dst$$Register, $src$$Register);
 8989     }
 8990   %}
 8991   ins_pipe(ialu_reg_reg); // XXX
 8992 %}
 8993 
 8994 instruct castP2X(rRegL dst, rRegP src)
 8995 %{
 8996   match(Set dst (CastP2X src));
 8997 
 8998   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8999   ins_encode %{
 9000     if ($dst$$reg != $src$$reg) {
 9001       __ movptr($dst$$Register, $src$$Register);
 9002     }
 9003   %}
 9004   ins_pipe(ialu_reg_reg); // XXX
 9005 %}
 9006 
 9007 // Convert oop into int for vectors alignment masking
 9008 instruct convP2I(rRegI dst, rRegP src)
 9009 %{
 9010   match(Set dst (ConvL2I (CastP2X src)));
 9011 
 9012   format %{ "movl    $dst, $src\t# ptr -> int" %}
 9013   ins_encode %{
 9014     __ movl($dst$$Register, $src$$Register);
 9015   %}
 9016   ins_pipe(ialu_reg_reg); // XXX
 9017 %}
 9018 
 9019 // Convert compressed oop into int for vectors alignment masking
 9020 // in case of 32bit oops (heap < 4Gb).
 9021 instruct convN2I(rRegI dst, rRegN src)
 9022 %{
 9023   predicate(CompressedOops::shift() == 0);
 9024   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 9025 
 9026   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 9027   ins_encode %{
 9028     __ movl($dst$$Register, $src$$Register);
 9029   %}
 9030   ins_pipe(ialu_reg_reg); // XXX
 9031 %}
 9032 
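// Compressed oop arithmetic, for reference (heap base B, shift S):
//   encode: narrow = (oop == nullptr) ? 0 : (int)((oop - B) >> S)
//   decode: oop    = (narrow == 0) ? nullptr : B + ((long)narrow << S)
// The *_not_null variants may skip the null check, and when B == 0 and
// S == 0 (heap below 4Gb) both directions reduce to a plain 32-bit move.
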
 9033 // Convert oop pointer into compressed form
 9034 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 9035   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 9036   match(Set dst (EncodeP src));
 9037   effect(KILL cr);
 9038   format %{ "encode_heap_oop $dst,$src" %}
 9039   ins_encode %{
 9040     Register s = $src$$Register;
 9041     Register d = $dst$$Register;
 9042     if (s != d) {
 9043       __ movq(d, s);
 9044     }
 9045     __ encode_heap_oop(d);
 9046   %}
 9047   ins_pipe(ialu_reg_long);
 9048 %}
 9049 
 9050 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 9051   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 9052   match(Set dst (EncodeP src));
 9053   effect(KILL cr);
 9054   format %{ "encode_heap_oop_not_null $dst,$src" %}
 9055   ins_encode %{
 9056     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 9057   %}
 9058   ins_pipe(ialu_reg_long);
 9059 %}
 9060 
 9061 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 9062   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 9063             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 9064   match(Set dst (DecodeN src));
 9065   effect(KILL cr);
 9066   format %{ "decode_heap_oop $dst,$src" %}
 9067   ins_encode %{
 9068     Register s = $src$$Register;
 9069     Register d = $dst$$Register;
 9070     if (s != d) {
 9071       __ movq(d, s);
 9072     }
 9073     __ decode_heap_oop(d);
 9074   %}
 9075   ins_pipe(ialu_reg_long);
 9076 %}
 9077 
 9078 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9079   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 9080             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 9081   match(Set dst (DecodeN src));
 9082   effect(KILL cr);
 9083   format %{ "decode_heap_oop_not_null $dst,$src" %}
 9084   ins_encode %{
 9085     Register s = $src$$Register;
 9086     Register d = $dst$$Register;
 9087     if (s != d) {
 9088       __ decode_heap_oop_not_null(d, s);
 9089     } else {
 9090       __ decode_heap_oop_not_null(d);
 9091     }
 9092   %}
 9093   ins_pipe(ialu_reg_long);
 9094 %}
 9095 
 9096 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 9097   match(Set dst (EncodePKlass src));
 9098   effect(TEMP dst, KILL cr);
 9099   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 9100   ins_encode %{
 9101     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9102   %}
 9103   ins_pipe(ialu_reg_long);
 9104 %}
 9105 
 9106 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9107   match(Set dst (DecodeNKlass src));
 9108   effect(TEMP dst, KILL cr);
 9109   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 9110   ins_encode %{
 9111     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9112   %}
 9113   ins_pipe(ialu_reg_long);
 9114 %}
 9115 
 9116 //----------Conditional Move---------------------------------------------------
 9117 // Jump
 9118 // dummy instruction for generating temp registers
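// The jumpXtnd rules implement switch dispatch through a jump table: the
// table of code addresses lives in the constant table, $constantaddress is
// its base, and the (scaled, possibly offset) switch value indexes into it.
// A dense Java switch is the typical source (illustrative sketch):
//
//   switch (x) { case 0: ...  case 1: ...  case 2: ...  case 3: ... }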
 9119 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 9120   match(Jump (LShiftL switch_val shift));
 9121   ins_cost(350);
 9122   predicate(false);
 9123   effect(TEMP dest);
 9124 
 9125   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9126             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9127   ins_encode %{
    // We could use jump(ArrayAddress), but the macro assembler would need r10
    // as a scratch register for that, and the compiler allocates r10 freely.
    // So we build the dispatch address by hand.
 9131     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9132     // ArrayAddress dispatch(table, index);
 9133     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9134     __ lea($dest$$Register, $constantaddress);
 9135     __ jmp(dispatch);
 9136   %}
 9137   ins_pipe(pipe_jmp);
 9138 %}
 9139 
 9140 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 9141   match(Jump (AddL (LShiftL switch_val shift) offset));
 9142   ins_cost(350);
 9143   effect(TEMP dest);
 9144 
 9145   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9146             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9147   ins_encode %{
    // We could use jump(ArrayAddress), but the macro assembler would need r10
    // as a scratch register for that, and the compiler allocates r10 freely.
    // So we build the dispatch address by hand.
 9151     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9152     // ArrayAddress dispatch(table, index);
 9153     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9154     __ lea($dest$$Register, $constantaddress);
 9155     __ jmp(dispatch);
 9156   %}
 9157   ins_pipe(pipe_jmp);
 9158 %}
 9159 
 9160 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 9161   match(Jump switch_val);
 9162   ins_cost(350);
 9163   effect(TEMP dest);
 9164 
 9165   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9166             "jmp     [$dest + $switch_val]\n\t" %}
 9167   ins_encode %{
    // We could use jump(ArrayAddress), but the macro assembler would need r10
    // as a scratch register for that, and the compiler allocates r10 freely.
    // So we build the dispatch address by hand.
 9171     // Address index(noreg, switch_reg, Address::times_1);
 9172     // ArrayAddress dispatch(table, index);
 9173     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9174     __ lea($dest$$Register, $constantaddress);
 9175     __ jmp(dispatch);
 9176   %}
 9177   ins_pipe(pipe_jmp);
 9178 %}
 9179 
 9180 // Conditional move
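// The cmov*_imm_01* rules handle CMove(cond, 1, 0): the predicate requires
// the value selected when $cop holds to be the constant 0 and the other to
// be the constant 1, so the result is in effect "!cond" as a 0/1 value and
// can be materialized with a single setb on the negated condition, with no
// constant loads and no cmov.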
 9181 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9182 %{
 9183   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9184   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9185 
 9186   ins_cost(100); // XXX
 9187   format %{ "setbn$cop $dst\t# signed, int" %}
 9188   ins_encode %{
 9189     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9190     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9191   %}
 9192   ins_pipe(ialu_reg);
 9193 %}
 9194 
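// The *_ndd rules use the APX (Advanced Performance Extensions) "new data
// destination" encodings, e.g. ecmovl dst, src1, src2: the destination is a
// separate operand, so the select no longer has to clobber one of its inputs
// with a preparatory mov. The Flag_ndd_demotable_opr* hints appear to allow
// demoting an NDD form to the shorter legacy two-operand encoding when the
// destination coincides with that operand.
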
 9195 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9196 %{
 9197   predicate(!UseAPX);
 9198   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9199 
 9200   ins_cost(200); // XXX
 9201   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9202   ins_encode %{
 9203     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9204   %}
 9205   ins_pipe(pipe_cmov_reg);
 9206 %}
 9207 
 9208 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9209 %{
 9210   predicate(UseAPX);
 9211   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9212 
 9213   ins_cost(200);
 9214   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9215   ins_encode %{
 9216     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9217   %}
 9218   ins_pipe(pipe_cmov_reg);
 9219 %}
 9220 
 9221 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9222 %{
 9223   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9224   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9225 
 9226   ins_cost(100); // XXX
 9227   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9228   ins_encode %{
 9229     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9230     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9231   %}
 9232   ins_pipe(ialu_reg);
 9233 %}
 9234 
 9235 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9236   predicate(!UseAPX);
 9237   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9238 
 9239   ins_cost(200); // XXX
 9240   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9241   ins_encode %{
 9242     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9243   %}
 9244   ins_pipe(pipe_cmov_reg);
 9245 %}
 9246 
 9247 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9248   predicate(UseAPX);
 9249   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9250 
 9251   ins_cost(200);
 9252   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9253   ins_encode %{
 9254     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9255   %}
 9256   ins_pipe(pipe_cmov_reg);
 9257 %}
 9258 
 9259 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9260 %{
 9261   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9262   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9263 
 9264   ins_cost(100); // XXX
 9265   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9266   ins_encode %{
 9267     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9268     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9269   %}
 9270   ins_pipe(ialu_reg);
 9271 %}
 9272 
 9273 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9274 %{
 9275   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9276   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9277 
 9278   ins_cost(100); // XXX
 9279   format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
 9280   ins_encode %{
 9281     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9282     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9283   %}
 9284   ins_pipe(ialu_reg);
 9285 %}
 9286 
 9287 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9288   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9289 
 9290   ins_cost(200);
 9291   expand %{
 9292     cmovI_regU(cop, cr, dst, src);
 9293   %}
 9294 %}
 9295 
 9296 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
 9297   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9298 
 9299   ins_cost(200);
 9300   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9301   ins_encode %{
 9302     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9303   %}
 9304   ins_pipe(pipe_cmov_reg);
 9305 %}
 9306 
 9307 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9308   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9309   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9310 
 9311   ins_cost(200); // XXX
 9312   format %{ "cmovpl  $dst, $src\n\t"
 9313             "cmovnel $dst, $src" %}
 9314   ins_encode %{
 9315     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9316     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9317   %}
 9318   ins_pipe(pipe_cmov_reg);
 9319 %}
 9320 
 9321 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9322 // inputs of the CMove
 9323 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9324   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9325   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9326   effect(TEMP dst);
 9327 
 9328   ins_cost(200); // XXX
 9329   format %{ "cmovpl  $dst, $src\n\t"
 9330             "cmovnel $dst, $src" %}
 9331   ins_encode %{
 9332     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9333     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9334   %}
 9335   ins_pipe(pipe_cmov_reg);
 9336 %}
 9337 
 9338 // Conditional move
 9339 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9340   predicate(!UseAPX);
 9341   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9342 
 9343   ins_cost(250); // XXX
 9344   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9345   ins_encode %{
 9346     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9347   %}
 9348   ins_pipe(pipe_cmov_mem);
 9349 %}
 9350 
 9351 // Conditional move
 9352 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9353 %{
 9354   predicate(UseAPX);
 9355   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9356 
 9357   ins_cost(250);
 9358   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9359   ins_encode %{
 9360     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9361   %}
 9362   ins_pipe(pipe_cmov_mem);
 9363 %}
 9364 
 9365 // Conditional move
 9366 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9367 %{
 9368   predicate(!UseAPX);
 9369   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9370 
 9371   ins_cost(250); // XXX
 9372   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9373   ins_encode %{
 9374     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9375   %}
 9376   ins_pipe(pipe_cmov_mem);
 9377 %}
 9378 
 9379 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9380   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9381 
 9382   ins_cost(250);
 9383   expand %{
 9384     cmovI_memU(cop, cr, dst, src);
 9385   %}
 9386 %}
 9387 
 9388 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9389 %{
 9390   predicate(UseAPX);
 9391   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9392 
 9393   ins_cost(250);
 9394   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9395   ins_encode %{
 9396     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9397   %}
 9398   ins_pipe(pipe_cmov_mem);
 9399 %}
 9400 
 9401 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
 9402 %{
 9403   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9404 
 9405   ins_cost(250);
 9406   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9407   ins_encode %{
 9408     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9409   %}
 9410   ins_pipe(pipe_cmov_mem);
 9411 %}
 9412 
 9413 // Conditional move
 9414 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9415 %{
 9416   predicate(!UseAPX);
 9417   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9418 
 9419   ins_cost(200); // XXX
 9420   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9421   ins_encode %{
 9422     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9423   %}
 9424   ins_pipe(pipe_cmov_reg);
 9425 %}
 9426 
 9427 // Conditional move ndd
 9428 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9429 %{
 9430   predicate(UseAPX);
 9431   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9432 
 9433   ins_cost(200);
 9434   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9435   ins_encode %{
 9436     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9437   %}
 9438   ins_pipe(pipe_cmov_reg);
 9439 %}
 9440 
 9441 // Conditional move
 9442 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9443 %{
 9444   predicate(!UseAPX);
 9445   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9446 
 9447   ins_cost(200); // XXX
 9448   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9449   ins_encode %{
 9450     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9451   %}
 9452   ins_pipe(pipe_cmov_reg);
 9453 %}
 9454 
 9455 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9456   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9457 
 9458   ins_cost(200);
 9459   expand %{
 9460     cmovN_regU(cop, cr, dst, src);
 9461   %}
 9462 %}
 9463 
 9464 // Conditional move ndd
 9465 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9466 %{
 9467   predicate(UseAPX);
 9468   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9469 
 9470   ins_cost(200);
 9471   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9472   ins_encode %{
 9473     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9474   %}
 9475   ins_pipe(pipe_cmov_reg);
 9476 %}
 9477 
 9478 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
 9479   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9480 
 9481   ins_cost(200);
 9482   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
 9483   ins_encode %{
 9484     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9485   %}
 9486   ins_pipe(pipe_cmov_reg);
 9487 %}
 9488 
 9489 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9490   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9491   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9492 
 9493   ins_cost(200); // XXX
 9494   format %{ "cmovpl  $dst, $src\n\t"
 9495             "cmovnel $dst, $src" %}
 9496   ins_encode %{
 9497     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9498     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9499   %}
 9500   ins_pipe(pipe_cmov_reg);
 9501 %}
 9502 
 9503 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9504 // inputs of the CMove
 9505 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9506   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9507   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9508 
 9509   ins_cost(200); // XXX
 9510   format %{ "cmovpl  $dst, $src\n\t"
 9511             "cmovnel $dst, $src" %}
 9512   ins_encode %{
 9513     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9514     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9515   %}
 9516   ins_pipe(pipe_cmov_reg);
 9517 %}
 9518 
 9519 // Conditional move
 9520 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9521 %{
 9522   predicate(!UseAPX);
 9523   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9524 
 9525   ins_cost(200); // XXX
 9526   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9527   ins_encode %{
 9528     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9529   %}
 9530   ins_pipe(pipe_cmov_reg);  // XXX
 9531 %}
 9532 
 9533 // Conditional move ndd
 9534 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9535 %{
 9536   predicate(UseAPX);
 9537   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9538 
 9539   ins_cost(200);
 9540   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9541   ins_encode %{
 9542     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9543   %}
 9544   ins_pipe(pipe_cmov_reg);
 9545 %}
 9546 
 9547 // Conditional move
 9548 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9549 %{
 9550   predicate(!UseAPX);
 9551   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9552 
 9553   ins_cost(200); // XXX
 9554   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9555   ins_encode %{
 9556     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9557   %}
 9558   ins_pipe(pipe_cmov_reg); // XXX
 9559 %}
 9560 
 9561 // Conditional move ndd
 9562 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9563 %{
 9564   predicate(UseAPX);
 9565   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9566 
 9567   ins_cost(200);
 9568   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9569   ins_encode %{
 9570     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9571   %}
 9572   ins_pipe(pipe_cmov_reg);
 9573 %}
 9574 
 9575 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9576   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9577 
 9578   ins_cost(200);
 9579   expand %{
 9580     cmovP_regU(cop, cr, dst, src);
 9581   %}
 9582 %}
 9583 
 9584 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
 9585   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9586 
 9587   ins_cost(200);
 9588   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
 9589   ins_encode %{
 9590     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9591   %}
 9592   ins_pipe(pipe_cmov_reg);
 9593 %}
 9594 
 9595 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9596   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9597   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9598 
 9599   ins_cost(200); // XXX
 9600   format %{ "cmovpq  $dst, $src\n\t"
 9601             "cmovneq $dst, $src" %}
 9602   ins_encode %{
 9603     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9604     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9605   %}
 9606   ins_pipe(pipe_cmov_reg);
 9607 %}
 9608 
 9609 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9610 // inputs of the CMove
 9611 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9612   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9613   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9614 
 9615   ins_cost(200); // XXX
 9616   format %{ "cmovpq  $dst, $src\n\t"
 9617             "cmovneq $dst, $src" %}
 9618   ins_encode %{
 9619     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9620     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9621   %}
 9622   ins_pipe(pipe_cmov_reg);
 9623 %}
 9624 
 9625 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9626 %{
 9627   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9628   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9629 
 9630   ins_cost(100); // XXX
 9631   format %{ "setbn$cop $dst\t# signed, long" %}
 9632   ins_encode %{
 9633     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9634     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9635   %}
 9636   ins_pipe(ialu_reg);
 9637 %}
 9638 
 9639 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9640 %{
 9641   predicate(!UseAPX);
 9642   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9643 
 9644   ins_cost(200); // XXX
 9645   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9646   ins_encode %{
 9647     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9648   %}
 9649   ins_pipe(pipe_cmov_reg);  // XXX
 9650 %}
 9651 
 9652 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9653 %{
 9654   predicate(UseAPX);
 9655   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9656 
 9657   ins_cost(200);
 9658   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9659   ins_encode %{
 9660     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9661   %}
 9662   ins_pipe(pipe_cmov_reg);
 9663 %}
 9664 
 9665 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9666 %{
 9667   predicate(!UseAPX);
 9668   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9669 
 9670   ins_cost(200); // XXX
 9671   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9672   ins_encode %{
 9673     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9674   %}
 9675   ins_pipe(pipe_cmov_mem);  // XXX
 9676 %}
 9677 
 9678 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9679 %{
 9680   predicate(UseAPX);
 9681   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9682 
 9683   ins_cost(200);
 9684   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9685   ins_encode %{
 9686     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9687   %}
 9688   ins_pipe(pipe_cmov_mem);
 9689 %}
 9690 
 9691 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9692 %{
 9693   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9694   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9695 
 9696   ins_cost(100); // XXX
 9697   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9698   ins_encode %{
 9699     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9700     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9701   %}
 9702   ins_pipe(ialu_reg);
 9703 %}
 9704 
 9705 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9706 %{
 9707   predicate(!UseAPX);
 9708   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9709 
 9710   ins_cost(200); // XXX
 9711   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9712   ins_encode %{
 9713     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9714   %}
 9715   ins_pipe(pipe_cmov_reg); // XXX
 9716 %}
 9717 
 9718 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9719 %{
 9720   predicate(UseAPX);
 9721   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9722 
 9723   ins_cost(200);
 9724   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9725   ins_encode %{
 9726     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9727   %}
 9728   ins_pipe(pipe_cmov_reg);
 9729 %}
 9730 
 9731 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9732 %{
 9733   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9734   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9735 
 9736   ins_cost(100); // XXX
 9737   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9738   ins_encode %{
 9739     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9740     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9741   %}
 9742   ins_pipe(ialu_reg);
 9743 %}
 9744 
 9745 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9746 %{
 9747   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9748   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9749 
 9750   ins_cost(100); // XXX
 9751   format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
 9752   ins_encode %{
 9753     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9754     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9755   %}
 9756   ins_pipe(ialu_reg);
 9757 %}
 9758 
 9759 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9760   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9761 
 9762   ins_cost(200);
 9763   expand %{
 9764     cmovL_regU(cop, cr, dst, src);
 9765   %}
 9766 %}
 9767 
 9768 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
 9769 %{
 9770   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9771 
 9772   ins_cost(200);
 9773   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9774   ins_encode %{
 9775     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9776   %}
 9777   ins_pipe(pipe_cmov_reg);
 9778 %}
 9779 
 9780 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9781   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9782   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9783 
 9784   ins_cost(200); // XXX
 9785   format %{ "cmovpq  $dst, $src\n\t"
 9786             "cmovneq $dst, $src" %}
 9787   ins_encode %{
 9788     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9789     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9790   %}
 9791   ins_pipe(pipe_cmov_reg);
 9792 %}
 9793 
 9794 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9795 // inputs of the CMove
 9796 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9797   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9798   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9799 
 9800   ins_cost(200); // XXX
 9801   format %{ "cmovpq  $dst, $src\n\t"
 9802             "cmovneq $dst, $src" %}
 9803   ins_encode %{
 9804     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9805     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9806   %}
 9807   ins_pipe(pipe_cmov_reg);
 9808 %}
 9809 
 9810 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9811 %{
 9812   predicate(!UseAPX);
 9813   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9814 
 9815   ins_cost(200); // XXX
 9816   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9817   ins_encode %{
 9818     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9819   %}
 9820   ins_pipe(pipe_cmov_mem); // XXX
 9821 %}
 9822 
 9823 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9824   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9825 
 9826   ins_cost(200);
 9827   expand %{
 9828     cmovL_memU(cop, cr, dst, src);
 9829   %}
 9830 %}
 9831 
 9832 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9833 %{
 9834   predicate(UseAPX);
 9835   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9836 
 9837   ins_cost(200);
 9838   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9839   ins_encode %{
 9840     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9841   %}
 9842   ins_pipe(pipe_cmov_mem);
 9843 %}
 9844 
 9845 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
 9846 %{
 9847   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9848 
 9849   ins_cost(200);
 9850   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9851   ins_encode %{
 9852     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9853   %}
 9854   ins_pipe(pipe_cmov_mem);
 9855 %}
 9856 
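// There is no cmov for XMM registers, so floating-point conditional moves
// are emitted as a short branch around the move: jccb on the inverted
// condition, then movss/movsd, then the skip label.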
 9857 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9858 %{
 9859   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9860 
 9861   ins_cost(200); // XXX
 9862   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9863             "movss     $dst, $src\n"
 9864     "skip:" %}
 9865   ins_encode %{
 9866     Label Lskip;
 9867     // Invert sense of branch from sense of CMOV
 9868     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9869     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9870     __ bind(Lskip);
 9871   %}
 9872   ins_pipe(pipe_slow);
 9873 %}
 9874 
 9875 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9876 %{
 9877   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9878 
 9879   ins_cost(200); // XXX
 9880   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9881             "movss     $dst, $src\n"
 9882     "skip:" %}
 9883   ins_encode %{
 9884     Label Lskip;
 9885     // Invert sense of branch from sense of CMOV
 9886     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9887     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9888     __ bind(Lskip);
 9889   %}
 9890   ins_pipe(pipe_slow);
 9891 %}
 9892 
 9893 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9894   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9895 
 9896   ins_cost(200);
 9897   expand %{
 9898     cmovF_regU(cop, cr, dst, src);
 9899   %}
 9900 %}
 9901 
 9902 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
 9903 %{
 9904   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9905 
 9906   ins_cost(200); // XXX
 9907   format %{ "jn$cop    skip\t# signed, unsigned cmove float\n\t"
 9908             "movss     $dst, $src\n"
 9909     "skip:" %}
 9910   ins_encode %{
 9911     Label Lskip;
 9912     // Invert sense of branch from sense of CMOV
 9913     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9914     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9915     __ bind(Lskip);
 9916   %}
 9917   ins_pipe(pipe_slow);
 9918 %}
 9919 
 9920 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9921 %{
 9922   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9923 
 9924   ins_cost(200); // XXX
 9925   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9926             "movsd     $dst, $src\n"
 9927     "skip:" %}
 9928   ins_encode %{
 9929     Label Lskip;
 9930     // Invert sense of branch from sense of CMOV
 9931     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9932     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9933     __ bind(Lskip);
 9934   %}
 9935   ins_pipe(pipe_slow);
 9936 %}
 9937 
 9938 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9939 %{
 9940   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9941 
 9942   ins_cost(200); // XXX
 9943   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9944             "movsd     $dst, $src\n"
 9945     "skip:" %}
 9946   ins_encode %{
 9947     Label Lskip;
 9948     // Invert sense of branch from sense of CMOV
 9949     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9950     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9951     __ bind(Lskip);
 9952   %}
 9953   ins_pipe(pipe_slow);
 9954 %}
 9955 
 9956 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9957   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9958 
 9959   ins_cost(200);
 9960   expand %{
 9961     cmovD_regU(cop, cr, dst, src);
 9962   %}
 9963 %}
 9964 
 9965 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
 9966 %{
 9967   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9968 
 9969   ins_cost(200); // XXX
 9970   format %{ "jn$cop    skip\t# signed, unsigned cmove double\n\t"
 9971             "movsd     $dst, $src\n"
 9972     "skip:" %}
 9973   ins_encode %{
 9974     Label Lskip;
 9975     // Invert sense of branch from sense of CMOV
 9976     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9977     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9978     __ bind(Lskip);
 9979   %}
 9980   ins_pipe(pipe_slow);
 9981 %}
 9982 
 9983 //----------Arithmetic Instructions--------------------------------------------
 9984 //----------Addition Instructions----------------------------------------------
 9985 
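// The flag(PD::Flag_sets_*) annotations record which EFLAGS bits a rule
// defines, so that later passes can reuse the flags an add has already
// produced instead of emitting a separate test/cmp.
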
 9986 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9987 %{
 9988   predicate(!UseAPX);
 9989   match(Set dst (AddI dst src));
 9990   effect(KILL cr);
 9991   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9992   format %{ "addl    $dst, $src\t# int" %}
 9993   ins_encode %{
 9994     __ addl($dst$$Register, $src$$Register);
 9995   %}
 9996   ins_pipe(ialu_reg_reg);
 9997 %}
 9998 
 9999 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
10000 %{
10001   predicate(UseAPX);
10002   match(Set dst (AddI src1 src2));
10003   effect(KILL cr);
10004   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10005 
10006   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10007   ins_encode %{
10008     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
10009   %}
10010   ins_pipe(ialu_reg_reg);
10011 %}
10012 
10013 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10014 %{
10015   predicate(!UseAPX);
10016   match(Set dst (AddI dst src));
10017   effect(KILL cr);
10018   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10019 
10020   format %{ "addl    $dst, $src\t# int" %}
10021   ins_encode %{
10022     __ addl($dst$$Register, $src$$constant);
10023   %}
10024   ins_pipe( ialu_reg );
10025 %}
10026 
10027 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
10028 %{
10029   predicate(UseAPX);
10030   match(Set dst (AddI src1 src2));
10031   effect(KILL cr);
10032   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10033 
10034   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10035   ins_encode %{
10036     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
10037   %}
10038   ins_pipe( ialu_reg );
10039 %}
10040 
10041 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
10042 %{
10043   predicate(UseAPX);
10044   match(Set dst (AddI (LoadI src1) src2));
10045   effect(KILL cr);
10046   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10047 
10048   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10049   ins_encode %{
10050     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
10051   %}
10052   ins_pipe( ialu_reg );
10053 %}
10054 
10055 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10056 %{
10057   predicate(!UseAPX);
10058   match(Set dst (AddI dst (LoadI src)));
10059   effect(KILL cr);
10060   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10061 
10062   ins_cost(150); // XXX
10063   format %{ "addl    $dst, $src\t# int" %}
10064   ins_encode %{
10065     __ addl($dst$$Register, $src$$Address);
10066   %}
10067   ins_pipe(ialu_reg_mem);
10068 %}
10069 
10070 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10071 %{
10072   predicate(UseAPX);
10073   match(Set dst (AddI src1 (LoadI src2)));
10074   effect(KILL cr);
10075   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10076 
10077   ins_cost(150);
10078   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10079   ins_encode %{
10080     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10081   %}
10082   ins_pipe(ialu_reg_mem);
10083 %}
10084 
10085 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10086 %{
10087   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10088   effect(KILL cr);
10089   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10090 
10091   ins_cost(150); // XXX
10092   format %{ "addl    $dst, $src\t# int" %}
10093   ins_encode %{
10094     __ addl($dst$$Address, $src$$Register);
10095   %}
10096   ins_pipe(ialu_mem_reg);
10097 %}
10098 
10099 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10100 %{
10101   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10102   effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

10106   ins_cost(125); // XXX
10107   format %{ "addl    $dst, $src\t# int" %}
10108   ins_encode %{
10109     __ addl($dst$$Address, $src$$constant);
10110   %}
10111   ins_pipe(ialu_mem_imm);
10112 %}
10113 
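// inc/dec have shorter encodings than add/sub with an immediate, but they
// update all arithmetic flags except CF; the partial flag update can stall
// some microarchitectures, which is why these rules are gated on UseIncDec.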
10114 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10115 %{
10116   predicate(!UseAPX && UseIncDec);
10117   match(Set dst (AddI dst src));
10118   effect(KILL cr);
10119 
10120   format %{ "incl    $dst\t# int" %}
10121   ins_encode %{
10122     __ incrementl($dst$$Register);
10123   %}
10124   ins_pipe(ialu_reg);
10125 %}
10126 
10127 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10128 %{
10129   predicate(UseAPX && UseIncDec);
10130   match(Set dst (AddI src val));
10131   effect(KILL cr);
10132   flag(PD::Flag_ndd_demotable_opr1);
10133 
10134   format %{ "eincl    $dst, $src\t# int ndd" %}
10135   ins_encode %{
10136     __ eincl($dst$$Register, $src$$Register, false);
10137   %}
10138   ins_pipe(ialu_reg);
10139 %}
10140 
10141 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10142 %{
10143   predicate(UseAPX && UseIncDec);
10144   match(Set dst (AddI (LoadI src) val));
10145   effect(KILL cr);
10146 
10147   format %{ "eincl    $dst, $src\t# int ndd" %}
10148   ins_encode %{
10149     __ eincl($dst$$Register, $src$$Address, false);
10150   %}
10151   ins_pipe(ialu_reg);
10152 %}
10153 
10154 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10155 %{
10156   predicate(UseIncDec);
10157   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10158   effect(KILL cr);
10159 
10160   ins_cost(125); // XXX
10161   format %{ "incl    $dst\t# int" %}
10162   ins_encode %{
10163     __ incrementl($dst$$Address);
10164   %}
10165   ins_pipe(ialu_mem_imm);
10166 %}
10167 
10168 // XXX why does that use AddI
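// (Most likely because C2 canonicalizes "x - c" into "x + (-c)" in
// SubNode::Ideal, so a decrement reaches the matcher as AddI with immI_M1.)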
10169 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10170 %{
10171   predicate(!UseAPX && UseIncDec);
10172   match(Set dst (AddI dst src));
10173   effect(KILL cr);
10174 
10175   format %{ "decl    $dst\t# int" %}
10176   ins_encode %{
10177     __ decrementl($dst$$Register);
10178   %}
10179   ins_pipe(ialu_reg);
10180 %}
10181 
10182 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10183 %{
10184   predicate(UseAPX && UseIncDec);
10185   match(Set dst (AddI src val));
10186   effect(KILL cr);
10187   flag(PD::Flag_ndd_demotable_opr1);
10188 
10189   format %{ "edecl    $dst, $src\t# int ndd" %}
10190   ins_encode %{
10191     __ edecl($dst$$Register, $src$$Register, false);
10192   %}
10193   ins_pipe(ialu_reg);
10194 %}
10195 
10196 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10197 %{
10198   predicate(UseAPX && UseIncDec);
10199   match(Set dst (AddI (LoadI src) val));
10200   effect(KILL cr);
10201 
10202   format %{ "edecl    $dst, $src\t# int ndd" %}
10203   ins_encode %{
10204     __ edecl($dst$$Register, $src$$Address, false);
10205   %}
10206   ins_pipe(ialu_reg);
10207 %}
10208 
10209 // XXX why does that use AddI
10210 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10211 %{
10212   predicate(UseIncDec);
10213   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10214   effect(KILL cr);
10215 
10216   ins_cost(125); // XXX
10217   format %{ "decl    $dst\t# int" %}
10218   ins_encode %{
10219     __ decrementl($dst$$Address);
10220   %}
10221   ins_pipe(ialu_mem_imm);
10222 %}
10223 
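// The lea rules below (int here, long further down) fold add/shift/constant
// combinations into a single address computation, e.g. (sketch):
//   dst = base + (index << 2) + 16   ==>   leal dst, [base + index*4 + 16]
// supports_fast_2op_lea()/supports_fast_3op_lea() gate the two- and
// three-component forms separately, since three-component lea is slow on
// some cores.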
10224 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10225 %{
10226   predicate(VM_Version::supports_fast_2op_lea());
10227   match(Set dst (AddI (LShiftI index scale) disp));
10228 
10229   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10230   ins_encode %{
10231     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10232     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10233   %}
10234   ins_pipe(ialu_reg_reg);
10235 %}
10236 
10237 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10238 %{
10239   predicate(VM_Version::supports_fast_3op_lea());
10240   match(Set dst (AddI (AddI base index) disp));
10241 
10242   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10243   ins_encode %{
10244     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10245   %}
10246   ins_pipe(ialu_reg_reg);
10247 %}
10248 
10249 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10250 %{
10251   predicate(VM_Version::supports_fast_2op_lea());
10252   match(Set dst (AddI base (LShiftI index scale)));
10253 
10254   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10255   ins_encode %{
10256     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10257     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10258   %}
10259   ins_pipe(ialu_reg_reg);
10260 %}
10261 
10262 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10263 %{
10264   predicate(VM_Version::supports_fast_3op_lea());
10265   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10266 
10267   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10268   ins_encode %{
10269     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10270     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10271   %}
10272   ins_pipe(ialu_reg_reg);
10273 %}
10274 
10275 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10276 %{
10277   predicate(!UseAPX);
10278   match(Set dst (AddL dst src));
10279   effect(KILL cr);
10280   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10281 
10282   format %{ "addq    $dst, $src\t# long" %}
10283   ins_encode %{
10284     __ addq($dst$$Register, $src$$Register);
10285   %}
10286   ins_pipe(ialu_reg_reg);
10287 %}
10288 
10289 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10290 %{
10291   predicate(UseAPX);
10292   match(Set dst (AddL src1 src2));
10293   effect(KILL cr);
10294   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10295 
10296   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10297   ins_encode %{
10298     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10299   %}
10300   ins_pipe(ialu_reg_reg);
10301 %}
10302 
10303 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10304 %{
10305   predicate(!UseAPX);
10306   match(Set dst (AddL dst src));
10307   effect(KILL cr);
10308   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10309 
10310   format %{ "addq    $dst, $src\t# long" %}
10311   ins_encode %{
10312     __ addq($dst$$Register, $src$$constant);
10313   %}
10314   ins_pipe( ialu_reg );
10315 %}
10316 
10317 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10318 %{
10319   predicate(UseAPX);
10320   match(Set dst (AddL src1 src2));
10321   effect(KILL cr);
10322   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10323 
10324   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10325   ins_encode %{
10326     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10327   %}
10328   ins_pipe( ialu_reg );
10329 %}
10330 
10331 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10332 %{
10333   predicate(UseAPX);
10334   match(Set dst (AddL (LoadL src1) src2));
10335   effect(KILL cr);
10336   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10337 
10338   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10339   ins_encode %{
10340     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10341   %}
10342   ins_pipe( ialu_reg );
10343 %}
10344 
10345 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10346 %{
10347   predicate(!UseAPX);
10348   match(Set dst (AddL dst (LoadL src)));
10349   effect(KILL cr);
10350   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10351 
10352   ins_cost(150); // XXX
10353   format %{ "addq    $dst, $src\t# long" %}
10354   ins_encode %{
10355     __ addq($dst$$Register, $src$$Address);
10356   %}
10357   ins_pipe(ialu_reg_mem);
10358 %}
10359 
10360 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10361 %{
10362   predicate(UseAPX);
10363   match(Set dst (AddL src1 (LoadL src2)));
10364   effect(KILL cr);
10365   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10366 
10367   ins_cost(150);
10368   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10369   ins_encode %{
10370     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10371   %}
10372   ins_pipe(ialu_reg_mem);
10373 %}
10374 
10375 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10376 %{
10377   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10378   effect(KILL cr);
10379   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10380 
10381   ins_cost(150); // XXX
10382   format %{ "addq    $dst, $src\t# long" %}
10383   ins_encode %{
10384     __ addq($dst$$Address, $src$$Register);
10385   %}
10386   ins_pipe(ialu_mem_reg);
10387 %}
10388 
10389 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10390 %{
10391   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10392   effect(KILL cr);
10393   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10394 
10395   ins_cost(125); // XXX
10396   format %{ "addq    $dst, $src\t# long" %}
10397   ins_encode %{
10398     __ addq($dst$$Address, $src$$constant);
10399   %}
10400   ins_pipe(ialu_mem_imm);
10401 %}
10402 
10403 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10404 %{
10405   predicate(!UseAPX && UseIncDec);
10406   match(Set dst (AddL dst src));
10407   effect(KILL cr);
10408 
10409   format %{ "incq    $dst\t# long" %}
10410   ins_encode %{
10411     __ incrementq($dst$$Register);
10412   %}
10413   ins_pipe(ialu_reg);
10414 %}
10415 
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10417 %{
10418   predicate(UseAPX && UseIncDec);
10419   match(Set dst (AddL src val));
10420   effect(KILL cr);
10421   flag(PD::Flag_ndd_demotable_opr1);
10422 
10423   format %{ "eincq    $dst, $src\t# long ndd" %}
10424   ins_encode %{
10425     __ eincq($dst$$Register, $src$$Register, false);
10426   %}
10427   ins_pipe(ialu_reg);
10428 %}
10429 
10430 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10431 %{
10432   predicate(UseAPX && UseIncDec);
10433   match(Set dst (AddL (LoadL src) val));
10434   effect(KILL cr);
10435 
10436   format %{ "eincq    $dst, $src\t# long ndd" %}
10437   ins_encode %{
10438     __ eincq($dst$$Register, $src$$Address, false);
10439   %}
10440   ins_pipe(ialu_reg);
10441 %}
10442 
10443 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10444 %{
10445   predicate(UseIncDec);
10446   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10447   effect(KILL cr);
10448 
10449   ins_cost(125); // XXX
10450   format %{ "incq    $dst\t# long" %}
10451   ins_encode %{
10452     __ incrementq($dst$$Address);
10453   %}
10454   ins_pipe(ialu_mem_imm);
10455 %}
10456 
10457 // XXX why does that use AddL
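// (Same canonicalization as for decI above: "x - 1" arrives as AddL(x, -1).)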
10458 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10459 %{
10460   predicate(!UseAPX && UseIncDec);
10461   match(Set dst (AddL dst src));
10462   effect(KILL cr);
10463 
10464   format %{ "decq    $dst\t# long" %}
10465   ins_encode %{
10466     __ decrementq($dst$$Register);
10467   %}
10468   ins_pipe(ialu_reg);
10469 %}
10470 
10471 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10472 %{
10473   predicate(UseAPX && UseIncDec);
10474   match(Set dst (AddL src val));
10475   effect(KILL cr);
10476   flag(PD::Flag_ndd_demotable_opr1);
10477 
10478   format %{ "edecq    $dst, $src\t# long ndd" %}
10479   ins_encode %{
10480     __ edecq($dst$$Register, $src$$Register, false);
10481   %}
10482   ins_pipe(ialu_reg);
10483 %}
10484 
10485 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10486 %{
10487   predicate(UseAPX && UseIncDec);
10488   match(Set dst (AddL (LoadL src) val));
10489   effect(KILL cr);
10490 
10491   format %{ "edecq    $dst, $src\t# long ndd" %}
10492   ins_encode %{
10493     __ edecq($dst$$Register, $src$$Address, false);
10494   %}
10495   ins_pipe(ialu_reg);
10496 %}
10497 
// XXX why does this use AddL rather than SubL?
10499 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10500 %{
10501   predicate(UseIncDec);
10502   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10503   effect(KILL cr);
10504 
10505   ins_cost(125); // XXX
10506   format %{ "decq    $dst\t# long" %}
10507   ins_encode %{
10508     __ decrementq($dst$$Address);
10509   %}
10510   ins_pipe(ialu_mem_imm);
10511 %}
10512 
10513 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10514 %{
10515   predicate(VM_Version::supports_fast_2op_lea());
10516   match(Set dst (AddL (LShiftL index scale) disp));
10517 
10518   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10519   ins_encode %{
10520     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10521     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10522   %}
10523   ins_pipe(ialu_reg_reg);
10524 %}
10525 
10526 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10527 %{
10528   predicate(VM_Version::supports_fast_3op_lea());
10529   match(Set dst (AddL (AddL base index) disp));
10530 
10531   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10532   ins_encode %{
10533     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10534   %}
10535   ins_pipe(ialu_reg_reg);
10536 %}
10537 
10538 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10539 %{
10540   predicate(VM_Version::supports_fast_2op_lea());
10541   match(Set dst (AddL base (LShiftL index scale)));
10542 
10543   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10544   ins_encode %{
10545     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10546     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10547   %}
10548   ins_pipe(ialu_reg_reg);
10549 %}
10550 
10551 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10552 %{
10553   predicate(VM_Version::supports_fast_3op_lea());
10554   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10555 
10556   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10557   ins_encode %{
10558     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10559     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10560   %}
10561   ins_pipe(ialu_reg_reg);
10562 %}
10563 
10564 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10565 %{
10566   match(Set dst (AddP dst src));
10567   effect(KILL cr);
10568   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10569 
10570   format %{ "addq    $dst, $src\t# ptr" %}
10571   ins_encode %{
10572     __ addq($dst$$Register, $src$$Register);
10573   %}
10574   ins_pipe(ialu_reg_reg);
10575 %}
10576 
10577 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10578 %{
10579   match(Set dst (AddP dst src));
10580   effect(KILL cr);
10581   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10582 
10583   format %{ "addq    $dst, $src\t# ptr" %}
10584   ins_encode %{
10585     __ addq($dst$$Register, $src$$constant);
10586   %}
10587   ins_pipe( ialu_reg );
10588 %}
10589 
10590 // XXX addP mem ops ????
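
// The cast instructs below are size(0) pseudo-instructions: they only
// narrow the type or range information carried by the ideal graph, so
// (unless VerifyConstraintCasts requests a runtime check) they emit no
// code at all.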
10591 
10592 instruct checkCastPP(rRegP dst)
10593 %{
10594   match(Set dst (CheckCastPP dst));
10595 
10596   size(0);
10597   format %{ "# checkcastPP of $dst" %}
10598   ins_encode(/* empty encoding */);
10599   ins_pipe(empty);
10600 %}
10601 
10602 instruct castPP(rRegP dst)
10603 %{
10604   match(Set dst (CastPP dst));
10605 
10606   size(0);
10607   format %{ "# castPP of $dst" %}
10608   ins_encode(/* empty encoding */);
10609   ins_pipe(empty);
10610 %}
10611 
10612 instruct castII(rRegI dst)
10613 %{
10614   predicate(VerifyConstraintCasts == 0);
10615   match(Set dst (CastII dst));
10616 
10617   size(0);
10618   format %{ "# castII of $dst" %}
10619   ins_encode(/* empty encoding */);
10620   ins_cost(0);
10621   ins_pipe(empty);
10622 %}
10623 
10624 instruct castII_checked(rRegI dst, rFlagsReg cr)
10625 %{
10626   predicate(VerifyConstraintCasts > 0);
10627   match(Set dst (CastII dst));
10628 
10629   effect(KILL cr);
10630   format %{ "# cast_checked_II $dst" %}
10631   ins_encode %{
10632     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10633   %}
10634   ins_pipe(pipe_slow);
10635 %}
10636 
10637 instruct castLL(rRegL dst)
10638 %{
10639   predicate(VerifyConstraintCasts == 0);
10640   match(Set dst (CastLL dst));
10641 
10642   size(0);
10643   format %{ "# castLL of $dst" %}
10644   ins_encode(/* empty encoding */);
10645   ins_cost(0);
10646   ins_pipe(empty);
10647 %}
10648 
10649 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10650 %{
10651   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10652   match(Set dst (CastLL dst));
10653 
10654   effect(KILL cr);
10655   format %{ "# cast_checked_LL $dst" %}
10656   ins_encode %{
10657     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10658   %}
10659   ins_pipe(pipe_slow);
10660 %}
10661 
10662 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10663 %{
10664   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10665   match(Set dst (CastLL dst));
10666 
10667   effect(KILL cr, TEMP tmp);
10668   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10669   ins_encode %{
10670     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10671   %}
10672   ins_pipe(pipe_slow);
10673 %}
10674 
10675 instruct castFF(regF dst)
10676 %{
10677   match(Set dst (CastFF dst));
10678 
10679   size(0);
10680   format %{ "# castFF of $dst" %}
10681   ins_encode(/* empty encoding */);
10682   ins_cost(0);
10683   ins_pipe(empty);
10684 %}
10685 
10686 instruct castHH(regF dst)
10687 %{
10688   match(Set dst (CastHH dst));
10689 
10690   size(0);
10691   format %{ "# castHH of $dst" %}
10692   ins_encode(/* empty encoding */);
10693   ins_cost(0);
10694   ins_pipe(empty);
10695 %}
10696 
10697 instruct castDD(regD dst)
10698 %{
10699   match(Set dst (CastDD dst));
10700 
10701   size(0);
10702   format %{ "# castDD of $dst" %}
10703   ins_encode(/* empty encoding */);
10704   ins_cost(0);
10705   ins_pipe(empty);
10706 %}
10707 
10708 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
10709 instruct compareAndSwapP(rRegI res,
10710                          memory mem_ptr,
10711                          rax_RegP oldval, rRegP newval,
10712                          rFlagsReg cr)
10713 %{
10714   predicate(n->as_LoadStore()->barrier_data() == 0);
10715   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10716   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10717   effect(KILL cr, KILL oldval);
10718 
10719   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10720             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10721             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10722   ins_encode %{
10723     __ lock();
10724     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10725     __ setcc(Assembler::equal, $res$$Register);
10726   %}
10727   ins_pipe( pipe_cmpxchg );
10728 %}
10729 
10730 instruct compareAndSwapL(rRegI res,
10731                          memory mem_ptr,
10732                          rax_RegL oldval, rRegL newval,
10733                          rFlagsReg cr)
10734 %{
10735   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10736   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10737   effect(KILL cr, KILL oldval);
10738 
10739   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10740             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10741             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10742   ins_encode %{
10743     __ lock();
10744     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10745     __ setcc(Assembler::equal, $res$$Register);
10746   %}
10747   ins_pipe( pipe_cmpxchg );
10748 %}
10749 
10750 instruct compareAndSwapI(rRegI res,
10751                          memory mem_ptr,
10752                          rax_RegI oldval, rRegI newval,
10753                          rFlagsReg cr)
10754 %{
10755   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10756   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10757   effect(KILL cr, KILL oldval);
10758 
10759   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10760             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10761             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10762   ins_encode %{
10763     __ lock();
10764     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10765     __ setcc(Assembler::equal, $res$$Register);
10766   %}
10767   ins_pipe( pipe_cmpxchg );
10768 %}
10769 
10770 instruct compareAndSwapB(rRegI res,
10771                          memory mem_ptr,
10772                          rax_RegI oldval, rRegI newval,
10773                          rFlagsReg cr)
10774 %{
10775   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10776   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10777   effect(KILL cr, KILL oldval);
10778 
10779   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10780             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10781             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10782   ins_encode %{
10783     __ lock();
10784     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10785     __ setcc(Assembler::equal, $res$$Register);
10786   %}
10787   ins_pipe( pipe_cmpxchg );
10788 %}
10789 
10790 instruct compareAndSwapS(rRegI res,
10791                          memory mem_ptr,
10792                          rax_RegI oldval, rRegI newval,
10793                          rFlagsReg cr)
10794 %{
10795   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10796   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10797   effect(KILL cr, KILL oldval);
10798 
10799   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10800             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10801             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10802   ins_encode %{
10803     __ lock();
10804     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10805     __ setcc(Assembler::equal, $res$$Register);
10806   %}
10807   ins_pipe( pipe_cmpxchg );
10808 %}
10809 
10810 instruct compareAndSwapN(rRegI res,
10811                           memory mem_ptr,
10812                           rax_RegN oldval, rRegN newval,
10813                           rFlagsReg cr) %{
10814   predicate(n->as_LoadStore()->barrier_data() == 0);
10815   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10816   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10817   effect(KILL cr, KILL oldval);
10818 
10819   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10820             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10821             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10822   ins_encode %{
10823     __ lock();
10824     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10825     __ setcc(Assembler::equal, $res$$Register);
10826   %}
10827   ins_pipe( pipe_cmpxchg );
10828 %}
10829 
10830 instruct compareAndExchangeB(
10831                          memory mem_ptr,
10832                          rax_RegI oldval, rRegI newval,
10833                          rFlagsReg cr)
10834 %{
10835   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10836   effect(KILL cr);
10837 
10838   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10839             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10840   ins_encode %{
10841     __ lock();
10842     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10843   %}
10844   ins_pipe( pipe_cmpxchg );
10845 %}
10846 
10847 instruct compareAndExchangeS(
10848                          memory mem_ptr,
10849                          rax_RegI oldval, rRegI newval,
10850                          rFlagsReg cr)
10851 %{
10852   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10853   effect(KILL cr);
10854 
10855   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10856             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10857   ins_encode %{
10858     __ lock();
10859     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10860   %}
10861   ins_pipe( pipe_cmpxchg );
10862 %}
10863 
10864 instruct compareAndExchangeI(
10865                          memory mem_ptr,
10866                          rax_RegI oldval, rRegI newval,
10867                          rFlagsReg cr)
10868 %{
10869   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10870   effect(KILL cr);
10871 
10872   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10873             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10874   ins_encode %{
10875     __ lock();
10876     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10877   %}
10878   ins_pipe( pipe_cmpxchg );
10879 %}
10880 
10881 instruct compareAndExchangeL(
10882                          memory mem_ptr,
10883                          rax_RegL oldval, rRegL newval,
10884                          rFlagsReg cr)
10885 %{
10886   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10887   effect(KILL cr);
10888 
10889   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10890             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10891   ins_encode %{
10892     __ lock();
10893     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10894   %}
10895   ins_pipe( pipe_cmpxchg );
10896 %}
10897 
10898 instruct compareAndExchangeN(
10899                           memory mem_ptr,
10900                           rax_RegN oldval, rRegN newval,
10901                           rFlagsReg cr) %{
10902   predicate(n->as_LoadStore()->barrier_data() == 0);
10903   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10904   effect(KILL cr);
10905 
10906   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10907             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10908   ins_encode %{
10909     __ lock();
10910     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10911   %}
10912   ins_pipe( pipe_cmpxchg );
10913 %}
10914 
10915 instruct compareAndExchangeP(
10916                          memory mem_ptr,
10917                          rax_RegP oldval, rRegP newval,
10918                          rFlagsReg cr)
10919 %{
10920   predicate(n->as_LoadStore()->barrier_data() == 0);
10921   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10922   effect(KILL cr);
10923 
10924   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10925             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10926   ins_encode %{
10927     __ lock();
10928     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10929   %}
10930   ins_pipe( pipe_cmpxchg );
10931 %}
10932 
10933 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10934   predicate(n->as_LoadStore()->result_not_used());
10935   match(Set dummy (GetAndAddB mem add));
10936   effect(KILL cr);
10937   format %{ "addb_lock   $mem, $add" %}
10938   ins_encode %{
10939     __ lock();
10940     __ addb($mem$$Address, $add$$Register);
10941   %}
10942   ins_pipe(pipe_cmpxchg);
10943 %}
10944 
10945 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10946   predicate(n->as_LoadStore()->result_not_used());
10947   match(Set dummy (GetAndAddB mem add));
10948   effect(KILL cr);
10949   format %{ "addb_lock   $mem, $add" %}
10950   ins_encode %{
10951     __ lock();
10952     __ addb($mem$$Address, $add$$constant);
10953   %}
10954   ins_pipe(pipe_cmpxchg);
10955 %}
10956 
10957 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10958   predicate(!n->as_LoadStore()->result_not_used());
10959   match(Set newval (GetAndAddB mem newval));
10960   effect(KILL cr);
10961   format %{ "xaddb_lock  $mem, $newval" %}
10962   ins_encode %{
10963     __ lock();
10964     __ xaddb($mem$$Address, $newval$$Register);
10965   %}
10966   ins_pipe(pipe_cmpxchg);
10967 %}
10968 
10969 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10970   predicate(n->as_LoadStore()->result_not_used());
10971   match(Set dummy (GetAndAddS mem add));
10972   effect(KILL cr);
10973   format %{ "addw_lock   $mem, $add" %}
10974   ins_encode %{
10975     __ lock();
10976     __ addw($mem$$Address, $add$$Register);
10977   %}
10978   ins_pipe(pipe_cmpxchg);
10979 %}
10980 
10981 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10982   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10983   match(Set dummy (GetAndAddS mem add));
10984   effect(KILL cr);
10985   format %{ "addw_lock   $mem, $add" %}
10986   ins_encode %{
10987     __ lock();
10988     __ addw($mem$$Address, $add$$constant);
10989   %}
10990   ins_pipe(pipe_cmpxchg);
10991 %}
10992 
10993 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10994   predicate(!n->as_LoadStore()->result_not_used());
10995   match(Set newval (GetAndAddS mem newval));
10996   effect(KILL cr);
10997   format %{ "xaddw_lock  $mem, $newval" %}
10998   ins_encode %{
10999     __ lock();
11000     __ xaddw($mem$$Address, $newval$$Register);
11001   %}
11002   ins_pipe(pipe_cmpxchg);
11003 %}
11004 
11005 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
11006   predicate(n->as_LoadStore()->result_not_used());
11007   match(Set dummy (GetAndAddI mem add));
11008   effect(KILL cr);
11009   format %{ "addl_lock   $mem, $add" %}
11010   ins_encode %{
11011     __ lock();
11012     __ addl($mem$$Address, $add$$Register);
11013   %}
11014   ins_pipe(pipe_cmpxchg);
11015 %}
11016 
11017 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
11018   predicate(n->as_LoadStore()->result_not_used());
11019   match(Set dummy (GetAndAddI mem add));
11020   effect(KILL cr);
11021   format %{ "addl_lock   $mem, $add" %}
11022   ins_encode %{
11023     __ lock();
11024     __ addl($mem$$Address, $add$$constant);
11025   %}
11026   ins_pipe(pipe_cmpxchg);
11027 %}
11028 
11029 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
11030   predicate(!n->as_LoadStore()->result_not_used());
11031   match(Set newval (GetAndAddI mem newval));
11032   effect(KILL cr);
11033   format %{ "xaddl_lock  $mem, $newval" %}
11034   ins_encode %{
11035     __ lock();
11036     __ xaddl($mem$$Address, $newval$$Register);
11037   %}
11038   ins_pipe(pipe_cmpxchg);
11039 %}
11040 
11041 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
11042   predicate(n->as_LoadStore()->result_not_used());
11043   match(Set dummy (GetAndAddL mem add));
11044   effect(KILL cr);
11045   format %{ "addq_lock   $mem, $add" %}
11046   ins_encode %{
11047     __ lock();
11048     __ addq($mem$$Address, $add$$Register);
11049   %}
11050   ins_pipe(pipe_cmpxchg);
11051 %}
11052 
11053 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
11054   predicate(n->as_LoadStore()->result_not_used());
11055   match(Set dummy (GetAndAddL mem add));
11056   effect(KILL cr);
11057   format %{ "addq_lock   $mem, $add" %}
11058   ins_encode %{
11059     __ lock();
11060     __ addq($mem$$Address, $add$$constant);
11061   %}
11062   ins_pipe(pipe_cmpxchg);
11063 %}
11064 
11065 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11066   predicate(!n->as_LoadStore()->result_not_used());
11067   match(Set newval (GetAndAddL mem newval));
11068   effect(KILL cr);
11069   format %{ "xaddq_lock  $mem, $newval" %}
11070   ins_encode %{
11071     __ lock();
11072     __ xaddq($mem$$Address, $newval$$Register);
11073   %}
11074   ins_pipe(pipe_cmpxchg);
11075 %}
11076 
11077 instruct xchgB( memory mem, rRegI newval) %{
11078   match(Set newval (GetAndSetB mem newval));
11079   format %{ "XCHGB  $newval,[$mem]" %}
11080   ins_encode %{
11081     __ xchgb($newval$$Register, $mem$$Address);
11082   %}
11083   ins_pipe( pipe_cmpxchg );
11084 %}
11085 
11086 instruct xchgS( memory mem, rRegI newval) %{
11087   match(Set newval (GetAndSetS mem newval));
11088   format %{ "XCHGW  $newval,[$mem]" %}
11089   ins_encode %{
11090     __ xchgw($newval$$Register, $mem$$Address);
11091   %}
11092   ins_pipe( pipe_cmpxchg );
11093 %}
11094 
11095 instruct xchgI( memory mem, rRegI newval) %{
11096   match(Set newval (GetAndSetI mem newval));
11097   format %{ "XCHGL  $newval,[$mem]" %}
11098   ins_encode %{
11099     __ xchgl($newval$$Register, $mem$$Address);
11100   %}
11101   ins_pipe( pipe_cmpxchg );
11102 %}
11103 
11104 instruct xchgL( memory mem, rRegL newval) %{
11105   match(Set newval (GetAndSetL mem newval));
11106   format %{ "XCHGL  $newval,[$mem]" %}
11107   ins_encode %{
11108     __ xchgq($newval$$Register, $mem$$Address);
11109   %}
11110   ins_pipe( pipe_cmpxchg );
11111 %}
11112 
instruct xchgP( memory mem, rRegP newval) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set newval (GetAndSetP mem newval));
11116   format %{ "XCHGQ  $newval,[$mem]" %}
11117   ins_encode %{
11118     __ xchgq($newval$$Register, $mem$$Address);
11119   %}
11120   ins_pipe( pipe_cmpxchg );
11121 %}
11122 
11123 instruct xchgN( memory mem, rRegN newval) %{
11124   predicate(n->as_LoadStore()->barrier_data() == 0);
11125   match(Set newval (GetAndSetN mem newval));
11126   format %{ "XCHGL  $newval,$mem]" %}
11127   ins_encode %{
11128     __ xchgl($newval$$Register, $mem$$Address);
11129   %}
11130   ins_pipe( pipe_cmpxchg );
11131 %}
11132 
11133 //----------Abs Instructions-------------------------------------------
11134 
11135 // Integer Absolute Instructions
11136 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11137 %{
11138   match(Set dst (AbsI src));
11139   effect(TEMP dst, KILL cr);
11140   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11141             "subl    $dst, $src\n\t"
11142             "cmovll  $dst, $src" %}
11143   ins_encode %{
11144     __ xorl($dst$$Register, $dst$$Register);
11145     __ subl($dst$$Register, $src$$Register);
11146     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11147   %}
11148 
11149   ins_pipe(ialu_reg_reg);
11150 %}
11151 
11152 // Long Absolute Instructions
11153 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11154 %{
11155   match(Set dst (AbsL src));
11156   effect(TEMP dst, KILL cr);
11157   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11158             "subq    $dst, $src\n\t"
11159             "cmovlq  $dst, $src" %}
11160   ins_encode %{
11161     __ xorl($dst$$Register, $dst$$Register);
11162     __ subq($dst$$Register, $src$$Register);
11163     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11164   %}
11165 
11166   ins_pipe(ialu_reg_reg);
11167 %}
11168 
11169 //----------Subtraction Instructions-------------------------------------------
11170 
11171 // Integer Subtraction Instructions
11172 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11173 %{
11174   predicate(!UseAPX);
11175   match(Set dst (SubI dst src));
11176   effect(KILL cr);
11177   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11178 
11179   format %{ "subl    $dst, $src\t# int" %}
11180   ins_encode %{
11181     __ subl($dst$$Register, $src$$Register);
11182   %}
11183   ins_pipe(ialu_reg_reg);
11184 %}
11185 
11186 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11187 %{
11188   predicate(UseAPX);
11189   match(Set dst (SubI src1 src2));
11190   effect(KILL cr);
11191   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11192 
11193   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11194   ins_encode %{
11195     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11196   %}
11197   ins_pipe(ialu_reg_reg);
11198 %}
11199 
11200 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11201 %{
11202   predicate(UseAPX);
11203   match(Set dst (SubI src1 src2));
11204   effect(KILL cr);
11205   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11206 
11207   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11208   ins_encode %{
11209     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11210   %}
11211   ins_pipe(ialu_reg_reg);
11212 %}
11213 
11214 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11215 %{
11216   predicate(UseAPX);
11217   match(Set dst (SubI (LoadI src1) src2));
11218   effect(KILL cr);
11219   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11220 
11221   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11222   ins_encode %{
11223     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11224   %}
11225   ins_pipe(ialu_reg_reg);
11226 %}
11227 
11228 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11229 %{
11230   predicate(!UseAPX);
11231   match(Set dst (SubI dst (LoadI src)));
11232   effect(KILL cr);
11233   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11234 
11235   ins_cost(150);
11236   format %{ "subl    $dst, $src\t# int" %}
11237   ins_encode %{
11238     __ subl($dst$$Register, $src$$Address);
11239   %}
11240   ins_pipe(ialu_reg_mem);
11241 %}
11242 
11243 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11244 %{
11245   predicate(UseAPX);
11246   match(Set dst (SubI src1 (LoadI src2)));
11247   effect(KILL cr);
11248   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11249 
11250   ins_cost(150);
11251   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11252   ins_encode %{
11253     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11254   %}
11255   ins_pipe(ialu_reg_mem);
11256 %}
11257 
11258 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11259 %{
11260   predicate(UseAPX);
11261   match(Set dst (SubI (LoadI src1) src2));
11262   effect(KILL cr);
11263   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11264 
11265   ins_cost(150);
11266   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11267   ins_encode %{
11268     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11269   %}
11270   ins_pipe(ialu_reg_mem);
11271 %}
11272 
11273 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11274 %{
11275   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11276   effect(KILL cr);
11277   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11278 
11279   ins_cost(150);
11280   format %{ "subl    $dst, $src\t# int" %}
11281   ins_encode %{
11282     __ subl($dst$$Address, $src$$Register);
11283   %}
11284   ins_pipe(ialu_mem_reg);
11285 %}
11286 
11287 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11288 %{
11289   predicate(!UseAPX);
11290   match(Set dst (SubL dst src));
11291   effect(KILL cr);
11292   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11293 
11294   format %{ "subq    $dst, $src\t# long" %}
11295   ins_encode %{
11296     __ subq($dst$$Register, $src$$Register);
11297   %}
11298   ins_pipe(ialu_reg_reg);
11299 %}
11300 
11301 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11302 %{
11303   predicate(UseAPX);
11304   match(Set dst (SubL src1 src2));
11305   effect(KILL cr);
11306   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11307 
11308   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11309   ins_encode %{
11310     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11311   %}
11312   ins_pipe(ialu_reg_reg);
11313 %}
11314 
11315 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11316 %{
11317   predicate(UseAPX);
11318   match(Set dst (SubL src1 src2));
11319   effect(KILL cr);
11320   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11321 
11322   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11323   ins_encode %{
11324     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11325   %}
11326   ins_pipe(ialu_reg_reg);
11327 %}
11328 
11329 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11330 %{
11331   predicate(UseAPX);
11332   match(Set dst (SubL (LoadL src1) src2));
11333   effect(KILL cr);
11334   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11335 
11336   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11337   ins_encode %{
11338     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11339   %}
11340   ins_pipe(ialu_reg_reg);
11341 %}
11342 
11343 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11344 %{
11345   predicate(!UseAPX);
11346   match(Set dst (SubL dst (LoadL src)));
11347   effect(KILL cr);
11348   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11349 
11350   ins_cost(150);
11351   format %{ "subq    $dst, $src\t# long" %}
11352   ins_encode %{
11353     __ subq($dst$$Register, $src$$Address);
11354   %}
11355   ins_pipe(ialu_reg_mem);
11356 %}
11357 
11358 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11359 %{
11360   predicate(UseAPX);
11361   match(Set dst (SubL src1 (LoadL src2)));
11362   effect(KILL cr);
11363   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11364 
11365   ins_cost(150);
11366   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11367   ins_encode %{
11368     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11369   %}
11370   ins_pipe(ialu_reg_mem);
11371 %}
11372 
11373 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11374 %{
11375   predicate(UseAPX);
11376   match(Set dst (SubL (LoadL src1) src2));
11377   effect(KILL cr);
11378   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11379 
11380   ins_cost(150);
11381   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11382   ins_encode %{
11383     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11384   %}
11385   ins_pipe(ialu_reg_mem);
11386 %}
11387 
11388 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11389 %{
11390   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11391   effect(KILL cr);
11392   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11393 
11394   ins_cost(150);
11395   format %{ "subq    $dst, $src\t# long" %}
11396   ins_encode %{
11397     __ subq($dst$$Address, $src$$Register);
11398   %}
11399   ins_pipe(ialu_mem_reg);
11400 %}
11401 
// Subtract an int from a pointer.  There is no SubP ideal node, so
// pointer-minus-int reaches the matcher as AddP of the negated int.
11404 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11405 %{
11406   match(Set dst (AddP dst (SubI zero src)));
11407   effect(KILL cr);
11408 
11409   format %{ "subq    $dst, $src\t# ptr - int" %}
11410   ins_encode %{
11411     __ subq($dst$$Register, $src$$Register);
11412   %}
11413   ins_pipe(ialu_reg_reg);
11414 %}
11415 
11416 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11417 %{
11418   predicate(!UseAPX);
11419   match(Set dst (SubI zero dst));
11420   effect(KILL cr);
11421   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11422 
11423   format %{ "negl    $dst\t# int" %}
11424   ins_encode %{
11425     __ negl($dst$$Register);
11426   %}
11427   ins_pipe(ialu_reg);
11428 %}
11429 
11430 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11431 %{
11432   predicate(UseAPX);
11433   match(Set dst (SubI zero src));
11434   effect(KILL cr);
11435   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11436 
11437   format %{ "enegl    $dst, $src\t# int ndd" %}
11438   ins_encode %{
11439     __ enegl($dst$$Register, $src$$Register, false);
11440   %}
11441   ins_pipe(ialu_reg);
11442 %}
11443 
11444 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11445 %{
11446   predicate(!UseAPX);
11447   match(Set dst (NegI dst));
11448   effect(KILL cr);
11449   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11450 
11451   format %{ "negl    $dst\t# int" %}
11452   ins_encode %{
11453     __ negl($dst$$Register);
11454   %}
11455   ins_pipe(ialu_reg);
11456 %}
11457 
11458 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11459 %{
11460   predicate(UseAPX);
11461   match(Set dst (NegI src));
11462   effect(KILL cr);
11463   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11464 
11465   format %{ "enegl    $dst, $src\t# int ndd" %}
11466   ins_encode %{
11467     __ enegl($dst$$Register, $src$$Register, false);
11468   %}
11469   ins_pipe(ialu_reg);
11470 %}
11471 
11472 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11473 %{
11474   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11475   effect(KILL cr);
11476   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11477 
11478   format %{ "negl    $dst\t# int" %}
11479   ins_encode %{
11480     __ negl($dst$$Address);
11481   %}
11482   ins_pipe(ialu_reg);
11483 %}
11484 
11485 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11486 %{
11487   predicate(!UseAPX);
11488   match(Set dst (SubL zero dst));
11489   effect(KILL cr);
11490   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11491 
11492   format %{ "negq    $dst\t# long" %}
11493   ins_encode %{
11494     __ negq($dst$$Register);
11495   %}
11496   ins_pipe(ialu_reg);
11497 %}
11498 
11499 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11500 %{
11501   predicate(UseAPX);
11502   match(Set dst (SubL zero src));
11503   effect(KILL cr);
11504   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11505 
11506   format %{ "enegq    $dst, $src\t# long ndd" %}
11507   ins_encode %{
11508     __ enegq($dst$$Register, $src$$Register, false);
11509   %}
11510   ins_pipe(ialu_reg);
11511 %}
11512 
11513 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11514 %{
11515   predicate(!UseAPX);
11516   match(Set dst (NegL dst));
11517   effect(KILL cr);
11518   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11519 
11520   format %{ "negq    $dst\t# int" %}
11521   ins_encode %{
11522     __ negq($dst$$Register);
11523   %}
11524   ins_pipe(ialu_reg);
11525 %}
11526 
11527 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11528 %{
11529   predicate(UseAPX);
11530   match(Set dst (NegL src));
11531   effect(KILL cr);
11532   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11533 
11534   format %{ "enegq    $dst, $src\t# long ndd" %}
11535   ins_encode %{
11536     __ enegq($dst$$Register, $src$$Register, false);
11537   %}
11538   ins_pipe(ialu_reg);
11539 %}
11540 
11541 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11542 %{
11543   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11544   effect(KILL cr);
11545   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11546 
11547   format %{ "negq    $dst\t# long" %}
11548   ins_encode %{
11549     __ negq($dst$$Address);
11550   %}
11551   ins_pipe(ialu_reg);
11552 %}
11553 
11554 //----------Multiplication/Division Instructions-------------------------------
11555 // Integer Multiplication Instructions
11556 // Multiply Register
11557 
11558 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11559 %{
11560   predicate(!UseAPX);
11561   match(Set dst (MulI dst src));
11562   effect(KILL cr);
11563 
11564   ins_cost(300);
11565   format %{ "imull   $dst, $src\t# int" %}
11566   ins_encode %{
11567     __ imull($dst$$Register, $src$$Register);
11568   %}
11569   ins_pipe(ialu_reg_reg_alu0);
11570 %}
11571 
11572 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11573 %{
11574   predicate(UseAPX);
11575   match(Set dst (MulI src1 src2));
11576   effect(KILL cr);
11577   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11578 
11579   ins_cost(300);
11580   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11581   ins_encode %{
11582     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11583   %}
11584   ins_pipe(ialu_reg_reg_alu0);
11585 %}
11586 
11587 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11588 %{
11589   match(Set dst (MulI src imm));
11590   effect(KILL cr);
11591 
11592   ins_cost(300);
11593   format %{ "imull   $dst, $src, $imm\t# int" %}
11594   ins_encode %{
11595     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11596   %}
11597   ins_pipe(ialu_reg_reg_alu0);
11598 %}
11599 
11600 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11601 %{
11602   predicate(!UseAPX);
11603   match(Set dst (MulI dst (LoadI src)));
11604   effect(KILL cr);
11605 
11606   ins_cost(350);
11607   format %{ "imull   $dst, $src\t# int" %}
11608   ins_encode %{
11609     __ imull($dst$$Register, $src$$Address);
11610   %}
11611   ins_pipe(ialu_reg_mem_alu0);
11612 %}
11613 
11614 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11615 %{
11616   predicate(UseAPX);
11617   match(Set dst (MulI src1 (LoadI src2)));
11618   effect(KILL cr);
11619   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11620 
11621   ins_cost(350);
11622   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11623   ins_encode %{
11624     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11625   %}
11626   ins_pipe(ialu_reg_mem_alu0);
11627 %}
11628 
11629 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11630 %{
11631   match(Set dst (MulI (LoadI src) imm));
11632   effect(KILL cr);
11633 
11634   ins_cost(300);
11635   format %{ "imull   $dst, $src, $imm\t# int" %}
11636   ins_encode %{
11637     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11638   %}
11639   ins_pipe(ialu_reg_mem_alu0);
11640 %}
11641 
11642 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11643 %{
11644   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11645   effect(KILL cr, KILL src2);
11646 
  expand %{
    mulI_rReg(dst, src1, cr);
    mulI_rReg(src2, src3, cr);
    addI_rReg(dst, src2, cr);
  %}
11650 %}
11651 
11652 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11653 %{
11654   predicate(!UseAPX);
11655   match(Set dst (MulL dst src));
11656   effect(KILL cr);
11657 
11658   ins_cost(300);
11659   format %{ "imulq   $dst, $src\t# long" %}
11660   ins_encode %{
11661     __ imulq($dst$$Register, $src$$Register);
11662   %}
11663   ins_pipe(ialu_reg_reg_alu0);
11664 %}
11665 
11666 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11667 %{
11668   predicate(UseAPX);
11669   match(Set dst (MulL src1 src2));
11670   effect(KILL cr);
11671   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11672 
11673   ins_cost(300);
11674   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11675   ins_encode %{
11676     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11677   %}
11678   ins_pipe(ialu_reg_reg_alu0);
11679 %}
11680 
11681 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11682 %{
11683   match(Set dst (MulL src imm));
11684   effect(KILL cr);
11685 
11686   ins_cost(300);
11687   format %{ "imulq   $dst, $src, $imm\t# long" %}
11688   ins_encode %{
11689     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11690   %}
11691   ins_pipe(ialu_reg_reg_alu0);
11692 %}
11693 
11694 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11695 %{
11696   predicate(!UseAPX);
11697   match(Set dst (MulL dst (LoadL src)));
11698   effect(KILL cr);
11699 
11700   ins_cost(350);
11701   format %{ "imulq   $dst, $src\t# long" %}
11702   ins_encode %{
11703     __ imulq($dst$$Register, $src$$Address);
11704   %}
11705   ins_pipe(ialu_reg_mem_alu0);
11706 %}
11707 
11708 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11709 %{
11710   predicate(UseAPX);
11711   match(Set dst (MulL src1 (LoadL src2)));
11712   effect(KILL cr);
11713   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11714 
11715   ins_cost(350);
11716   format %{ "eimulq   $dst, $src1, $src2 \t# long" %}
11717   ins_encode %{
11718     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11719   %}
11720   ins_pipe(ialu_reg_mem_alu0);
11721 %}
11722 
11723 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11724 %{
11725   match(Set dst (MulL (LoadL src) imm));
11726   effect(KILL cr);
11727 
11728   ins_cost(300);
11729   format %{ "imulq   $dst, $src, $imm\t# long" %}
11730   ins_encode %{
11731     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11732   %}
11733   ins_pipe(ialu_reg_mem_alu0);
11734 %}
11735 
11736 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11737 %{
11738   match(Set dst (MulHiL src rax));
11739   effect(USE_KILL rax, KILL cr);
11740 
11741   ins_cost(300);
11742   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11743   ins_encode %{
11744     __ imulq($src$$Register);
11745   %}
11746   ins_pipe(ialu_reg_reg_alu0);
11747 %}
11748 
11749 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11750 %{
11751   match(Set dst (UMulHiL src rax));
11752   effect(USE_KILL rax, KILL cr);
11753 
11754   ins_cost(300);
11755   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11756   ins_encode %{
11757     __ mulq($src$$Register);
11758   %}
11759   ins_pipe(ialu_reg_reg_alu0);
11760 %}
11761 
11762 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11763                    rFlagsReg cr)
11764 %{
11765   match(Set rax (DivI rax div));
11766   effect(KILL rdx, KILL cr);
11767 
11768   ins_cost(30*100+10*100); // XXX
11769   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11770             "jne,s   normal\n\t"
11771             "xorl    rdx, rdx\n\t"
11772             "cmpl    $div, -1\n\t"
11773             "je,s    done\n"
11774     "normal: cdql\n\t"
11775             "idivl   $div\n"
11776     "done:"        %}
11777   ins_encode(cdql_enc(div));
11778   ins_pipe(ialu_reg_reg_alu0);
11779 %}
11780 
11781 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11782                    rFlagsReg cr)
11783 %{
11784   match(Set rax (DivL rax div));
11785   effect(KILL rdx, KILL cr);
11786 
11787   ins_cost(30*100+10*100); // XXX
11788   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11789             "cmpq    rax, rdx\n\t"
11790             "jne,s   normal\n\t"
11791             "xorl    rdx, rdx\n\t"
11792             "cmpq    $div, -1\n\t"
11793             "je,s    done\n"
11794     "normal: cdqq\n\t"
11795             "idivq   $div\n"
11796     "done:"        %}
11797   ins_encode(cdqq_enc(div));
11798   ins_pipe(ialu_reg_reg_alu0);
11799 %}
11800 
11801 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11802 %{
11803   match(Set rax (UDivI rax div));
11804   effect(KILL rdx, KILL cr);
11805 
11806   ins_cost(300);
11807   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11808   ins_encode %{
11809     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11810   %}
11811   ins_pipe(ialu_reg_reg_alu0);
11812 %}
11813 
11814 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11815 %{
11816   match(Set rax (UDivL rax div));
11817   effect(KILL rdx, KILL cr);
11818 
11819   ins_cost(300);
11820   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11821   ins_encode %{
11822      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11823   %}
11824   ins_pipe(ialu_reg_reg_alu0);
11825 %}
11826 
11827 // Integer DIVMOD with Register, both quotient and mod results
11828 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11829                              rFlagsReg cr)
11830 %{
11831   match(DivModI rax div);
11832   effect(KILL cr);
11833 
11834   ins_cost(30*100+10*100); // XXX
11835   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11836             "jne,s   normal\n\t"
11837             "xorl    rdx, rdx\n\t"
11838             "cmpl    $div, -1\n\t"
11839             "je,s    done\n"
11840     "normal: cdql\n\t"
11841             "idivl   $div\n"
11842     "done:"        %}
11843   ins_encode(cdql_enc(div));
11844   ins_pipe(pipe_slow);
11845 %}
11846 
11847 // Long DIVMOD with Register, both quotient and mod results
11848 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11849                              rFlagsReg cr)
11850 %{
11851   match(DivModL rax div);
11852   effect(KILL cr);
11853 
11854   ins_cost(30*100+10*100); // XXX
11855   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11856             "cmpq    rax, rdx\n\t"
11857             "jne,s   normal\n\t"
11858             "xorl    rdx, rdx\n\t"
11859             "cmpq    $div, -1\n\t"
11860             "je,s    done\n"
11861     "normal: cdqq\n\t"
11862             "idivq   $div\n"
11863     "done:"        %}
11864   ins_encode(cdqq_enc(div));
11865   ins_pipe(pipe_slow);
11866 %}
11867 
11868 // Unsigned integer DIVMOD with Register, both quotient and mod results
11869 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11870                               no_rax_rdx_RegI div, rFlagsReg cr)
11871 %{
11872   match(UDivModI rax div);
11873   effect(TEMP tmp, KILL cr);
11874 
11875   ins_cost(300);
11876   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11877             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11878           %}
11879   ins_encode %{
11880     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11881   %}
11882   ins_pipe(pipe_slow);
11883 %}
11884 
11885 // Unsigned long DIVMOD with Register, both quotient and mod results
11886 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11887                               no_rax_rdx_RegL div, rFlagsReg cr)
11888 %{
11889   match(UDivModL rax div);
11890   effect(TEMP tmp, KILL cr);
11891 
11892   ins_cost(300);
11893   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11894             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11895           %}
11896   ins_encode %{
11897     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11898   %}
11899   ins_pipe(pipe_slow);
11900 %}
11901 
11902 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11903                    rFlagsReg cr)
11904 %{
11905   match(Set rdx (ModI rax div));
11906   effect(KILL rax, KILL cr);
11907 
11908   ins_cost(300); // XXX
11909   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11910             "jne,s   normal\n\t"
11911             "xorl    rdx, rdx\n\t"
11912             "cmpl    $div, -1\n\t"
11913             "je,s    done\n"
11914     "normal: cdql\n\t"
11915             "idivl   $div\n"
11916     "done:"        %}
11917   ins_encode(cdql_enc(div));
11918   ins_pipe(ialu_reg_reg_alu0);
11919 %}
11920 
11921 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11922                    rFlagsReg cr)
11923 %{
11924   match(Set rdx (ModL rax div));
11925   effect(KILL rax, KILL cr);
11926 
11927   ins_cost(300); // XXX
11928   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11929             "cmpq    rax, rdx\n\t"
11930             "jne,s   normal\n\t"
11931             "xorl    rdx, rdx\n\t"
11932             "cmpq    $div, -1\n\t"
11933             "je,s    done\n"
11934     "normal: cdqq\n\t"
11935             "idivq   $div\n"
11936     "done:"        %}
11937   ins_encode(cdqq_enc(div));
11938   ins_pipe(ialu_reg_reg_alu0);
11939 %}
11940 
11941 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11942 %{
11943   match(Set rdx (UModI rax div));
11944   effect(KILL rax, KILL cr);
11945 
11946   ins_cost(300);
11947   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11948   ins_encode %{
11949     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11950   %}
11951   ins_pipe(ialu_reg_reg_alu0);
11952 %}
11953 
11954 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11955 %{
11956   match(Set rdx (UModL rax div));
11957   effect(KILL rax, KILL cr);
11958 
11959   ins_cost(300);
11960   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11961   ins_encode %{
11962     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11963   %}
11964   ins_pipe(ialu_reg_reg_alu0);
11965 %}
11966 
11967 // Integer Shift Instructions
11968 // Shift Left by one, two, three
11969 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11970 %{
11971   predicate(!UseAPX);
11972   match(Set dst (LShiftI dst shift));
11973   effect(KILL cr);
11974 
11975   format %{ "sall    $dst, $shift" %}
11976   ins_encode %{
11977     __ sall($dst$$Register, $shift$$constant);
11978   %}
11979   ins_pipe(ialu_reg);
11980 %}
11981 
11982 // Shift Left by one, two, three
11983 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11984 %{
11985   predicate(UseAPX);
11986   match(Set dst (LShiftI src shift));
11987   effect(KILL cr);
11988   flag(PD::Flag_ndd_demotable_opr1);
11989 
11990   format %{ "esall    $dst, $src, $shift\t# int(ndd)" %}
11991   ins_encode %{
11992     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11993   %}
11994   ins_pipe(ialu_reg);
11995 %}
11996 
11997 // Shift Left by 8-bit immediate
11998 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11999 %{
12000   predicate(!UseAPX);
12001   match(Set dst (LShiftI dst shift));
12002   effect(KILL cr);
12003 
12004   format %{ "sall    $dst, $shift" %}
12005   ins_encode %{
12006     __ sall($dst$$Register, $shift$$constant);
12007   %}
12008   ins_pipe(ialu_reg);
12009 %}
12010 
12011 // Shift Left by 8-bit immediate
12012 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12013 %{
12014   predicate(UseAPX);
12015   match(Set dst (LShiftI src shift));
12016   effect(KILL cr);
12017   flag(PD::Flag_ndd_demotable_opr1);
12018 
12019   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
12020   ins_encode %{
12021     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
12022   %}
12023   ins_pipe(ialu_reg);
12024 %}
12025 
12026 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12027 %{
12028   predicate(UseAPX);
12029   match(Set dst (LShiftI (LoadI src) shift));
12030   effect(KILL cr);
12031 
12032   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
12033   ins_encode %{
12034     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
12035   %}
12036   ins_pipe(ialu_reg);
12037 %}
12038 
12039 // Shift Left by 8-bit immediate
12040 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12041 %{
12042   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12043   effect(KILL cr);
12044 
12045   format %{ "sall    $dst, $shift" %}
12046   ins_encode %{
12047     __ sall($dst$$Address, $shift$$constant);
12048   %}
12049   ins_pipe(ialu_mem_imm);
12050 %}
12051 
12052 // Shift Left by variable
12053 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12054 %{
12055   predicate(!VM_Version::supports_bmi2());
12056   match(Set dst (LShiftI dst shift));
12057   effect(KILL cr);
12058 
12059   format %{ "sall    $dst, $shift" %}
12060   ins_encode %{
12061     __ sall($dst$$Register);
12062   %}
12063   ins_pipe(ialu_reg_reg);
12064 %}
12065 
12066 // Shift Left by variable
12067 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12068 %{
12069   predicate(!VM_Version::supports_bmi2());
12070   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12071   effect(KILL cr);
12072 
12073   format %{ "sall    $dst, $shift" %}
12074   ins_encode %{
12075     __ sall($dst$$Address);
12076   %}
12077   ins_pipe(ialu_mem_reg);
12078 %}
12079 
12080 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12081 %{
12082   predicate(VM_Version::supports_bmi2());
12083   match(Set dst (LShiftI src shift));
12084 
12085   format %{ "shlxl   $dst, $src, $shift" %}
12086   ins_encode %{
12087     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12088   %}
12089   ins_pipe(ialu_reg_reg);
12090 %}
12091 
12092 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12093 %{
12094   predicate(VM_Version::supports_bmi2());
12095   match(Set dst (LShiftI (LoadI src) shift));
12096   ins_cost(175);
12097   format %{ "shlxl   $dst, $src, $shift" %}
12098   ins_encode %{
12099     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12100   %}
12101   ins_pipe(ialu_reg_mem);
12102 %}
12103 
12104 // Arithmetic Shift Right by 8-bit immediate
12105 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12106 %{
12107   predicate(!UseAPX);
12108   match(Set dst (RShiftI dst shift));
12109   effect(KILL cr);
12110 
12111   format %{ "sarl    $dst, $shift" %}
12112   ins_encode %{
12113     __ sarl($dst$$Register, $shift$$constant);
12114   %}
12115   ins_pipe(ialu_mem_imm);
12116 %}
12117 
12118 // Arithmetic Shift Right by 8-bit immediate
12119 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12120 %{
12121   predicate(UseAPX);
12122   match(Set dst (RShiftI src shift));
12123   effect(KILL cr);
12124   flag(PD::Flag_ndd_demotable_opr1);
12125 
12126   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12127   ins_encode %{
12128     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12129   %}
12130   ins_pipe(ialu_mem_imm);
12131 %}
12132 
12133 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12134 %{
12135   predicate(UseAPX);
12136   match(Set dst (RShiftI (LoadI src) shift));
12137   effect(KILL cr);
12138 
12139   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12140   ins_encode %{
12141     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12142   %}
12143   ins_pipe(ialu_mem_imm);
12144 %}
12145 
12146 // Arithmetic Shift Right by 8-bit immediate
12147 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12148 %{
12149   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12150   effect(KILL cr);
12151 
12152   format %{ "sarl    $dst, $shift" %}
12153   ins_encode %{
12154     __ sarl($dst$$Address, $shift$$constant);
12155   %}
12156   ins_pipe(ialu_mem_imm);
12157 %}
12158 
12159 // Arithmetic Shift Right by variable
12160 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12161 %{
12162   predicate(!VM_Version::supports_bmi2());
12163   match(Set dst (RShiftI dst shift));
12164   effect(KILL cr);
12165 
12166   format %{ "sarl    $dst, $shift" %}
12167   ins_encode %{
12168     __ sarl($dst$$Register);
12169   %}
12170   ins_pipe(ialu_reg_reg);
12171 %}
12172 
12173 // Arithmetic Shift Right by variable
12174 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12175 %{
12176   predicate(!VM_Version::supports_bmi2());
12177   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12178   effect(KILL cr);
12179 
12180   format %{ "sarl    $dst, $shift" %}
12181   ins_encode %{
12182     __ sarl($dst$$Address);
12183   %}
12184   ins_pipe(ialu_mem_reg);
12185 %}
12186 
12187 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12188 %{
12189   predicate(VM_Version::supports_bmi2());
12190   match(Set dst (RShiftI src shift));
12191 
12192   format %{ "sarxl   $dst, $src, $shift" %}
12193   ins_encode %{
12194     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12195   %}
12196   ins_pipe(ialu_reg_reg);
12197 %}
12198 
12199 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12200 %{
12201   predicate(VM_Version::supports_bmi2());
12202   match(Set dst (RShiftI (LoadI src) shift));
12203   ins_cost(175);
12204   format %{ "sarxl   $dst, $src, $shift" %}
12205   ins_encode %{
12206     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12207   %}
12208   ins_pipe(ialu_reg_mem);
12209 %}
12210 
12211 // Logical Shift Right by 8-bit immediate
12212 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12213 %{
12214   predicate(!UseAPX);
12215   match(Set dst (URShiftI dst shift));
12216   effect(KILL cr);
12217 
12218   format %{ "shrl    $dst, $shift" %}
12219   ins_encode %{
12220     __ shrl($dst$$Register, $shift$$constant);
12221   %}
12222   ins_pipe(ialu_reg);
12223 %}
12224 
12225 // Logical Shift Right by 8-bit immediate
12226 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12227 %{
12228   predicate(UseAPX);
12229   match(Set dst (URShiftI src shift));
12230   effect(KILL cr);
12231   flag(PD::Flag_ndd_demotable_opr1);
12232 
  format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12234   ins_encode %{
12235     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12236   %}
12237   ins_pipe(ialu_reg);
12238 %}
12239 
12240 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12241 %{
12242   predicate(UseAPX);
12243   match(Set dst (URShiftI (LoadI src) shift));
12244   effect(KILL cr);
12245 
  format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12247   ins_encode %{
12248     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12249   %}
12250   ins_pipe(ialu_reg);
12251 %}
12252 
12253 // Logical Shift Right by 8-bit immediate
12254 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12255 %{
12256   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12257   effect(KILL cr);
12258 
12259   format %{ "shrl    $dst, $shift" %}
12260   ins_encode %{
12261     __ shrl($dst$$Address, $shift$$constant);
12262   %}
12263   ins_pipe(ialu_mem_imm);
12264 %}
12265 
12266 // Logical Shift Right by variable
12267 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12268 %{
12269   predicate(!VM_Version::supports_bmi2());
12270   match(Set dst (URShiftI dst shift));
12271   effect(KILL cr);
12272 
12273   format %{ "shrl    $dst, $shift" %}
12274   ins_encode %{
12275     __ shrl($dst$$Register);
12276   %}
12277   ins_pipe(ialu_reg_reg);
12278 %}
12279 
12280 // Logical Shift Right by variable
12281 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12282 %{
12283   predicate(!VM_Version::supports_bmi2());
12284   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12285   effect(KILL cr);
12286 
12287   format %{ "shrl    $dst, $shift" %}
12288   ins_encode %{
12289     __ shrl($dst$$Address);
12290   %}
12291   ins_pipe(ialu_mem_reg);
12292 %}
12293 
12294 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12295 %{
12296   predicate(VM_Version::supports_bmi2());
12297   match(Set dst (URShiftI src shift));
12298 
12299   format %{ "shrxl   $dst, $src, $shift" %}
12300   ins_encode %{
12301     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12302   %}
12303   ins_pipe(ialu_reg_reg);
12304 %}
12305 
12306 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12307 %{
12308   predicate(VM_Version::supports_bmi2());
12309   match(Set dst (URShiftI (LoadI src) shift));
12310   ins_cost(175);
12311   format %{ "shrxl   $dst, $src, $shift" %}
12312   ins_encode %{
12313     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12314   %}
12315   ins_pipe(ialu_reg_mem);
12316 %}
12317 
12318 // Long Shift Instructions
12319 // Shift Left by one, two, three
12320 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12321 %{
12322   predicate(!UseAPX);
12323   match(Set dst (LShiftL dst shift));
12324   effect(KILL cr);
12325 
12326   format %{ "salq    $dst, $shift" %}
12327   ins_encode %{
12328     __ salq($dst$$Register, $shift$$constant);
12329   %}
12330   ins_pipe(ialu_reg);
12331 %}
12332 
12333 // Shift Left by one, two, three
12334 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12335 %{
12336   predicate(UseAPX);
12337   match(Set dst (LShiftL src shift));
12338   effect(KILL cr);
12339   flag(PD::Flag_ndd_demotable_opr1);
12340 
12341   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12342   ins_encode %{
12343     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12344   %}
12345   ins_pipe(ialu_reg);
12346 %}
12347 
12348 // Shift Left by 8-bit immediate
12349 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12350 %{
12351   predicate(!UseAPX);
12352   match(Set dst (LShiftL dst shift));
12353   effect(KILL cr);
12354 
12355   format %{ "salq    $dst, $shift" %}
12356   ins_encode %{
12357     __ salq($dst$$Register, $shift$$constant);
12358   %}
12359   ins_pipe(ialu_reg);
12360 %}
12361 
12362 // Shift Left by 8-bit immediate
12363 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12364 %{
12365   predicate(UseAPX);
12366   match(Set dst (LShiftL src shift));
12367   effect(KILL cr);
12368   flag(PD::Flag_ndd_demotable_opr1);
12369 
12370   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12371   ins_encode %{
12372     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12373   %}
12374   ins_pipe(ialu_reg);
12375 %}
12376 
12377 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12378 %{
12379   predicate(UseAPX);
12380   match(Set dst (LShiftL (LoadL src) shift));
12381   effect(KILL cr);
12382 
12383   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12384   ins_encode %{
12385     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12386   %}
12387   ins_pipe(ialu_reg);
12388 %}
12389 
12390 // Shift Left by 8-bit immediate
12391 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12392 %{
12393   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12394   effect(KILL cr);
12395 
12396   format %{ "salq    $dst, $shift" %}
12397   ins_encode %{
12398     __ salq($dst$$Address, $shift$$constant);
12399   %}
12400   ins_pipe(ialu_mem_imm);
12401 %}
12402 
12403 // Shift Left by variable
12404 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12405 %{
12406   predicate(!VM_Version::supports_bmi2());
12407   match(Set dst (LShiftL dst shift));
12408   effect(KILL cr);
12409 
12410   format %{ "salq    $dst, $shift" %}
12411   ins_encode %{
12412     __ salq($dst$$Register);
12413   %}
12414   ins_pipe(ialu_reg_reg);
12415 %}
12416 
12417 // Shift Left by variable
12418 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12419 %{
12420   predicate(!VM_Version::supports_bmi2());
12421   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12422   effect(KILL cr);
12423 
12424   format %{ "salq    $dst, $shift" %}
12425   ins_encode %{
12426     __ salq($dst$$Address);
12427   %}
12428   ins_pipe(ialu_mem_reg);
12429 %}
12430 
12431 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12432 %{
12433   predicate(VM_Version::supports_bmi2());
12434   match(Set dst (LShiftL src shift));
12435 
12436   format %{ "shlxq   $dst, $src, $shift" %}
12437   ins_encode %{
12438     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12439   %}
12440   ins_pipe(ialu_reg_reg);
12441 %}
12442 
12443 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12444 %{
12445   predicate(VM_Version::supports_bmi2());
12446   match(Set dst (LShiftL (LoadL src) shift));
12447   ins_cost(175);
12448   format %{ "shlxq   $dst, $src, $shift" %}
12449   ins_encode %{
12450     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12451   %}
12452   ins_pipe(ialu_reg_mem);
12453 %}
12454 
// Arithmetic Shift Right by immediate (count masked to 6 bits)
12456 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12457 %{
12458   predicate(!UseAPX);
12459   match(Set dst (RShiftL dst shift));
12460   effect(KILL cr);
12461 
12462   format %{ "sarq    $dst, $shift" %}
12463   ins_encode %{
12464     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12465   %}
12466   ins_pipe(ialu_mem_imm);
12467 %}
12468 
// Arithmetic Shift Right by immediate (count masked to 6 bits)
12470 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12471 %{
12472   predicate(UseAPX);
12473   match(Set dst (RShiftL src shift));
12474   effect(KILL cr);
12475   flag(PD::Flag_ndd_demotable_opr1);
12476 
12477   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12478   ins_encode %{
12479     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12480   %}
12481   ins_pipe(ialu_mem_imm);
12482 %}
12483 
12484 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12485 %{
12486   predicate(UseAPX);
12487   match(Set dst (RShiftL (LoadL src) shift));
12488   effect(KILL cr);
12489 
12490   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12491   ins_encode %{
12492     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12493   %}
12494   ins_pipe(ialu_mem_imm);
12495 %}
12496 
// Arithmetic Shift Right by immediate (count masked to 6 bits)
12498 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12499 %{
12500   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12501   effect(KILL cr);
12502 
12503   format %{ "sarq    $dst, $shift" %}
12504   ins_encode %{
12505     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12506   %}
12507   ins_pipe(ialu_mem_imm);
12508 %}
12509 
12510 // Arithmetic Shift Right by variable
12511 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12512 %{
12513   predicate(!VM_Version::supports_bmi2());
12514   match(Set dst (RShiftL dst shift));
12515   effect(KILL cr);
12516 
12517   format %{ "sarq    $dst, $shift" %}
12518   ins_encode %{
12519     __ sarq($dst$$Register);
12520   %}
12521   ins_pipe(ialu_reg_reg);
12522 %}
12523 
12524 // Arithmetic Shift Right by variable
12525 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12526 %{
12527   predicate(!VM_Version::supports_bmi2());
12528   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12529   effect(KILL cr);
12530 
12531   format %{ "sarq    $dst, $shift" %}
12532   ins_encode %{
12533     __ sarq($dst$$Address);
12534   %}
12535   ins_pipe(ialu_mem_reg);
12536 %}
12537 
12538 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12539 %{
12540   predicate(VM_Version::supports_bmi2());
12541   match(Set dst (RShiftL src shift));
12542 
12543   format %{ "sarxq   $dst, $src, $shift" %}
12544   ins_encode %{
12545     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12546   %}
12547   ins_pipe(ialu_reg_reg);
12548 %}
12549 
12550 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12551 %{
12552   predicate(VM_Version::supports_bmi2());
12553   match(Set dst (RShiftL (LoadL src) shift));
12554   ins_cost(175);
12555   format %{ "sarxq   $dst, $src, $shift" %}
12556   ins_encode %{
12557     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12558   %}
12559   ins_pipe(ialu_reg_mem);
12560 %}
12561 
12562 // Logical Shift Right by 8-bit immediate
12563 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12564 %{
12565   predicate(!UseAPX);
12566   match(Set dst (URShiftL dst shift));
12567   effect(KILL cr);
12568 
12569   format %{ "shrq    $dst, $shift" %}
12570   ins_encode %{
12571     __ shrq($dst$$Register, $shift$$constant);
12572   %}
12573   ins_pipe(ialu_reg);
12574 %}
12575 
12576 // Logical Shift Right by 8-bit immediate
12577 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12578 %{
12579   predicate(UseAPX);
12580   match(Set dst (URShiftL src shift));
12581   effect(KILL cr);
12582   flag(PD::Flag_ndd_demotable_opr1);
12583 
12584   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12585   ins_encode %{
12586     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12587   %}
12588   ins_pipe(ialu_reg);
12589 %}
12590 
12591 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12592 %{
12593   predicate(UseAPX);
12594   match(Set dst (URShiftL (LoadL src) shift));
12595   effect(KILL cr);
12596 
12597   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12598   ins_encode %{
12599     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12600   %}
12601   ins_pipe(ialu_reg);
12602 %}
12603 
12604 // Logical Shift Right by 8-bit immediate
12605 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12606 %{
12607   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12608   effect(KILL cr);
12609 
12610   format %{ "shrq    $dst, $shift" %}
12611   ins_encode %{
12612     __ shrq($dst$$Address, $shift$$constant);
12613   %}
12614   ins_pipe(ialu_mem_imm);
12615 %}
12616 
12617 // Logical Shift Right by variable
12618 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12619 %{
12620   predicate(!VM_Version::supports_bmi2());
12621   match(Set dst (URShiftL dst shift));
12622   effect(KILL cr);
12623 
12624   format %{ "shrq    $dst, $shift" %}
12625   ins_encode %{
12626     __ shrq($dst$$Register);
12627   %}
12628   ins_pipe(ialu_reg_reg);
12629 %}
12630 
12631 // Logical Shift Right by variable
12632 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12633 %{
12634   predicate(!VM_Version::supports_bmi2());
12635   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12636   effect(KILL cr);
12637 
12638   format %{ "shrq    $dst, $shift" %}
12639   ins_encode %{
12640     __ shrq($dst$$Address);
12641   %}
12642   ins_pipe(ialu_mem_reg);
12643 %}
12644 
12645 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12646 %{
12647   predicate(VM_Version::supports_bmi2());
12648   match(Set dst (URShiftL src shift));
12649 
12650   format %{ "shrxq   $dst, $src, $shift" %}
12651   ins_encode %{
12652     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12653   %}
12654   ins_pipe(ialu_reg_reg);
12655 %}
12656 
12657 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12658 %{
12659   predicate(VM_Version::supports_bmi2());
12660   match(Set dst (URShiftL (LoadL src) shift));
12661   ins_cost(175);
12662   format %{ "shrxq   $dst, $src, $shift" %}
12663   ins_encode %{
12664     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12665   %}
12666   ins_pipe(ialu_reg_mem);
12667 %}
12668 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
12670 // This idiom is used by the compiler for the i2b bytecode.
12671 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12672 %{
12673   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12674 
12675   format %{ "movsbl  $dst, $src\t# i2b" %}
12676   ins_encode %{
12677     __ movsbl($dst$$Register, $src$$Register);
12678   %}
12679   ins_pipe(ialu_reg_reg);
12680 %}
12681 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12684 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12685 %{
12686   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12687 
12688   format %{ "movswl  $dst, $src\t# i2s" %}
12689   ins_encode %{
12690     __ movswl($dst$$Register, $src$$Register);
12691   %}
12692   ins_pipe(ialu_reg_reg);
12693 %}
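
// A C sketch of the two idioms above (illustrative only, assuming 32-bit
// int): each shift pair sign-extends the low 8 or 16 bits, which MOVSX
// performs in a single instruction:
//   int32_t i2b(int32_t x) { return (int32_t)(int8_t)  x; } // (x << 24) >> 24
//   int32_t i2s(int32_t x) { return (int32_t)(int16_t) x; } // (x << 16) >> 16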
12694 
12695 // ROL/ROR instructions
12696 
12697 // Rotate left by constant.
12698 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12699 %{
12700   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12701   match(Set dst (RotateLeft dst shift));
12702   effect(KILL cr);
12703   format %{ "roll    $dst, $shift" %}
12704   ins_encode %{
12705     __ roll($dst$$Register, $shift$$constant);
12706   %}
12707   ins_pipe(ialu_reg);
12708 %}
12709 
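// Rotate Left by constant, via BMI2. There is no "rolx" encoding, so the
// left rotate is synthesized from RORX using the identity (counts taken
// mod 32):
//   rol32(x, s) == ror32(x, 32 - s)
// RORX takes its count as an immediate and does not write the flags, which
// is why these rules need neither RCX pinning nor a KILL of cr.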
12710 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12711 %{
12712   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12713   match(Set dst (RotateLeft src shift));
12714   format %{ "rolxl   $dst, $src, $shift" %}
12715   ins_encode %{
12716     int shift = 32 - ($shift$$constant & 31);
12717     __ rorxl($dst$$Register, $src$$Register, shift);
12718   %}
12719   ins_pipe(ialu_reg_reg);
12720 %}
12721 
12722 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12723 %{
12724   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12725   match(Set dst (RotateLeft (LoadI src) shift));
12726   ins_cost(175);
12727   format %{ "rolxl   $dst, $src, $shift" %}
12728   ins_encode %{
12729     int shift = 32 - ($shift$$constant & 31);
12730     __ rorxl($dst$$Register, $src$$Address, shift);
12731   %}
12732   ins_pipe(ialu_reg_mem);
12733 %}
12734 
12735 // Rotate Left by variable
12736 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12737 %{
12738   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12739   match(Set dst (RotateLeft dst shift));
12740   effect(KILL cr);
12741   format %{ "roll    $dst, $shift" %}
12742   ins_encode %{
12743     __ roll($dst$$Register);
12744   %}
12745   ins_pipe(ialu_reg_reg);
12746 %}
12747 
12748 // Rotate Left by variable
12749 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12750 %{
12751   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12752   match(Set dst (RotateLeft src shift));
12753   effect(KILL cr);
12754   flag(PD::Flag_ndd_demotable_opr1);
12755 
12756   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12757   ins_encode %{
12758     __ eroll($dst$$Register, $src$$Register, false);
12759   %}
12760   ins_pipe(ialu_reg_reg);
12761 %}
12762 
12763 // Rotate Right by constant.
12764 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12765 %{
12766   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12767   match(Set dst (RotateRight dst shift));
12768   effect(KILL cr);
12769   format %{ "rorl    $dst, $shift" %}
12770   ins_encode %{
12771     __ rorl($dst$$Register, $shift$$constant);
12772   %}
12773   ins_pipe(ialu_reg);
12774 %}
12775 
12776 // Rotate Right by constant.
12777 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12778 %{
12779   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12780   match(Set dst (RotateRight src shift));
12781   format %{ "rorxl   $dst, $src, $shift" %}
12782   ins_encode %{
12783     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12784   %}
12785   ins_pipe(ialu_reg_reg);
12786 %}
12787 
12788 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12789 %{
12790   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12791   match(Set dst (RotateRight (LoadI src) shift));
12792   ins_cost(175);
12793   format %{ "rorxl   $dst, $src, $shift" %}
12794   ins_encode %{
12795     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12796   %}
12797   ins_pipe(ialu_reg_mem);
12798 %}
12799 
12800 // Rotate Right by variable
12801 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12802 %{
12803   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12804   match(Set dst (RotateRight dst shift));
12805   effect(KILL cr);
12806   format %{ "rorl    $dst, $shift" %}
12807   ins_encode %{
12808     __ rorl($dst$$Register);
12809   %}
12810   ins_pipe(ialu_reg_reg);
12811 %}
12812 
12813 // Rotate Right by variable
12814 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12815 %{
12816   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12817   match(Set dst (RotateRight src shift));
12818   effect(KILL cr);
12819   flag(PD::Flag_ndd_demotable_opr1);
12820 
  format %{ "erorl    $dst, $src, $shift\t# rotate right (int ndd)" %}
12822   ins_encode %{
12823     __ erorl($dst$$Register, $src$$Register, false);
12824   %}
12825   ins_pipe(ialu_reg_reg);
12826 %}
12827 
12828 // Rotate Left by constant.
12829 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12830 %{
12831   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12832   match(Set dst (RotateLeft dst shift));
12833   effect(KILL cr);
12834   format %{ "rolq    $dst, $shift" %}
12835   ins_encode %{
12836     __ rolq($dst$$Register, $shift$$constant);
12837   %}
12838   ins_pipe(ialu_reg);
12839 %}
12840 
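// Rotate Left by constant, via BMI2, using the same RORX complement trick
// as the int form above: rol64(x, s) == ror64(x, 64 - s), counts mod 64.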
12841 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12842 %{
12843   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12844   match(Set dst (RotateLeft src shift));
12845   format %{ "rolxq   $dst, $src, $shift" %}
12846   ins_encode %{
12847     int shift = 64 - ($shift$$constant & 63);
12848     __ rorxq($dst$$Register, $src$$Register, shift);
12849   %}
12850   ins_pipe(ialu_reg_reg);
12851 %}
12852 
12853 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12854 %{
12855   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12856   match(Set dst (RotateLeft (LoadL src) shift));
12857   ins_cost(175);
12858   format %{ "rolxq   $dst, $src, $shift" %}
12859   ins_encode %{
12860     int shift = 64 - ($shift$$constant & 63);
12861     __ rorxq($dst$$Register, $src$$Address, shift);
12862   %}
12863   ins_pipe(ialu_reg_mem);
12864 %}
12865 
12866 // Rotate Left by variable
12867 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12868 %{
12869   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12870   match(Set dst (RotateLeft dst shift));
12871   effect(KILL cr);
12872 
12873   format %{ "rolq    $dst, $shift" %}
12874   ins_encode %{
12875     __ rolq($dst$$Register);
12876   %}
12877   ins_pipe(ialu_reg_reg);
12878 %}
12879 
12880 // Rotate Left by variable
12881 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12882 %{
12883   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12884   match(Set dst (RotateLeft src shift));
12885   effect(KILL cr);
12886   flag(PD::Flag_ndd_demotable_opr1);
12887 
  format %{ "erolq    $dst, $src, $shift\t# rotate left (long ndd)" %}
12889   ins_encode %{
12890     __ erolq($dst$$Register, $src$$Register, false);
12891   %}
12892   ins_pipe(ialu_reg_reg);
12893 %}
12894 
12895 // Rotate Right by constant.
12896 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12897 %{
12898   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12899   match(Set dst (RotateRight dst shift));
12900   effect(KILL cr);
12901   format %{ "rorq    $dst, $shift" %}
12902   ins_encode %{
12903     __ rorq($dst$$Register, $shift$$constant);
12904   %}
12905   ins_pipe(ialu_reg);
12906 %}
12907 
12908 // Rotate Right by constant
12909 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12910 %{
12911   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12912   match(Set dst (RotateRight src shift));
12913   format %{ "rorxq   $dst, $src, $shift" %}
12914   ins_encode %{
12915     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12916   %}
12917   ins_pipe(ialu_reg_reg);
12918 %}
12919 
12920 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12921 %{
12922   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12923   match(Set dst (RotateRight (LoadL src) shift));
12924   ins_cost(175);
12925   format %{ "rorxq   $dst, $src, $shift" %}
12926   ins_encode %{
12927     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12928   %}
12929   ins_pipe(ialu_reg_mem);
12930 %}
12931 
12932 // Rotate Right by variable
12933 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12934 %{
12935   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12936   match(Set dst (RotateRight dst shift));
12937   effect(KILL cr);
12938   format %{ "rorq    $dst, $shift" %}
12939   ins_encode %{
12940     __ rorq($dst$$Register);
12941   %}
12942   ins_pipe(ialu_reg_reg);
12943 %}
12944 
12945 // Rotate Right by variable
12946 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12947 %{
12948   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12949   match(Set dst (RotateRight src shift));
12950   effect(KILL cr);
12951   flag(PD::Flag_ndd_demotable_opr1);
12952 
  format %{ "erorq    $dst, $src, $shift\t# rotate right (long ndd)" %}
12954   ins_encode %{
12955     __ erorq($dst$$Register, $src$$Register, false);
12956   %}
12957   ins_pipe(ialu_reg_reg);
12958 %}
12959 
12960 //----------------------------- CompressBits/ExpandBits ------------------------
12961 
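// PEXT gathers the src bits selected by mask into the low-order bits of dst;
// PDEP is the inverse, scattering the low-order src bits out to the mask
// positions. A bit-at-a-time C sketch of PEXT (illustrative only, not the
// hardware algorithm):
//   uint64_t pext64(uint64_t x, uint64_t m) {
//     uint64_t r = 0;
//     for (uint64_t bit = 1; m != 0; m &= m - 1, bit <<= 1) {
//       if (x & (m & -m)) r |= bit;   // m & -m isolates the lowest mask bit
//     }
//     return r;
//   }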
12962 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12963   predicate(n->bottom_type()->isa_long());
12964   match(Set dst (CompressBits src mask));
12965   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12966   ins_encode %{
12967     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12968   %}
12969   ins_pipe( pipe_slow );
12970 %}
12971 
12972 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12973   predicate(n->bottom_type()->isa_long());
12974   match(Set dst (ExpandBits src mask));
12975   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12976   ins_encode %{
12977     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12978   %}
12979   ins_pipe( pipe_slow );
12980 %}
12981 
12982 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12983   predicate(n->bottom_type()->isa_long());
12984   match(Set dst (CompressBits src (LoadL mask)));
12985   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12986   ins_encode %{
12987     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12988   %}
12989   ins_pipe( pipe_slow );
12990 %}
12991 
12992 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12993   predicate(n->bottom_type()->isa_long());
12994   match(Set dst (ExpandBits src (LoadL mask)));
12995   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12996   ins_encode %{
12997     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12998   %}
12999   ins_pipe( pipe_slow );
13000 %}
13001 
13002 
13003 // Logical Instructions
13004 
13005 // Integer Logical Instructions
13006 
13007 // And Instructions
13008 // And Register with Register
13009 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13010 %{
13011   predicate(!UseAPX);
13012   match(Set dst (AndI dst src));
13013   effect(KILL cr);
13014   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13015 
13016   format %{ "andl    $dst, $src\t# int" %}
13017   ins_encode %{
13018     __ andl($dst$$Register, $src$$Register);
13019   %}
13020   ins_pipe(ialu_reg_reg);
13021 %}
13022 
13023 // And Register with Register using New Data Destination (NDD)
13024 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13025 %{
13026   predicate(UseAPX);
13027   match(Set dst (AndI src1 src2));
13028   effect(KILL cr);
13029   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13030 
13031   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
13032   ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13036   ins_pipe(ialu_reg_reg);
13037 %}
13038 
13039 // And Register with Immediate 255
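// (x & 0xFF) is a zero-extension of the low byte, so the rule emits MOVZX
// rather than AND; MOVZX leaves the flags untouched, hence no KILL of cr
// here or in the 0xFFFF variants below.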
13040 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
13041 %{
13042   match(Set dst (AndI src mask));
13043 
13044   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
13045   ins_encode %{
13046     __ movzbl($dst$$Register, $src$$Register);
13047   %}
13048   ins_pipe(ialu_reg);
13049 %}
13050 
13051 // And Register with Immediate 255 and promote to long
13052 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
13053 %{
13054   match(Set dst (ConvI2L (AndI src mask)));
13055 
13056   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
13057   ins_encode %{
13058     __ movzbl($dst$$Register, $src$$Register);
13059   %}
13060   ins_pipe(ialu_reg);
13061 %}
13062 
13063 // And Register with Immediate 65535
13064 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13065 %{
13066   match(Set dst (AndI src mask));
13067 
13068   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
13069   ins_encode %{
13070     __ movzwl($dst$$Register, $src$$Register);
13071   %}
13072   ins_pipe(ialu_reg);
13073 %}
13074 
13075 // And Register with Immediate 65535 and promote to long
13076 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13077 %{
13078   match(Set dst (ConvI2L (AndI src mask)));
13079 
13080   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
13081   ins_encode %{
13082     __ movzwl($dst$$Register, $src$$Register);
13083   %}
13084   ins_pipe(ialu_reg);
13085 %}
13086 
13087 // Can skip int2long conversions after AND with small bitmask
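// BZHI copies the source and clears every bit at an index >= the count held
// in $tmp, so for a mask of the form 2^k - 1 (immI_Pow2M1) the zero-extended
// AND collapses to one instruction once k = exact_log2(mask + 1) has been
// materialized: (long)(x & ((1 << k) - 1)) == bzhi(x, k).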
13088 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13089 %{
13090   predicate(VM_Version::supports_bmi2());
13091   ins_cost(125);
13092   effect(TEMP tmp, KILL cr);
13093   match(Set dst (ConvI2L (AndI src mask)));
  format %{ "bzhiq $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13095   ins_encode %{
13096     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13097     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13098   %}
13099   ins_pipe(ialu_reg_reg);
13100 %}
13101 
13102 // And Register with Immediate
13103 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13104 %{
13105   predicate(!UseAPX);
13106   match(Set dst (AndI dst src));
13107   effect(KILL cr);
13108   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13109 
13110   format %{ "andl    $dst, $src\t# int" %}
13111   ins_encode %{
13112     __ andl($dst$$Register, $src$$constant);
13113   %}
13114   ins_pipe(ialu_reg);
13115 %}
13116 
13117 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13118 %{
13119   predicate(UseAPX);
13120   match(Set dst (AndI src1 src2));
13121   effect(KILL cr);
13122   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13123 
13124   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13125   ins_encode %{
13126     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13127   %}
13128   ins_pipe(ialu_reg);
13129 %}
13130 
13131 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13132 %{
13133   predicate(UseAPX);
13134   match(Set dst (AndI (LoadI src1) src2));
13135   effect(KILL cr);
13136   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13137 
13138   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13139   ins_encode %{
13140     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13141   %}
13142   ins_pipe(ialu_reg);
13143 %}
13144 
13145 // And Register with Memory
13146 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13147 %{
13148   predicate(!UseAPX);
13149   match(Set dst (AndI dst (LoadI src)));
13150   effect(KILL cr);
13151   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13152 
13153   ins_cost(150);
13154   format %{ "andl    $dst, $src\t# int" %}
13155   ins_encode %{
13156     __ andl($dst$$Register, $src$$Address);
13157   %}
13158   ins_pipe(ialu_reg_mem);
13159 %}
13160 
13161 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13162 %{
13163   predicate(UseAPX);
13164   match(Set dst (AndI src1 (LoadI src2)));
13165   effect(KILL cr);
13166   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13167 
13168   ins_cost(150);
13169   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13170   ins_encode %{
13171     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13172   %}
13173   ins_pipe(ialu_reg_mem);
13174 %}
13175 
13176 // And Memory with Register
13177 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13178 %{
13179   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13180   effect(KILL cr);
13181   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13182 
13183   ins_cost(150);
13184   format %{ "andb    $dst, $src\t# byte" %}
13185   ins_encode %{
13186     __ andb($dst$$Address, $src$$Register);
13187   %}
13188   ins_pipe(ialu_mem_reg);
13189 %}
13190 
13191 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13192 %{
13193   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13194   effect(KILL cr);
13195   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13196 
13197   ins_cost(150);
13198   format %{ "andl    $dst, $src\t# int" %}
13199   ins_encode %{
13200     __ andl($dst$$Address, $src$$Register);
13201   %}
13202   ins_pipe(ialu_mem_reg);
13203 %}
13204 
13205 // And Memory with Immediate
13206 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13207 %{
13208   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13209   effect(KILL cr);
13210   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13211 
13212   ins_cost(125);
13213   format %{ "andl    $dst, $src\t# int" %}
13214   ins_encode %{
13215     __ andl($dst$$Address, $src$$constant);
13216   %}
13217   ins_pipe(ialu_mem_imm);
13218 %}
13219 
13220 // BMI1 instructions
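// The rules below recognize the canonical lowest-set-bit idioms and collapse
// each into a single BMI1 instruction (sketch, for an int x):
//   andn(a, b) == ~a & b        // matched as (AndI (XorI a -1) b)
//   blsi(x)    ==  x & -x       // isolate lowest set bit
//   blsmsk(x)  ==  x ^ (x - 1)  // mask up to and including lowest set bit
//   blsr(x)    ==  x & (x - 1)  // clear lowest set bit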
13221 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13222   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13223   predicate(UseBMI1Instructions);
13224   effect(KILL cr);
13225   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13226 
13227   ins_cost(125);
13228   format %{ "andnl  $dst, $src1, $src2" %}
13229 
13230   ins_encode %{
13231     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13232   %}
13233   ins_pipe(ialu_reg_mem);
13234 %}
13235 
13236 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13237   match(Set dst (AndI (XorI src1 minus_1) src2));
13238   predicate(UseBMI1Instructions);
13239   effect(KILL cr);
13240   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13241 
13242   format %{ "andnl  $dst, $src1, $src2" %}
13243 
13244   ins_encode %{
13245     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13246   %}
13247   ins_pipe(ialu_reg);
13248 %}
13249 
13250 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13251   match(Set dst (AndI (SubI imm_zero src) src));
13252   predicate(UseBMI1Instructions);
13253   effect(KILL cr);
13254   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13255 
13256   format %{ "blsil  $dst, $src" %}
13257 
13258   ins_encode %{
13259     __ blsil($dst$$Register, $src$$Register);
13260   %}
13261   ins_pipe(ialu_reg);
13262 %}
13263 
13264 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13265   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13266   predicate(UseBMI1Instructions);
13267   effect(KILL cr);
13268   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13269 
13270   ins_cost(125);
13271   format %{ "blsil  $dst, $src" %}
13272 
13273   ins_encode %{
13274     __ blsil($dst$$Register, $src$$Address);
13275   %}
13276   ins_pipe(ialu_reg_mem);
13277 %}
13278 
13279 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13280 %{
13281   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13282   predicate(UseBMI1Instructions);
13283   effect(KILL cr);
13284   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13285 
13286   ins_cost(125);
13287   format %{ "blsmskl $dst, $src" %}
13288 
13289   ins_encode %{
13290     __ blsmskl($dst$$Register, $src$$Address);
13291   %}
13292   ins_pipe(ialu_reg_mem);
13293 %}
13294 
13295 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13296 %{
13297   match(Set dst (XorI (AddI src minus_1) src));
13298   predicate(UseBMI1Instructions);
13299   effect(KILL cr);
13300   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13301 
13302   format %{ "blsmskl $dst, $src" %}
13303 
13304   ins_encode %{
13305     __ blsmskl($dst$$Register, $src$$Register);
13306   %}
13307 
13308   ins_pipe(ialu_reg);
13309 %}
13310 
13311 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13312 %{
13313   match(Set dst (AndI (AddI src minus_1) src) );
13314   predicate(UseBMI1Instructions);
13315   effect(KILL cr);
13316   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13317 
13318   format %{ "blsrl  $dst, $src" %}
13319 
13320   ins_encode %{
13321     __ blsrl($dst$$Register, $src$$Register);
13322   %}
13323 
  ins_pipe(ialu_reg);
13325 %}
13326 
13327 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13328 %{
13329   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13330   predicate(UseBMI1Instructions);
13331   effect(KILL cr);
13332   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13333 
13334   ins_cost(125);
13335   format %{ "blsrl  $dst, $src" %}
13336 
13337   ins_encode %{
13338     __ blsrl($dst$$Register, $src$$Address);
13339   %}
13340 
  ins_pipe(ialu_reg_mem);
13342 %}
13343 
13344 // Or Instructions
13345 // Or Register with Register
13346 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13347 %{
13348   predicate(!UseAPX);
13349   match(Set dst (OrI dst src));
13350   effect(KILL cr);
13351   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13352 
13353   format %{ "orl     $dst, $src\t# int" %}
13354   ins_encode %{
13355     __ orl($dst$$Register, $src$$Register);
13356   %}
13357   ins_pipe(ialu_reg_reg);
13358 %}
13359 
13360 // Or Register with Register using New Data Destination (NDD)
13361 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13362 %{
13363   predicate(UseAPX);
13364   match(Set dst (OrI src1 src2));
13365   effect(KILL cr);
13366   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13367 
13368   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13369   ins_encode %{
13370     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13371   %}
13372   ins_pipe(ialu_reg_reg);
13373 %}
13374 
13375 // Or Register with Immediate
13376 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13377 %{
13378   predicate(!UseAPX);
13379   match(Set dst (OrI dst src));
13380   effect(KILL cr);
13381   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13382 
13383   format %{ "orl     $dst, $src\t# int" %}
13384   ins_encode %{
13385     __ orl($dst$$Register, $src$$constant);
13386   %}
13387   ins_pipe(ialu_reg);
13388 %}
13389 
13390 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13391 %{
13392   predicate(UseAPX);
13393   match(Set dst (OrI src1 src2));
13394   effect(KILL cr);
13395   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13396 
13397   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13398   ins_encode %{
13399     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13400   %}
13401   ins_pipe(ialu_reg);
13402 %}
13403 
13404 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13405 %{
13406   predicate(UseAPX);
13407   match(Set dst (OrI src1 src2));
13408   effect(KILL cr);
13409   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13410 
13411   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13412   ins_encode %{
13413     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13414   %}
13415   ins_pipe(ialu_reg);
13416 %}
13417 
13418 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13419 %{
13420   predicate(UseAPX);
13421   match(Set dst (OrI (LoadI src1) src2));
13422   effect(KILL cr);
13423   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13424 
13425   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13426   ins_encode %{
13427     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13428   %}
13429   ins_pipe(ialu_reg);
13430 %}
13431 
13432 // Or Register with Memory
13433 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13434 %{
13435   predicate(!UseAPX);
13436   match(Set dst (OrI dst (LoadI src)));
13437   effect(KILL cr);
13438   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13439 
13440   ins_cost(150);
13441   format %{ "orl     $dst, $src\t# int" %}
13442   ins_encode %{
13443     __ orl($dst$$Register, $src$$Address);
13444   %}
13445   ins_pipe(ialu_reg_mem);
13446 %}
13447 
13448 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13449 %{
13450   predicate(UseAPX);
13451   match(Set dst (OrI src1 (LoadI src2)));
13452   effect(KILL cr);
13453   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13454 
13455   ins_cost(150);
13456   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13457   ins_encode %{
13458     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13459   %}
13460   ins_pipe(ialu_reg_mem);
13461 %}
13462 
13463 // Or Memory with Register
13464 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13465 %{
13466   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13467   effect(KILL cr);
13468   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13469 
13470   ins_cost(150);
13471   format %{ "orb    $dst, $src\t# byte" %}
13472   ins_encode %{
13473     __ orb($dst$$Address, $src$$Register);
13474   %}
13475   ins_pipe(ialu_mem_reg);
13476 %}
13477 
13478 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13479 %{
13480   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13481   effect(KILL cr);
13482   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13483 
13484   ins_cost(150);
13485   format %{ "orl     $dst, $src\t# int" %}
13486   ins_encode %{
13487     __ orl($dst$$Address, $src$$Register);
13488   %}
13489   ins_pipe(ialu_mem_reg);
13490 %}
13491 
13492 // Or Memory with Immediate
13493 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13494 %{
13495   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13496   effect(KILL cr);
13497   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13498 
13499   ins_cost(125);
13500   format %{ "orl     $dst, $src\t# int" %}
13501   ins_encode %{
13502     __ orl($dst$$Address, $src$$constant);
13503   %}
13504   ins_pipe(ialu_mem_imm);
13505 %}
13506 
13507 // Xor Instructions
13508 // Xor Register with Register
13509 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13510 %{
13511   predicate(!UseAPX);
13512   match(Set dst (XorI dst src));
13513   effect(KILL cr);
13514   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13515 
13516   format %{ "xorl    $dst, $src\t# int" %}
13517   ins_encode %{
13518     __ xorl($dst$$Register, $src$$Register);
13519   %}
13520   ins_pipe(ialu_reg_reg);
13521 %}
13522 
13523 // Xor Register with Register using New Data Destination (NDD)
13524 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13525 %{
13526   predicate(UseAPX);
13527   match(Set dst (XorI src1 src2));
13528   effect(KILL cr);
13529   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13530 
13531   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13532   ins_encode %{
13533     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13534   %}
13535   ins_pipe(ialu_reg_reg);
13536 %}
13537 
13538 // Xor Register with Immediate -1
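// (x ^ -1) == ~x, so a single NOT suffices; NOT updates no flags, which is
// why these rules carry no rFlagsReg effect.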
13539 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13540 %{
13541   predicate(!UseAPX);
13542   match(Set dst (XorI dst imm));
13543 
13544   format %{ "notl    $dst" %}
13545   ins_encode %{
13546      __ notl($dst$$Register);
13547   %}
13548   ins_pipe(ialu_reg);
13549 %}
13550 
13551 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13552 %{
13553   match(Set dst (XorI src imm));
13554   predicate(UseAPX);
13555   flag(PD::Flag_ndd_demotable_opr1);
13556 
13557   format %{ "enotl    $dst, $src" %}
13558   ins_encode %{
13559      __ enotl($dst$$Register, $src$$Register);
13560   %}
13561   ins_pipe(ialu_reg);
13562 %}
13563 
13564 // Xor Register with Immediate
13565 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13566 %{
  // Exclude -1 here so that xorI_rReg_im1 is selected for it regardless of cost.
13568   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13569   match(Set dst (XorI dst src));
13570   effect(KILL cr);
13571   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13572 
13573   format %{ "xorl    $dst, $src\t# int" %}
13574   ins_encode %{
13575     __ xorl($dst$$Register, $src$$constant);
13576   %}
13577   ins_pipe(ialu_reg);
13578 %}
13579 
13580 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13581 %{
  // Exclude -1 here so that xorI_rReg_im1_ndd is selected for it regardless of cost.
13583   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13584   match(Set dst (XorI src1 src2));
13585   effect(KILL cr);
13586   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13587 
13588   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13589   ins_encode %{
13590     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13591   %}
13592   ins_pipe(ialu_reg);
13593 %}
13594 
// Xor Memory with Immediate into Register (NDD)
13596 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13597 %{
13598   predicate(UseAPX);
13599   match(Set dst (XorI (LoadI src1) src2));
13600   effect(KILL cr);
13601   ins_cost(150);
13602   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13603 
13604   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13605   ins_encode %{
13606     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13607   %}
13608   ins_pipe(ialu_reg);
13609 %}
13610 
13611 // Xor Register with Memory
13612 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13613 %{
13614   predicate(!UseAPX);
13615   match(Set dst (XorI dst (LoadI src)));
13616   effect(KILL cr);
13617   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13618 
13619   ins_cost(150);
13620   format %{ "xorl    $dst, $src\t# int" %}
13621   ins_encode %{
13622     __ xorl($dst$$Register, $src$$Address);
13623   %}
13624   ins_pipe(ialu_reg_mem);
13625 %}
13626 
13627 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13628 %{
13629   predicate(UseAPX);
13630   match(Set dst (XorI src1 (LoadI src2)));
13631   effect(KILL cr);
13632   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13633 
13634   ins_cost(150);
13635   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13636   ins_encode %{
13637     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13638   %}
13639   ins_pipe(ialu_reg_mem);
13640 %}
13641 
13642 // Xor Memory with Register
13643 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13644 %{
13645   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13646   effect(KILL cr);
13647   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13648 
13649   ins_cost(150);
13650   format %{ "xorb    $dst, $src\t# byte" %}
13651   ins_encode %{
13652     __ xorb($dst$$Address, $src$$Register);
13653   %}
13654   ins_pipe(ialu_mem_reg);
13655 %}
13656 
13657 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13658 %{
13659   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13660   effect(KILL cr);
13661   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13662 
13663   ins_cost(150);
13664   format %{ "xorl    $dst, $src\t# int" %}
13665   ins_encode %{
13666     __ xorl($dst$$Address, $src$$Register);
13667   %}
13668   ins_pipe(ialu_mem_reg);
13669 %}
13670 
13671 // Xor Memory with Immediate
13672 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13673 %{
13674   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13675   effect(KILL cr);
13676   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13677 
13678   ins_cost(125);
13679   format %{ "xorl    $dst, $src\t# int" %}
13680   ins_encode %{
13681     __ xorl($dst$$Address, $src$$constant);
13682   %}
13683   ins_pipe(ialu_mem_imm);
13684 %}
13685 
13686 
13687 // Long Logical Instructions
13688 
13689 // And Instructions
13690 // And Register with Register
13691 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13692 %{
13693   predicate(!UseAPX);
13694   match(Set dst (AndL dst src));
13695   effect(KILL cr);
13696   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13697 
13698   format %{ "andq    $dst, $src\t# long" %}
13699   ins_encode %{
13700     __ andq($dst$$Register, $src$$Register);
13701   %}
13702   ins_pipe(ialu_reg_reg);
13703 %}
13704 
13705 // And Register with Register using New Data Destination (NDD)
13706 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13707 %{
13708   predicate(UseAPX);
13709   match(Set dst (AndL src1 src2));
13710   effect(KILL cr);
13711   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13712 
13713   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13714   ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13718   ins_pipe(ialu_reg_reg);
13719 %}
13720 
13721 // And Register with Immediate 255
13722 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13723 %{
13724   match(Set dst (AndL src mask));
13725 
13726   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13727   ins_encode %{
    // movzbl zeroes the upper 32 bits and does not need REX.W
13729     __ movzbl($dst$$Register, $src$$Register);
13730   %}
13731   ins_pipe(ialu_reg);
13732 %}
13733 
13734 // And Register with Immediate 65535
13735 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13736 %{
13737   match(Set dst (AndL src mask));
13738 
13739   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13740   ins_encode %{
    // movzwl zeroes the upper 32 bits and does not need REX.W
13742     __ movzwl($dst$$Register, $src$$Register);
13743   %}
13744   ins_pipe(ialu_reg);
13745 %}
13746 
13747 // And Register with Immediate
13748 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13749 %{
13750   predicate(!UseAPX);
13751   match(Set dst (AndL dst src));
13752   effect(KILL cr);
13753   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13754 
13755   format %{ "andq    $dst, $src\t# long" %}
13756   ins_encode %{
13757     __ andq($dst$$Register, $src$$constant);
13758   %}
13759   ins_pipe(ialu_reg);
13760 %}
13761 
13762 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13763 %{
13764   predicate(UseAPX);
13765   match(Set dst (AndL src1 src2));
13766   effect(KILL cr);
13767   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13768 
13769   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13770   ins_encode %{
13771     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13772   %}
13773   ins_pipe(ialu_reg);
13774 %}
13775 
13776 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13777 %{
13778   predicate(UseAPX);
13779   match(Set dst (AndL (LoadL src1) src2));
13780   effect(KILL cr);
13781   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13782 
13783   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13784   ins_encode %{
13785     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13786   %}
13787   ins_pipe(ialu_reg);
13788 %}
13789 
13790 // And Register with Memory
13791 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13792 %{
13793   predicate(!UseAPX);
13794   match(Set dst (AndL dst (LoadL src)));
13795   effect(KILL cr);
13796   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13797 
13798   ins_cost(150);
13799   format %{ "andq    $dst, $src\t# long" %}
13800   ins_encode %{
13801     __ andq($dst$$Register, $src$$Address);
13802   %}
13803   ins_pipe(ialu_reg_mem);
13804 %}
13805 
13806 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13807 %{
13808   predicate(UseAPX);
13809   match(Set dst (AndL src1 (LoadL src2)));
13810   effect(KILL cr);
13811   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13812 
13813   ins_cost(150);
13814   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13815   ins_encode %{
13816     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13817   %}
13818   ins_pipe(ialu_reg_mem);
13819 %}
13820 
13821 // And Memory with Register
13822 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13823 %{
13824   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13825   effect(KILL cr);
13826   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13827 
13828   ins_cost(150);
13829   format %{ "andq    $dst, $src\t# long" %}
13830   ins_encode %{
13831     __ andq($dst$$Address, $src$$Register);
13832   %}
13833   ins_pipe(ialu_mem_reg);
13834 %}
13835 
13836 // And Memory with Immediate
13837 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13838 %{
13839   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13840   effect(KILL cr);
13841   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13842 
13843   ins_cost(125);
13844   format %{ "andq    $dst, $src\t# long" %}
13845   ins_encode %{
13846     __ andq($dst$$Address, $src$$constant);
13847   %}
13848   ins_pipe(ialu_mem_imm);
13849 %}
13850 
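// Clearing a single bit of a long in memory, x & ~(1L << n), needs a full
// 64-bit mask once n > 30 (the mask no longer fits a sign-extended imm32),
// so the pattern below is matched to a single btrq that clears bit n in place.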
13851 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13852 %{
  // con must be a genuinely 64-bit immediate whose complement is a power of 2;
  // a plain AND immediate already handles 8/32-bit masks.
13855   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13856 
13857   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13858   effect(KILL cr);
13859 
13860   ins_cost(125);
13861   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13862   ins_encode %{
13863     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13864   %}
13865   ins_pipe(ialu_mem_imm);
13866 %}
13867 
13868 // BMI1 instructions
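// Sketch of what the BMI1 patterns below compute:
//   andnq   dst, a, b : dst = ~a & b          matched as (AndL (XorL a -1) b)
//   blsiq   dst, x    : dst = x & -x          isolate lowest set bit
//   blsmskq dst, x    : dst = x ^ (x - 1)     mask up to lowest set bit
//   blsrq   dst, x    : dst = x & (x - 1)     clear lowest set bit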
13869 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13870   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13871   predicate(UseBMI1Instructions);
13872   effect(KILL cr);
13873   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13874 
13875   ins_cost(125);
13876   format %{ "andnq  $dst, $src1, $src2" %}
13877 
13878   ins_encode %{
13879     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13880   %}
13881   ins_pipe(ialu_reg_mem);
13882 %}
13883 
13884 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13885   match(Set dst (AndL (XorL src1 minus_1) src2));
13886   predicate(UseBMI1Instructions);
13887   effect(KILL cr);
13888   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13889 
13890   format %{ "andnq  $dst, $src1, $src2" %}
13891 
13892   ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13894   %}
  ins_pipe(ialu_reg_reg);
13896 %}
13897 
13898 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13899   match(Set dst (AndL (SubL imm_zero src) src));
13900   predicate(UseBMI1Instructions);
13901   effect(KILL cr);
13902   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13903 
13904   format %{ "blsiq  $dst, $src" %}
13905 
13906   ins_encode %{
13907     __ blsiq($dst$$Register, $src$$Register);
13908   %}
13909   ins_pipe(ialu_reg);
13910 %}
13911 
13912 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13913   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13914   predicate(UseBMI1Instructions);
13915   effect(KILL cr);
13916   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13917 
13918   ins_cost(125);
13919   format %{ "blsiq  $dst, $src" %}
13920 
13921   ins_encode %{
13922     __ blsiq($dst$$Register, $src$$Address);
13923   %}
13924   ins_pipe(ialu_reg_mem);
13925 %}
13926 
13927 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13928 %{
13929   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13930   predicate(UseBMI1Instructions);
13931   effect(KILL cr);
13932   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13933 
13934   ins_cost(125);
13935   format %{ "blsmskq $dst, $src" %}
13936 
13937   ins_encode %{
13938     __ blsmskq($dst$$Register, $src$$Address);
13939   %}
13940   ins_pipe(ialu_reg_mem);
13941 %}
13942 
13943 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13944 %{
13945   match(Set dst (XorL (AddL src minus_1) src));
13946   predicate(UseBMI1Instructions);
13947   effect(KILL cr);
13948   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13949 
13950   format %{ "blsmskq $dst, $src" %}
13951 
13952   ins_encode %{
13953     __ blsmskq($dst$$Register, $src$$Register);
13954   %}
13955 
13956   ins_pipe(ialu_reg);
13957 %}
13958 
13959 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13960 %{
13961   match(Set dst (AndL (AddL src minus_1) src) );
13962   predicate(UseBMI1Instructions);
13963   effect(KILL cr);
13964   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13965 
13966   format %{ "blsrq  $dst, $src" %}
13967 
13968   ins_encode %{
13969     __ blsrq($dst$$Register, $src$$Register);
13970   %}
13971 
13972   ins_pipe(ialu_reg);
13973 %}
13974 
13975 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13976 %{
13977   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13978   predicate(UseBMI1Instructions);
13979   effect(KILL cr);
13980   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13981 
13982   ins_cost(125);
13983   format %{ "blsrq  $dst, $src" %}
13984 
13985   ins_encode %{
13986     __ blsrq($dst$$Register, $src$$Address);
13987   %}
13988 
  ins_pipe(ialu_reg_mem);
13990 %}
13991 
13992 // Or Instructions
13993 // Or Register with Register
13994 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13995 %{
13996   predicate(!UseAPX);
13997   match(Set dst (OrL dst src));
13998   effect(KILL cr);
13999   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14000 
14001   format %{ "orq     $dst, $src\t# long" %}
14002   ins_encode %{
14003     __ orq($dst$$Register, $src$$Register);
14004   %}
14005   ins_pipe(ialu_reg_reg);
14006 %}
14007 
14008 // Or Register with Register using New Data Destination (NDD)
14009 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14010 %{
14011   predicate(UseAPX);
14012   match(Set dst (OrL src1 src2));
14013   effect(KILL cr);
14014   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14015 
14016   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14017   ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
14021   ins_pipe(ialu_reg_reg);
14022 %}
14023 
14024 // Use any_RegP to match R15 (TLS register) without spilling.
14025 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst (CastP2X src)));
14027   effect(KILL cr);
14028   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14029 
14030   format %{ "orq     $dst, $src\t# long" %}
14031   ins_encode %{
14032     __ orq($dst$$Register, $src$$Register);
14033   %}
14034   ins_pipe(ialu_reg_reg);
14035 %}
14036 
14037 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
14039   effect(KILL cr);
14040   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14041 
14042   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14043   ins_encode %{
14044     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14045   %}
14046   ins_pipe(ialu_reg_reg);
14047 %}
14048 
14049 // Or Register with Immediate
14050 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14051 %{
14052   predicate(!UseAPX);
14053   match(Set dst (OrL dst src));
14054   effect(KILL cr);
14055   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14056 
14057   format %{ "orq     $dst, $src\t# long" %}
14058   ins_encode %{
14059     __ orq($dst$$Register, $src$$constant);
14060   %}
14061   ins_pipe(ialu_reg);
14062 %}
14063 
14064 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14065 %{
14066   predicate(UseAPX);
14067   match(Set dst (OrL src1 src2));
14068   effect(KILL cr);
14069   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14070 
14071   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14072   ins_encode %{
14073     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14074   %}
14075   ins_pipe(ialu_reg);
14076 %}
14077 
14078 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14079 %{
14080   predicate(UseAPX);
14081   match(Set dst (OrL src1 src2));
14082   effect(KILL cr);
14083   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14084 
14085   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
14086   ins_encode %{
14087     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14088   %}
14089   ins_pipe(ialu_reg);
14090 %}
14091 
// Or Memory Operand with Immediate into Register (NDD)
14093 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14094 %{
14095   predicate(UseAPX);
14096   match(Set dst (OrL (LoadL src1) src2));
14097   effect(KILL cr);
14098   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14099 
14100   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14101   ins_encode %{
14102     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14103   %}
14104   ins_pipe(ialu_reg);
14105 %}
14106 
14107 // Or Register with Memory
14108 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14109 %{
14110   predicate(!UseAPX);
14111   match(Set dst (OrL dst (LoadL src)));
14112   effect(KILL cr);
14113   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14114 
14115   ins_cost(150);
14116   format %{ "orq     $dst, $src\t# long" %}
14117   ins_encode %{
14118     __ orq($dst$$Register, $src$$Address);
14119   %}
14120   ins_pipe(ialu_reg_mem);
14121 %}
14122 
14123 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14124 %{
14125   predicate(UseAPX);
14126   match(Set dst (OrL src1 (LoadL src2)));
14127   effect(KILL cr);
14128   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14129 
14130   ins_cost(150);
14131   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14132   ins_encode %{
14133     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14134   %}
14135   ins_pipe(ialu_reg_mem);
14136 %}
14137 
14138 // Or Memory with Register
14139 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14140 %{
14141   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14142   effect(KILL cr);
14143   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14144 
14145   ins_cost(150);
14146   format %{ "orq     $dst, $src\t# long" %}
14147   ins_encode %{
14148     __ orq($dst$$Address, $src$$Register);
14149   %}
14150   ins_pipe(ialu_mem_reg);
14151 %}
14152 
14153 // Or Memory with Immediate
14154 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14155 %{
14156   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14157   effect(KILL cr);
14158   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14159 
14160   ins_cost(125);
14161   format %{ "orq     $dst, $src\t# long" %}
14162   ins_encode %{
14163     __ orq($dst$$Address, $src$$constant);
14164   %}
14165   ins_pipe(ialu_mem_imm);
14166 %}
14167 
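// Setting a single bit of a long in memory, x | (1L << n), cannot use a
// sign-extended imm32 once the bit index exceeds 31, so the pattern below
// is matched to a single btsq that sets bit n in place.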
14168 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14169 %{
14170   // con should be a pure 64-bit power of 2 immediate
14171   // because AND/OR works well enough for 8/32-bit values.
14172   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14173 
14174   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14175   effect(KILL cr);
14176 
14177   ins_cost(125);
14178   format %{ "btsq    $dst, log2($con)\t# long" %}
14179   ins_encode %{
14180     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14181   %}
14182   ins_pipe(ialu_mem_imm);
14183 %}
14184 
14185 // Xor Instructions
14186 // Xor Register with Register
14187 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14188 %{
14189   predicate(!UseAPX);
14190   match(Set dst (XorL dst src));
14191   effect(KILL cr);
14192   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14193 
14194   format %{ "xorq    $dst, $src\t# long" %}
14195   ins_encode %{
14196     __ xorq($dst$$Register, $src$$Register);
14197   %}
14198   ins_pipe(ialu_reg_reg);
14199 %}
14200 
14201 // Xor Register with Register using New Data Destination (NDD)
14202 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14203 %{
14204   predicate(UseAPX);
14205   match(Set dst (XorL src1 src2));
14206   effect(KILL cr);
14207   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14208 
14209   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14210   ins_encode %{
14211     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14212   %}
14213   ins_pipe(ialu_reg_reg);
14214 %}
14215 
14216 // Xor Register with Immediate -1
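// x ^ -1 == ~x, so a single notq suffices. NOT does not modify EFLAGS,
// which is why this rule needs no KILL cr effect.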
14217 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14218 %{
14219   predicate(!UseAPX);
14220   match(Set dst (XorL dst imm));
14221 
14222   format %{ "notq   $dst" %}
14223   ins_encode %{
14224      __ notq($dst$$Register);
14225   %}
14226   ins_pipe(ialu_reg);
14227 %}
14228 
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14230 %{
14231   predicate(UseAPX);
14232   match(Set dst (XorL src imm));
14233   flag(PD::Flag_ndd_demotable_opr1);
14234 
14235   format %{ "enotq   $dst, $src" %}
14236   ins_encode %{
14237     __ enotq($dst$$Register, $src$$Register);
14238   %}
14239   ins_pipe(ialu_reg);
14240 %}
14241 
14242 // Xor Register with Immediate
14243 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14244 %{
  // Strict predicate so that xorL_rReg_im1 is always selected when immL32 src is -1,
  // independent of cost.
14246   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14247   match(Set dst (XorL dst src));
14248   effect(KILL cr);
14249   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14250 
14251   format %{ "xorq    $dst, $src\t# long" %}
14252   ins_encode %{
14253     __ xorq($dst$$Register, $src$$constant);
14254   %}
14255   ins_pipe(ialu_reg);
14256 %}
14257 
14258 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14259 %{
  // Strict predicate so that xorL_rReg_im1_ndd is always selected when immL32 src2 is -1,
  // independent of cost.
14261   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14262   match(Set dst (XorL src1 src2));
14263   effect(KILL cr);
14264   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14265 
14266   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14267   ins_encode %{
14268     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14269   %}
14270   ins_pipe(ialu_reg);
14271 %}
14272 
// Xor Memory Operand with Immediate into Register (NDD)
14274 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14275 %{
14276   predicate(UseAPX);
14277   match(Set dst (XorL (LoadL src1) src2));
14278   effect(KILL cr);
14279   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14280   ins_cost(150);
14281 
14282   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14283   ins_encode %{
14284     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14285   %}
14286   ins_pipe(ialu_reg);
14287 %}
14288 
14289 // Xor Register with Memory
14290 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14291 %{
14292   predicate(!UseAPX);
14293   match(Set dst (XorL dst (LoadL src)));
14294   effect(KILL cr);
14295   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14296 
14297   ins_cost(150);
14298   format %{ "xorq    $dst, $src\t# long" %}
14299   ins_encode %{
14300     __ xorq($dst$$Register, $src$$Address);
14301   %}
14302   ins_pipe(ialu_reg_mem);
14303 %}
14304 
14305 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14306 %{
14307   predicate(UseAPX);
14308   match(Set dst (XorL src1 (LoadL src2)));
14309   effect(KILL cr);
14310   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14311 
14312   ins_cost(150);
14313   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14314   ins_encode %{
14315     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14316   %}
14317   ins_pipe(ialu_reg_mem);
14318 %}
14319 
14320 // Xor Memory with Register
14321 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14322 %{
14323   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14324   effect(KILL cr);
14325   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14326 
14327   ins_cost(150);
14328   format %{ "xorq    $dst, $src\t# long" %}
14329   ins_encode %{
14330     __ xorq($dst$$Address, $src$$Register);
14331   %}
14332   ins_pipe(ialu_mem_reg);
14333 %}
14334 
14335 // Xor Memory with Immediate
14336 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14337 %{
14338   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14339   effect(KILL cr);
14340   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14341 
14342   ins_cost(125);
14343   format %{ "xorq    $dst, $src\t# long" %}
14344   ins_encode %{
14345     __ xorq($dst$$Address, $src$$constant);
14346   %}
14347   ins_pipe(ialu_mem_imm);
14348 %}
14349 
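// cmpLTMask materializes a signed-compare mask: dst = (p < q) ? -1 : 0,
// built as setcc(less) producing 0/1 followed by negl producing 0/-1.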
14350 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14351 %{
14352   match(Set dst (CmpLTMask p q));
14353   effect(KILL cr);
14354 
14355   ins_cost(400);
  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setcc   $dst\t# emits setlt + movzbl, or setzul for APX\n\t"
            "negl    $dst" %}
14359   ins_encode %{
14360     __ cmpl($p$$Register, $q$$Register);
14361     __ setcc(Assembler::less, $dst$$Register);
14362     __ negl($dst$$Register);
14363   %}
14364   ins_pipe(pipe_slow);
14365 %}
14366 
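// Special case q == 0: the sign bit of dst already encodes dst < 0, so an
// arithmetic shift right by 31 broadcasts it, yielding -1 or 0 directly.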
14367 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14368 %{
14369   match(Set dst (CmpLTMask dst zero));
14370   effect(KILL cr);
14371 
14372   ins_cost(100);
14373   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14374   ins_encode %{
14375     __ sarl($dst$$Register, 31);
14376   %}
14377   ins_pipe(ialu_reg);
14378 %}
14379 
14380 /* Better to save a register than avoid a branch */
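// The matched tree computes p = (p - q) + ((p < q) ? y : 0); the subl sets
// the flags anyway, so one conditional branch replaces the explicit mask.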
14381 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14382 %{
14383   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14384   effect(KILL cr);
14385   ins_cost(300);
14386   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14387             "jge     done\n\t"
14388             "addl    $p,$y\n"
14389             "done:   " %}
14390   ins_encode %{
14391     Register Rp = $p$$Register;
14392     Register Rq = $q$$Register;
14393     Register Ry = $y$$Register;
14394     Label done;
14395     __ subl(Rp, Rq);
14396     __ jccb(Assembler::greaterEqual, done);
14397     __ addl(Rp, Ry);
14398     __ bind(done);
14399   %}
14400   ins_pipe(pipe_cmplt);
14401 %}
14402 
14403 /* Better to save a register than avoid a branch */
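// The matched tree computes y = (p < q) ? y : 0; a compare plus a skipped
// xorl avoids materializing the mask in a register.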
14404 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14405 %{
14406   match(Set y (AndI (CmpLTMask p q) y));
14407   effect(KILL cr);
14408 
14409   ins_cost(300);
14410 
14411   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14412             "jlt     done\n\t"
14413             "xorl    $y, $y\n"
14414             "done:   " %}
14415   ins_encode %{
14416     Register Rp = $p$$Register;
14417     Register Rq = $q$$Register;
14418     Register Ry = $y$$Register;
14419     Label done;
14420     __ cmpl(Rp, Rq);
14421     __ jccb(Assembler::less, done);
14422     __ xorl(Ry, Ry);
14423     __ bind(done);
14424   %}
14425   ins_pipe(pipe_cmplt);
14426 %}
14427 
14428 
14429 //---------- FP Instructions------------------------------------------------
14430 
14431 // Really expensive, avoid
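// ucomiss sets ZF = PF = CF = 1 for an unordered (NaN) operand. The jnp
// fast path skips the fixup for ordered results; otherwise the
// pushfq/andq/popfq sequence rewrites the saved flags so that NaN reads
// as "less than" (CF = 1, ZF = PF = 0).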
14432 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14433 %{
14434   match(Set cr (CmpF src1 src2));
14435 
14436   ins_cost(500);
14437   format %{ "ucomiss $src1, $src2\n\t"
14438             "jnp,s   exit\n\t"
14439             "pushfq\t# saw NaN, set CF\n\t"
14440             "andq    [rsp], #0xffffff2b\n\t"
14441             "popfq\n"
14442     "exit:" %}
14443   ins_encode %{
14444     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14445     emit_cmpfp_fixup(masm);
14446   %}
14447   ins_pipe(pipe_slow);
14448 %}
14449 
14450 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14451   match(Set cr (CmpF src1 src2));
14452 
14453   ins_cost(100);
14454   format %{ "ucomiss $src1, $src2" %}
14455   ins_encode %{
14456     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14457   %}
14458   ins_pipe(pipe_slow);
14459 %}
14460 
14461 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14462   match(Set cr (CmpF src1 src2));
14463 
14464   ins_cost(100);
14465   format %{ "vucomxss $src1, $src2" %}
14466   ins_encode %{
14467     __ vucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14468   %}
14469   ins_pipe(pipe_slow);
14470 %}
14471 
14472 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14473   match(Set cr (CmpF src1 (LoadF src2)));
14474 
14475   ins_cost(100);
14476   format %{ "ucomiss $src1, $src2" %}
14477   ins_encode %{
14478     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14479   %}
14480   ins_pipe(pipe_slow);
14481 %}
14482 
14483 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14484   match(Set cr (CmpF src1 (LoadF src2)));
14485 
14486   ins_cost(100);
14487   format %{ "vucomxss $src1, $src2" %}
14488   ins_encode %{
14489     __ vucomxss($src1$$XMMRegister, $src2$$Address);
14490   %}
14491   ins_pipe(pipe_slow);
14492 %}
14493 
14494 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14495   match(Set cr (CmpF src con));
14496 
14497   ins_cost(100);
14498   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14499   ins_encode %{
14500     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14501   %}
14502   ins_pipe(pipe_slow);
14503 %}
14504 
14505 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14506   match(Set cr (CmpF src con));
14507 
14508   ins_cost(100);
14509   format %{ "vucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14510   ins_encode %{
14511     __ vucomxss($src$$XMMRegister, $constantaddress($con));
14512   %}
14513   ins_pipe(pipe_slow);
14514 %}
14515 
14516 // Really expensive, avoid
14517 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14518 %{
14519   match(Set cr (CmpD src1 src2));
14520 
14521   ins_cost(500);
14522   format %{ "ucomisd $src1, $src2\n\t"
14523             "jnp,s   exit\n\t"
14524             "pushfq\t# saw NaN, set CF\n\t"
14525             "andq    [rsp], #0xffffff2b\n\t"
14526             "popfq\n"
14527     "exit:" %}
14528   ins_encode %{
14529     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14530     emit_cmpfp_fixup(masm);
14531   %}
14532   ins_pipe(pipe_slow);
14533 %}
14534 
14535 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14536   match(Set cr (CmpD src1 src2));
14537 
14538   ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
14540   ins_encode %{
14541     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14542   %}
14543   ins_pipe(pipe_slow);
14544 %}
14545 
14546 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14547   match(Set cr (CmpD src1 src2));
14548 
14549   ins_cost(100);
  format %{ "vucomxsd $src1, $src2" %}
14551   ins_encode %{
14552     __ vucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14553   %}
14554   ins_pipe(pipe_slow);
14555 %}
14556 
14557 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14558   match(Set cr (CmpD src1 (LoadD src2)));
14559 
14560   ins_cost(100);
14561   format %{ "ucomisd $src1, $src2" %}
14562   ins_encode %{
14563     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14564   %}
14565   ins_pipe(pipe_slow);
14566 %}
14567 
14568 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14569   match(Set cr (CmpD src1 (LoadD src2)));
14570 
14571   ins_cost(100);
14572   format %{ "vucomxsd $src1, $src2" %}
14573   ins_encode %{
14574     __ vucomxsd($src1$$XMMRegister, $src2$$Address);
14575   %}
14576   ins_pipe(pipe_slow);
14577 %}
14578 
14579 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14580   match(Set cr (CmpD src con));
14581   ins_cost(100);
14582   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14583   ins_encode %{
14584     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14585   %}
14586   ins_pipe(pipe_slow);
14587 %}
14588 
14589 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14590   match(Set cr (CmpD src con));
14591 
14592   ins_cost(100);
14593   format %{ "vucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14594   ins_encode %{
14595     __ vucomxsd($src$$XMMRegister, $constantaddress($con));
14596   %}
14597   ins_pipe(pipe_slow);
14598 %}
14599 
14600 // Compare into -1,0,1
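// Sketch of the emitted sequence's result (fcmpl-style semantics):
//   dst = -1 if src1 < src2, or if either operand is NaN (jp/jb keep the
//            preloaded -1)
//   dst =  0 if src1 == src2
//   dst = +1 if src1 >  src2 (setne + movzbl)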
14601 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14602 %{
14603   match(Set dst (CmpF3 src1 src2));
14604   effect(KILL cr);
14605 
14606   ins_cost(275);
14607   format %{ "ucomiss $src1, $src2\n\t"
14608             "movl    $dst, #-1\n\t"
14609             "jp,s    done\n\t"
14610             "jb,s    done\n\t"
14611             "setne   $dst\n\t"
14612             "movzbl  $dst, $dst\n"
14613     "done:" %}
14614   ins_encode %{
14615     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14616     emit_cmpfp3(masm, $dst$$Register);
14617   %}
14618   ins_pipe(pipe_slow);
14619 %}
14620 
14621 // Compare into -1,0,1
14622 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14623 %{
14624   match(Set dst (CmpF3 src1 (LoadF src2)));
14625   effect(KILL cr);
14626 
14627   ins_cost(275);
14628   format %{ "ucomiss $src1, $src2\n\t"
14629             "movl    $dst, #-1\n\t"
14630             "jp,s    done\n\t"
14631             "jb,s    done\n\t"
14632             "setne   $dst\n\t"
14633             "movzbl  $dst, $dst\n"
14634     "done:" %}
14635   ins_encode %{
14636     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14637     emit_cmpfp3(masm, $dst$$Register);
14638   %}
14639   ins_pipe(pipe_slow);
14640 %}
14641 
14642 // Compare into -1,0,1
14643 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14644   match(Set dst (CmpF3 src con));
14645   effect(KILL cr);
14646 
14647   ins_cost(275);
14648   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14649             "movl    $dst, #-1\n\t"
14650             "jp,s    done\n\t"
14651             "jb,s    done\n\t"
14652             "setne   $dst\n\t"
14653             "movzbl  $dst, $dst\n"
14654     "done:" %}
14655   ins_encode %{
14656     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14657     emit_cmpfp3(masm, $dst$$Register);
14658   %}
14659   ins_pipe(pipe_slow);
14660 %}
14661 
14662 // Compare into -1,0,1
14663 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14664 %{
14665   match(Set dst (CmpD3 src1 src2));
14666   effect(KILL cr);
14667 
14668   ins_cost(275);
14669   format %{ "ucomisd $src1, $src2\n\t"
14670             "movl    $dst, #-1\n\t"
14671             "jp,s    done\n\t"
14672             "jb,s    done\n\t"
14673             "setne   $dst\n\t"
14674             "movzbl  $dst, $dst\n"
14675     "done:" %}
14676   ins_encode %{
14677     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14678     emit_cmpfp3(masm, $dst$$Register);
14679   %}
14680   ins_pipe(pipe_slow);
14681 %}
14682 
14683 // Compare into -1,0,1
14684 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14685 %{
14686   match(Set dst (CmpD3 src1 (LoadD src2)));
14687   effect(KILL cr);
14688 
14689   ins_cost(275);
14690   format %{ "ucomisd $src1, $src2\n\t"
14691             "movl    $dst, #-1\n\t"
14692             "jp,s    done\n\t"
14693             "jb,s    done\n\t"
14694             "setne   $dst\n\t"
14695             "movzbl  $dst, $dst\n"
14696     "done:" %}
14697   ins_encode %{
14698     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14699     emit_cmpfp3(masm, $dst$$Register);
14700   %}
14701   ins_pipe(pipe_slow);
14702 %}
14703 
14704 // Compare into -1,0,1
14705 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14706   match(Set dst (CmpD3 src con));
14707   effect(KILL cr);
14708 
14709   ins_cost(275);
14710   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14711             "movl    $dst, #-1\n\t"
14712             "jp,s    done\n\t"
14713             "jb,s    done\n\t"
14714             "setne   $dst\n\t"
14715             "movzbl  $dst, $dst\n"
14716     "done:" %}
14717   ins_encode %{
14718     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14719     emit_cmpfp3(masm, $dst$$Register);
14720   %}
14721   ins_pipe(pipe_slow);
14722 %}
14723 
14724 //----------Arithmetic Conversion Instructions---------------------------------
14725 
14726 instruct convF2D_reg_reg(regD dst, regF src)
14727 %{
14728   match(Set dst (ConvF2D src));
14729 
14730   format %{ "cvtss2sd $dst, $src" %}
14731   ins_encode %{
14732     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14733   %}
14734   ins_pipe(pipe_slow); // XXX
14735 %}
14736 
14737 instruct convF2D_reg_mem(regD dst, memory src)
14738 %{
14739   predicate(UseAVX == 0);
14740   match(Set dst (ConvF2D (LoadF src)));
14741 
14742   format %{ "cvtss2sd $dst, $src" %}
14743   ins_encode %{
14744     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14745   %}
14746   ins_pipe(pipe_slow); // XXX
14747 %}
14748 
14749 instruct convD2F_reg_reg(regF dst, regD src)
14750 %{
14751   match(Set dst (ConvD2F src));
14752 
14753   format %{ "cvtsd2ss $dst, $src" %}
14754   ins_encode %{
14755     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14756   %}
14757   ins_pipe(pipe_slow); // XXX
14758 %}
14759 
14760 instruct convD2F_reg_mem(regF dst, memory src)
14761 %{
14762   predicate(UseAVX == 0);
14763   match(Set dst (ConvD2F (LoadD src)));
14764 
14765   format %{ "cvtsd2ss $dst, $src" %}
14766   ins_encode %{
14767     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14768   %}
14769   ins_pipe(pipe_slow); // XXX
14770 %}
14771 
14772 // XXX do mem variants
14773 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14774 %{
14775   predicate(!VM_Version::supports_avx10_2());
14776   match(Set dst (ConvF2I src));
14777   effect(KILL cr);
14778   format %{ "convert_f2i $dst, $src" %}
14779   ins_encode %{
14780     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14781   %}
14782   ins_pipe(pipe_slow);
14783 %}
14784 
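// The AVX10.2 rules below use the saturating scalar converts
// (evcvttss2sis* / evcvttsd2sis*), which clamp out-of-range inputs to the
// integer min/max and map NaN to zero, matching Java's narrowing rules
// directly; hence no fixup sequence and no KILL cr, unlike the legacy
// convertF2I path above.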
14785 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14786 %{
14787   predicate(VM_Version::supports_avx10_2());
14788   match(Set dst (ConvF2I src));
14789   format %{ "evcvttss2sisl $dst, $src" %}
14790   ins_encode %{
14791     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14792   %}
14793   ins_pipe(pipe_slow);
14794 %}
14795 
14796 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14797 %{
14798   predicate(VM_Version::supports_avx10_2());
14799   match(Set dst (ConvF2I (LoadF src)));
14800   format %{ "evcvttss2sisl $dst, $src" %}
14801   ins_encode %{
14802     __ evcvttss2sisl($dst$$Register, $src$$Address);
14803   %}
14804   ins_pipe(pipe_slow);
14805 %}
14806 
14807 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14808 %{
14809   predicate(!VM_Version::supports_avx10_2());
14810   match(Set dst (ConvF2L src));
14811   effect(KILL cr);
14812   format %{ "convert_f2l $dst, $src"%}
14813   ins_encode %{
14814     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14815   %}
14816   ins_pipe(pipe_slow);
14817 %}
14818 
14819 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14820 %{
14821   predicate(VM_Version::supports_avx10_2());
14822   match(Set dst (ConvF2L src));
14823   format %{ "evcvttss2sisq $dst, $src" %}
14824   ins_encode %{
14825     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14826   %}
14827   ins_pipe(pipe_slow);
14828 %}
14829 
14830 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14831 %{
14832   predicate(VM_Version::supports_avx10_2());
14833   match(Set dst (ConvF2L (LoadF src)));
14834   format %{ "evcvttss2sisq $dst, $src" %}
14835   ins_encode %{
14836     __ evcvttss2sisq($dst$$Register, $src$$Address);
14837   %}
14838   ins_pipe(pipe_slow);
14839 %}
14840 
14841 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14842 %{
14843   predicate(!VM_Version::supports_avx10_2());
14844   match(Set dst (ConvD2I src));
14845   effect(KILL cr);
14846   format %{ "convert_d2i $dst, $src"%}
14847   ins_encode %{
14848     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14849   %}
14850   ins_pipe(pipe_slow);
14851 %}
14852 
14853 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14854 %{
14855   predicate(VM_Version::supports_avx10_2());
14856   match(Set dst (ConvD2I src));
14857   format %{ "evcvttsd2sisl $dst, $src" %}
14858   ins_encode %{
14859     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14860   %}
14861   ins_pipe(pipe_slow);
14862 %}
14863 
14864 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14865 %{
14866   predicate(VM_Version::supports_avx10_2());
14867   match(Set dst (ConvD2I (LoadD src)));
14868   format %{ "evcvttsd2sisl $dst, $src" %}
14869   ins_encode %{
14870     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14871   %}
14872   ins_pipe(pipe_slow);
14873 %}
14874 
14875 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14876 %{
14877   predicate(!VM_Version::supports_avx10_2());
14878   match(Set dst (ConvD2L src));
14879   effect(KILL cr);
14880   format %{ "convert_d2l $dst, $src"%}
14881   ins_encode %{
14882     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14883   %}
14884   ins_pipe(pipe_slow);
14885 %}
14886 
14887 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14888 %{
14889   predicate(VM_Version::supports_avx10_2());
14890   match(Set dst (ConvD2L src));
14891   format %{ "evcvttsd2sisq $dst, $src" %}
14892   ins_encode %{
14893     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14894   %}
14895   ins_pipe(pipe_slow);
14896 %}
14897 
14898 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14899 %{
14900   predicate(VM_Version::supports_avx10_2());
14901   match(Set dst (ConvD2L (LoadD src)));
14902   format %{ "evcvttsd2sisq $dst, $src" %}
14903   ins_encode %{
14904     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14905   %}
14906   ins_pipe(pipe_slow);
14907 %}
14908 
14909 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14910 %{
14911   match(Set dst (RoundD src));
14912   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src\t! using $rtmp and $rcx as TEMP" %}
14914   ins_encode %{
14915     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14916   %}
14917   ins_pipe(pipe_slow);
14918 %}
14919 
14920 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14921 %{
14922   match(Set dst (RoundF src));
14923   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14924   format %{ "round_float $dst,$src" %}
14925   ins_encode %{
14926     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14927   %}
14928   ins_pipe(pipe_slow);
14929 %}
14930 
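// cvtsi2ss/cvtsi2sd only write the low element and merge the rest of the
// XMM destination, creating a false dependency on the register's previous
// contents; the pxor ahead of the convert breaks that dependency when AVX
// is available.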
14931 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14932 %{
14933   predicate(!UseXmmI2F);
14934   match(Set dst (ConvI2F src));
14935 
14936   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14937   ins_encode %{
14938     if (UseAVX > 0) {
14939       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14940     }
14941     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14942   %}
14943   ins_pipe(pipe_slow); // XXX
14944 %}
14945 
14946 instruct convI2F_reg_mem(regF dst, memory src)
14947 %{
14948   predicate(UseAVX == 0);
14949   match(Set dst (ConvI2F (LoadI src)));
14950 
14951   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14952   ins_encode %{
14953     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14954   %}
14955   ins_pipe(pipe_slow); // XXX
14956 %}
14957 
14958 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14959 %{
14960   predicate(!UseXmmI2D);
14961   match(Set dst (ConvI2D src));
14962 
14963   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14964   ins_encode %{
14965     if (UseAVX > 0) {
14966       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14967     }
14968     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14969   %}
14970   ins_pipe(pipe_slow); // XXX
14971 %}
14972 
14973 instruct convI2D_reg_mem(regD dst, memory src)
14974 %{
14975   predicate(UseAVX == 0);
14976   match(Set dst (ConvI2D (LoadI src)));
14977 
14978   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14979   ins_encode %{
14980     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14981   %}
14982   ins_pipe(pipe_slow); // XXX
14983 %}
14984 
14985 instruct convXI2F_reg(regF dst, rRegI src)
14986 %{
14987   predicate(UseXmmI2F);
14988   match(Set dst (ConvI2F src));
14989 
14990   format %{ "movdl $dst, $src\n\t"
            "cvtdq2ps $dst, $dst\t# i2f" %}
14992   ins_encode %{
14993     __ movdl($dst$$XMMRegister, $src$$Register);
14994     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14995   %}
14996   ins_pipe(pipe_slow); // XXX
14997 %}
14998 
14999 instruct convXI2D_reg(regD dst, rRegI src)
15000 %{
15001   predicate(UseXmmI2D);
15002   match(Set dst (ConvI2D src));
15003 
15004   format %{ "movdl $dst, $src\n\t"
            "cvtdq2pd $dst, $dst\t# i2d" %}
15006   ins_encode %{
15007     __ movdl($dst$$XMMRegister, $src$$Register);
15008     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
15009   %}
15010   ins_pipe(pipe_slow); // XXX
15011 %}
15012 
15013 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
15014 %{
15015   match(Set dst (ConvL2F src));
15016 
15017   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
15018   ins_encode %{
15019     if (UseAVX > 0) {
15020       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
15021     }
15022     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
15023   %}
15024   ins_pipe(pipe_slow); // XXX
15025 %}
15026 
15027 instruct convL2F_reg_mem(regF dst, memory src)
15028 %{
15029   predicate(UseAVX == 0);
15030   match(Set dst (ConvL2F (LoadL src)));
15031 
15032   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
15033   ins_encode %{
15034     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
15035   %}
15036   ins_pipe(pipe_slow); // XXX
15037 %}
15038 
15039 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
15040 %{
15041   match(Set dst (ConvL2D src));
15042 
15043   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15044   ins_encode %{
15045     if (UseAVX > 0) {
15046       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
15047     }
15048     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
15049   %}
15050   ins_pipe(pipe_slow); // XXX
15051 %}
15052 
15053 instruct convL2D_reg_mem(regD dst, memory src)
15054 %{
15055   predicate(UseAVX == 0);
15056   match(Set dst (ConvL2D (LoadL src)));
15057 
15058   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15059   ins_encode %{
15060     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
15061   %}
15062   ins_pipe(pipe_slow); // XXX
15063 %}
15064 
15065 instruct convI2L_reg_reg(rRegL dst, rRegI src)
15066 %{
15067   match(Set dst (ConvI2L src));
15068 
15069   ins_cost(125);
15070   format %{ "movslq  $dst, $src\t# i2l" %}
15071   ins_encode %{
15072     __ movslq($dst$$Register, $src$$Register);
15073   %}
15074   ins_pipe(ialu_reg_reg);
15075 %}
15076 
15077 // Zero-extend convert int to long
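// A 32-bit mov implicitly clears bits 63:32, so (x & 0xFFFFFFFFL) after
// ConvI2L is a plain movl, and when dst == src nothing needs to be emitted.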
15078 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
15079 %{
15080   match(Set dst (AndL (ConvI2L src) mask));
15081 
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
15083   ins_encode %{
15084     if ($dst$$reg != $src$$reg) {
15085       __ movl($dst$$Register, $src$$Register);
15086     }
15087   %}
15088   ins_pipe(ialu_reg_reg);
15089 %}
15090 
15091 // Zero-extend convert int to long
15092 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15093 %{
15094   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15095 
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
15097   ins_encode %{
15098     __ movl($dst$$Register, $src$$Address);
15099   %}
15100   ins_pipe(ialu_reg_mem);
15101 %}
15102 
15103 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15104 %{
15105   match(Set dst (AndL src mask));
15106 
15107   format %{ "movl    $dst, $src\t# zero-extend long" %}
15108   ins_encode %{
15109     __ movl($dst$$Register, $src$$Register);
15110   %}
15111   ins_pipe(ialu_reg_reg);
15112 %}
15113 
15114 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15115 %{
15116   match(Set dst (ConvL2I src));
15117 
15118   format %{ "movl    $dst, $src\t# l2i" %}
15119   ins_encode %{
15120     __ movl($dst$$Register, $src$$Register);
15121   %}
15122   ins_pipe(ialu_reg_reg);
15123 %}
15124 
15125 
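// The Move*2* rules below are raw bit transfers in the style of
// Float.floatToRawIntBits / Double.doubleToRawLongBits and their inverses:
// no conversion or rounding occurs, only a copy between register files or
// through a stack slot.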
15126 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15127   match(Set dst (MoveF2I src));
15128   effect(DEF dst, USE src);
15129 
15130   ins_cost(125);
15131   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
15132   ins_encode %{
15133     __ movl($dst$$Register, Address(rsp, $src$$disp));
15134   %}
15135   ins_pipe(ialu_reg_mem);
15136 %}
15137 
15138 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15139   match(Set dst (MoveI2F src));
15140   effect(DEF dst, USE src);
15141 
15142   ins_cost(125);
15143   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
15144   ins_encode %{
15145     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15146   %}
15147   ins_pipe(pipe_slow);
15148 %}
15149 
15150 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15151   match(Set dst (MoveD2L src));
15152   effect(DEF dst, USE src);
15153 
15154   ins_cost(125);
15155   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
15156   ins_encode %{
15157     __ movq($dst$$Register, Address(rsp, $src$$disp));
15158   %}
15159   ins_pipe(ialu_reg_mem);
15160 %}
15161 
15162 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15163   predicate(!UseXmmLoadAndClearUpper);
15164   match(Set dst (MoveL2D src));
15165   effect(DEF dst, USE src);
15166 
15167   ins_cost(125);
15168   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
15169   ins_encode %{
15170     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15171   %}
15172   ins_pipe(pipe_slow);
15173 %}
15174 
15175 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15176   predicate(UseXmmLoadAndClearUpper);
15177   match(Set dst (MoveL2D src));
15178   effect(DEF dst, USE src);
15179 
15180   ins_cost(125);
15181   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
15182   ins_encode %{
15183     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15184   %}
15185   ins_pipe(pipe_slow);
15186 %}
15187 
15188 
15189 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15190   match(Set dst (MoveF2I src));
15191   effect(DEF dst, USE src);
15192 
15193   ins_cost(95); // XXX
15194   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
15195   ins_encode %{
15196     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15197   %}
15198   ins_pipe(pipe_slow);
15199 %}
15200 
15201 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15202   match(Set dst (MoveI2F src));
15203   effect(DEF dst, USE src);
15204 
15205   ins_cost(100);
15206   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
15207   ins_encode %{
15208     __ movl(Address(rsp, $dst$$disp), $src$$Register);
15209   %}
15210   ins_pipe( ialu_mem_reg );
15211 %}
15212 
15213 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15214   match(Set dst (MoveD2L src));
15215   effect(DEF dst, USE src);
15216 
15217   ins_cost(95); // XXX
  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
15219   ins_encode %{
15220     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15221   %}
15222   ins_pipe(pipe_slow);
15223 %}
15224 
15225 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15226   match(Set dst (MoveL2D src));
15227   effect(DEF dst, USE src);
15228 
15229   ins_cost(100);
15230   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
15231   ins_encode %{
15232     __ movq(Address(rsp, $dst$$disp), $src$$Register);
15233   %}
15234   ins_pipe(ialu_mem_reg);
15235 %}
15236 
15237 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15238   match(Set dst (MoveF2I src));
15239   effect(DEF dst, USE src);
15240   ins_cost(85);
15241   format %{ "movd    $dst,$src\t# MoveF2I" %}
15242   ins_encode %{
15243     __ movdl($dst$$Register, $src$$XMMRegister);
15244   %}
15245   ins_pipe( pipe_slow );
15246 %}
15247 
15248 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15249   match(Set dst (MoveD2L src));
15250   effect(DEF dst, USE src);
15251   ins_cost(85);
15252   format %{ "movd    $dst,$src\t# MoveD2L" %}
15253   ins_encode %{
15254     __ movdq($dst$$Register, $src$$XMMRegister);
15255   %}
15256   ins_pipe( pipe_slow );
15257 %}
15258 
15259 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15260   match(Set dst (MoveI2F src));
15261   effect(DEF dst, USE src);
15262   ins_cost(100);
15263   format %{ "movd    $dst,$src\t# MoveI2F" %}
15264   ins_encode %{
15265     __ movdl($dst$$XMMRegister, $src$$Register);
15266   %}
15267   ins_pipe( pipe_slow );
15268 %}
15269 
15270 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15271   match(Set dst (MoveL2D src));
15272   effect(DEF dst, USE src);
15273   ins_cost(100);
15274   format %{ "movd    $dst,$src\t# MoveL2D" %}
15275   ins_encode %{
15276      __ movdq($dst$$XMMRegister, $src$$Register);
15277   %}
15278   ins_pipe( pipe_slow );
15279 %}
15280 
15281 
15282 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
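// Inputs: rdi = base address, rcx = count in 8-byte words, rax = the
// 64-bit fill pattern. clear_mem selects among rep stos and the XMM loops
// sketched in the format template below.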
15284 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15285                   Universe dummy, rFlagsReg cr)
15286 %{
15287   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15288   match(Set dummy (ClearArray (Binary cnt base) val));
15289   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15290 
15291   format %{ $$template
15292     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15293     $$emit$$"jg      LARGE\n\t"
15294     $$emit$$"dec     rcx\n\t"
15295     $$emit$$"js      DONE\t# Zero length\n\t"
15296     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15297     $$emit$$"dec     rcx\n\t"
15298     $$emit$$"jge     LOOP\n\t"
15299     $$emit$$"jmp     DONE\n\t"
15300     $$emit$$"# LARGE:\n\t"
15301     if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15303        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15304     } else if (UseXMMForObjInit) {
15305        $$emit$$"movdq   $tmp, $val\n\t"
15306        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15307        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15308        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15309        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15310        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15311        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15312        $$emit$$"add     0x40,rax\n\t"
15313        $$emit$$"# L_zero_64_bytes:\n\t"
15314        $$emit$$"sub     0x8,rcx\n\t"
15315        $$emit$$"jge     L_loop\n\t"
15316        $$emit$$"add     0x4,rcx\n\t"
15317        $$emit$$"jl      L_tail\n\t"
15318        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15319        $$emit$$"add     0x20,rax\n\t"
15320        $$emit$$"sub     0x4,rcx\n\t"
15321        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15322        $$emit$$"add     0x4,rcx\n\t"
15323        $$emit$$"jle     L_end\n\t"
15324        $$emit$$"dec     rcx\n\t"
15325        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15326        $$emit$$"vmovq   xmm0,(rax)\n\t"
15327        $$emit$$"add     0x8,rax\n\t"
15328        $$emit$$"dec     rcx\n\t"
15329        $$emit$$"jge     L_sloop\n\t"
15330        $$emit$$"# L_end:\n\t"
15331     } else {
15332        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15333     }
15334     $$emit$$"# DONE"
15335   %}
15336   ins_encode %{
15337     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15338                  $tmp$$XMMRegister, false, false);
15339   %}
15340   ins_pipe(pipe_slow);
15341 %}
15342 
15343 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15344                             Universe dummy, rFlagsReg cr)
15345 %{
15346   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15347   match(Set dummy (ClearArray (Binary cnt base) val));
15348   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15349 
15350   format %{ $$template
15351     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15352     $$emit$$"jg      LARGE\n\t"
15353     $$emit$$"dec     rcx\n\t"
15354     $$emit$$"js      DONE\t# Zero length\n\t"
15355     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15356     $$emit$$"dec     rcx\n\t"
15357     $$emit$$"jge     LOOP\n\t"
15358     $$emit$$"jmp     DONE\n\t"
15359     $$emit$$"# LARGE:\n\t"
15360     if (UseXMMForObjInit) {
15361        $$emit$$"movdq   $tmp, $val\n\t"
15362        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15363        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15364        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15365        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15366        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15367        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15368        $$emit$$"add     0x40,rax\n\t"
15369        $$emit$$"# L_zero_64_bytes:\n\t"
15370        $$emit$$"sub     0x8,rcx\n\t"
15371        $$emit$$"jge     L_loop\n\t"
15372        $$emit$$"add     0x4,rcx\n\t"
15373        $$emit$$"jl      L_tail\n\t"
15374        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15375        $$emit$$"add     0x20,rax\n\t"
15376        $$emit$$"sub     0x4,rcx\n\t"
15377        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15378        $$emit$$"add     0x4,rcx\n\t"
15379        $$emit$$"jle     L_end\n\t"
15380        $$emit$$"dec     rcx\n\t"
15381        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15382        $$emit$$"vmovq   xmm0,(rax)\n\t"
15383        $$emit$$"add     0x8,rax\n\t"
15384        $$emit$$"dec     rcx\n\t"
15385        $$emit$$"jge     L_sloop\n\t"
15386        $$emit$$"# L_end:\n\t"
15387     } else {
15388        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15389     }
15390     $$emit$$"# DONE"
15391   %}
15392   ins_encode %{
15393     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15394                  $tmp$$XMMRegister, false, true);
15395   %}
15396   ins_pipe(pipe_slow);
15397 %}
15398 
15399 // Small non-constant length ClearArray for AVX512 targets.
15400 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15401                        Universe dummy, rFlagsReg cr)
15402 %{
15403   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15404   match(Set dummy (ClearArray (Binary cnt base) val));
15405   ins_cost(125);
15406   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15407 
15408   format %{ $$template
15409     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15410     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15411     $$emit$$"jg      LARGE\n\t"
15412     $$emit$$"dec     rcx\n\t"
15413     $$emit$$"js      DONE\t# Zero length\n\t"
15414     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15415     $$emit$$"dec     rcx\n\t"
15416     $$emit$$"jge     LOOP\n\t"
15417     $$emit$$"jmp     DONE\n\t"
15418     $$emit$$"# LARGE:\n\t"
15419     if (UseFastStosb) {
15420        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15421        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15422     } else if (UseXMMForObjInit) {
15423        $$emit$$"mov     rdi,rax\n\t"
15424        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15425        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15426        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15427        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15428        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15429        $$emit$$"add     0x40,rax\n\t"
15430        $$emit$$"# L_zero_64_bytes:\n\t"
15431        $$emit$$"sub     0x8,rcx\n\t"
15432        $$emit$$"jge     L_loop\n\t"
15433        $$emit$$"add     0x4,rcx\n\t"
15434        $$emit$$"jl      L_tail\n\t"
15435        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15436        $$emit$$"add     0x20,rax\n\t"
15437        $$emit$$"sub     0x4,rcx\n\t"
15438        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15439        $$emit$$"add     0x4,rcx\n\t"
15440        $$emit$$"jle     L_end\n\t"
15441        $$emit$$"dec     rcx\n\t"
15442        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15443        $$emit$$"vmovq   xmm0,(rax)\n\t"
15444        $$emit$$"add     0x8,rax\n\t"
15445        $$emit$$"dec     rcx\n\t"
15446        $$emit$$"jge     L_sloop\n\t"
15447        $$emit$$"# L_end:\n\t"
15448     } else {
15449        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15450     }
15451     $$emit$$"# DONE"
15452   %}
15453   ins_encode %{
15454     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15455                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15456   %}
15457   ins_pipe(pipe_slow);
15458 %}
15459 
15460 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15461                                  Universe dummy, rFlagsReg cr)
15462 %{
15463   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15464   match(Set dummy (ClearArray (Binary cnt base) val));
15465   ins_cost(125);
15466   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15467 
15468   format %{ $$template
15469     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15470     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15471     $$emit$$"jg      LARGE\n\t"
15472     $$emit$$"dec     rcx\n\t"
15473     $$emit$$"js      DONE\t# Zero length\n\t"
15474     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15475     $$emit$$"dec     rcx\n\t"
15476     $$emit$$"jge     LOOP\n\t"
15477     $$emit$$"jmp     DONE\n\t"
15478     $$emit$$"# LARGE:\n\t"
15479     if (UseFastStosb) {
15480        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15481        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15482     } else if (UseXMMForObjInit) {
15483        $$emit$$"mov     rdi,rax\n\t"
15484        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15485        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15486        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15487        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15488        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15489        $$emit$$"add     0x40,rax\n\t"
15490        $$emit$$"# L_zero_64_bytes:\n\t"
15491        $$emit$$"sub     0x8,rcx\n\t"
15492        $$emit$$"jge     L_loop\n\t"
15493        $$emit$$"add     0x4,rcx\n\t"
15494        $$emit$$"jl      L_tail\n\t"
15495        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15496        $$emit$$"add     0x20,rax\n\t"
15497        $$emit$$"sub     0x4,rcx\n\t"
15498        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15499        $$emit$$"add     0x4,rcx\n\t"
15500        $$emit$$"jle     L_end\n\t"
15501        $$emit$$"dec     rcx\n\t"
15502        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15503        $$emit$$"vmovq   xmm0,(rax)\n\t"
15504        $$emit$$"add     0x8,rax\n\t"
15505        $$emit$$"dec     rcx\n\t"
15506        $$emit$$"jge     L_sloop\n\t"
15507        $$emit$$"# L_end:\n\t"
15508     } else {
15509        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15510     }
15511     $$emit$$"# DONE"
15512   %}
15513   ins_encode %{
15514     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15515                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15516   %}
15517   ins_pipe(pipe_slow);
15518 %}
15519 
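// For reference, the inline short path shown in the templates above behaves
// roughly like this sketch (cnt in 8-byte words, fill value in RAX):
//
//   if (cnt exceeds the InitArrayShortSize threshold) goto LARGE;
//   for (long i = (long)cnt - 1; i >= 0; i--)   // "dec rcx / jge LOOP"
//     ((julong*)base)[i] = val;                 // 8-byte stores, back to front
//
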
15520 // Large non-constant length ClearArray for non-AVX512 targets.
15521 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15522                         Universe dummy, rFlagsReg cr)
15523 %{
15524   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15525   match(Set dummy (ClearArray (Binary cnt base) val));
15526   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15527 
15528   format %{ $$template
15529     if (UseFastStosb) {
15530        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15531        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15532     } else if (UseXMMForObjInit) {
15533        $$emit$$"movdq   $tmp, $val\n\t"
15534        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15535        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15536        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15537        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15538        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15539        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15540        $$emit$$"add     0x40,rax\n\t"
15541        $$emit$$"# L_zero_64_bytes:\n\t"
15542        $$emit$$"sub     0x8,rcx\n\t"
15543        $$emit$$"jge     L_loop\n\t"
15544        $$emit$$"add     0x4,rcx\n\t"
15545        $$emit$$"jl      L_tail\n\t"
15546        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15547        $$emit$$"add     0x20,rax\n\t"
15548        $$emit$$"sub     0x4,rcx\n\t"
15549        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15550        $$emit$$"add     0x4,rcx\n\t"
15551        $$emit$$"jle     L_end\n\t"
15552        $$emit$$"dec     rcx\n\t"
15553        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15554        $$emit$$"vmovq   xmm0,(rax)\n\t"
15555        $$emit$$"add     0x8,rax\n\t"
15556        $$emit$$"dec     rcx\n\t"
15557        $$emit$$"jge     L_sloop\n\t"
15558        $$emit$$"# L_end:\n\t"
15559     } else {
15560        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15561     }
15562   %}
15563   ins_encode %{
15564     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15565                  $tmp$$XMMRegister, true, false);
15566   %}
15567   ins_pipe(pipe_slow);
15568 %}
15569 
15570 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15571                                   Universe dummy, rFlagsReg cr)
15572 %{
15573   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15574   match(Set dummy (ClearArray (Binary cnt base) val));
15575   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15576 
15577   format %{ $$template
15578     if (UseXMMForObjInit) {
15579        $$emit$$"movdq   $tmp, $val\n\t"
15580        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15581        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15582        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15583        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15584        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15585        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15586        $$emit$$"add     0x40,rax\n\t"
15587        $$emit$$"# L_zero_64_bytes:\n\t"
15588        $$emit$$"sub     0x8,rcx\n\t"
15589        $$emit$$"jge     L_loop\n\t"
15590        $$emit$$"add     0x4,rcx\n\t"
15591        $$emit$$"jl      L_tail\n\t"
15592        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15593        $$emit$$"add     0x20,rax\n\t"
15594        $$emit$$"sub     0x4,rcx\n\t"
15595        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15596        $$emit$$"add     0x4,rcx\n\t"
15597        $$emit$$"jle     L_end\n\t"
15598        $$emit$$"dec     rcx\n\t"
15599        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15600        $$emit$$"vmovq   xmm0,(rax)\n\t"
15601        $$emit$$"add     0x8,rax\n\t"
15602        $$emit$$"dec     rcx\n\t"
15603        $$emit$$"jge     L_sloop\n\t"
15604        $$emit$$"# L_end:\n\t"
15605     } else {
15606        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15607     }
15608   %}
15609   ins_encode %{
15610     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15611                  $tmp$$XMMRegister, true, true);
15612   %}
15613   ins_pipe(pipe_slow);
15614 %}
15615 
15616 // Large non-constant length ClearArray for AVX512 targets.
15617 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15618                              Universe dummy, rFlagsReg cr)
15619 %{
15620   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15621   match(Set dummy (ClearArray (Binary cnt base) val));
15622   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15623 
15624   format %{ $$template
15625     if (UseFastStosb) {
15626        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15627        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15628        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15629     } else if (UseXMMForObjInit) {
15630        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15631        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15632        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15633        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15634        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15635        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15636        $$emit$$"add     0x40,rax\n\t"
15637        $$emit$$"# L_zero_64_bytes:\n\t"
15638        $$emit$$"sub     0x8,rcx\n\t"
15639        $$emit$$"jge     L_loop\n\t"
15640        $$emit$$"add     0x4,rcx\n\t"
15641        $$emit$$"jl      L_tail\n\t"
15642        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15643        $$emit$$"add     0x20,rax\n\t"
15644        $$emit$$"sub     0x4,rcx\n\t"
15645        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15646        $$emit$$"add     0x4,rcx\n\t"
15647        $$emit$$"jle     L_end\n\t"
15648        $$emit$$"dec     rcx\n\t"
15649        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15650        $$emit$$"vmovq   xmm0,(rax)\n\t"
15651        $$emit$$"add     0x8,rax\n\t"
15652        $$emit$$"dec     rcx\n\t"
15653        $$emit$$"jge     L_sloop\n\t"
15654        $$emit$$"# L_end:\n\t"
15655     } else {
15656        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15657        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15658     }
15659   %}
15660   ins_encode %{
15661     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15662                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15663   %}
15664   ins_pipe(pipe_slow);
15665 %}
15666 
15667 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15668                                        Universe dummy, rFlagsReg cr)
15669 %{
15670   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15671   match(Set dummy (ClearArray (Binary cnt base) val));
15672   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15673 
15674   format %{ $$template
15675     if (UseFastStosb) {
15676        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15677        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15678        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15679     } else if (UseXMMForObjInit) {
15680        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15681        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15682        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15683        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15684        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15685        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15686        $$emit$$"add     0x40,rax\n\t"
15687        $$emit$$"# L_zero_64_bytes:\n\t"
15688        $$emit$$"sub     0x8,rcx\n\t"
15689        $$emit$$"jge     L_loop\n\t"
15690        $$emit$$"add     0x4,rcx\n\t"
15691        $$emit$$"jl      L_tail\n\t"
15692        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15693        $$emit$$"add     0x20,rax\n\t"
15694        $$emit$$"sub     0x4,rcx\n\t"
15695        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15696        $$emit$$"add     0x4,rcx\n\t"
15697        $$emit$$"jle     L_end\n\t"
15698        $$emit$$"dec     rcx\n\t"
15699        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15700        $$emit$$"vmovq   xmm0,(rax)\n\t"
15701        $$emit$$"add     0x8,rax\n\t"
15702        $$emit$$"dec     rcx\n\t"
15703        $$emit$$"jge     L_sloop\n\t"
15704        $$emit$$"# L_end:\n\t"
15705     } else {
15706        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15707        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15708     }
15709   %}
15710   ins_encode %{
15711     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15712                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15713   %}
15714   ins_pipe(pipe_slow);
15715 %}
15716 
15717 // Small constant length ClearArray for AVX512 targets.
15718 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15719 %{
15720   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15721             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15722   match(Set dummy (ClearArray (Binary cnt base) val));
15723   ins_cost(100);
15724   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15725   format %{ "clear_mem_imm $base, $cnt" %}
15726   ins_encode %{
15727     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15728   %}
15729   ins_pipe(pipe_slow);
15730 %}
15731 
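// When the length is a compile-time constant, the rule above wins on cost
// (ins_cost 100 vs. 125 for the variable-length EVEX rules): clear_mem
// receives the count as an immediate, needs neither the RCX count nor the
// RDI base pinning of the general rules, and short fills can be emitted as
// straight-line masked stores.
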
15732 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15733                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15734 %{
15735   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15736   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15737   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15738 
15739   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15740   ins_encode %{
15741     __ string_compare($str1$$Register, $str2$$Register,
15742                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15743                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15744   %}
15745   ins_pipe( pipe_slow );
15746 %}
15747 
15748 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15749                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15750 %{
15751   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15752   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15753   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15754 
15755   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15756   ins_encode %{
15757     __ string_compare($str1$$Register, $str2$$Register,
15758                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15759                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15760   %}
15761   ins_pipe( pipe_slow );
15762 %}
15763 
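// string_compareL and string_compareL_evex above illustrate a pairing used
// throughout this section: a baseline rule passes knoreg (the "no mask
// register" sentinel) to the assembler stub, while an _evex twin gated on
// supports_avx512vlbw() supplies real kReg temporaries so the stub can mask
// partial head/tail accesses.  Typically only the predicate, the extra TEMP
// operands, and the trailing stub arguments differ between the two.
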
15764 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15765                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15766 %{
15767   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15768   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15769   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15770 
15771   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15772   ins_encode %{
15773     __ string_compare($str1$$Register, $str2$$Register,
15774                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15775                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15776   %}
15777   ins_pipe( pipe_slow );
15778 %}
15779 
15780 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15781                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15782 %{
15783   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15784   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15785   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15786 
15787   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15788   ins_encode %{
15789     __ string_compare($str1$$Register, $str2$$Register,
15790                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15791                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15792   %}
15793   ins_pipe( pipe_slow );
15794 %}
15795 
15796 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15797                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15798 %{
15799   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15800   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15801   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15802 
15803   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15804   ins_encode %{
15805     __ string_compare($str1$$Register, $str2$$Register,
15806                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15807                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15808   %}
15809   ins_pipe( pipe_slow );
15810 %}
15811 
15812 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15813                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15814 %{
15815   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15816   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15817   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15818 
15819   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15820   ins_encode %{
15821     __ string_compare($str1$$Register, $str2$$Register,
15822                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15823                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15824   %}
15825   ins_pipe( pipe_slow );
15826 %}
15827 
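// Note the operand swap in the UL rules below: str1/cnt1 bind to RSI/RDX
// (the reverse of the LL/UU/LU rules above) and string_compare is called as
// (str2, str1, cnt2, cnt1, ..., UL), so the Latin1 operand always comes first
// and one stub path can serve both mixed encodings.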
15828 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15829                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15830 %{
15831   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15832   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15833   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15834 
15835   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15836   ins_encode %{
15837     __ string_compare($str2$$Register, $str1$$Register,
15838                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15839                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15840   %}
15841   ins_pipe( pipe_slow );
15842 %}
15843 
15844 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15845                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15846 %{
15847   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15848   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15849   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15850 
15851   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15852   ins_encode %{
15853     __ string_compare($str2$$Register, $str1$$Register,
15854                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15855                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15856   %}
15857   ins_pipe( pipe_slow );
15858 %}
15859 
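// The constant-size rules below choose between two stubs: string_indexofC8
// needs the pattern to fill a 16-byte XMM register (>= 16 Latin1 or >= 8
// UTF-16 elements) so it can be loaded directly; shorter constant patterns,
// and the variable-length rules further below (which pass -1 as the size),
// use the general string_indexof stub, which stages small patterns through
// the stack when they would otherwise cross a page boundary.
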
15860 // fast search of substring with known size.
15861 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15862                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15863 %{
15864   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15865   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15866   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15867 
15868   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15869   ins_encode %{
15870     int icnt2 = (int)$int_cnt2$$constant;
15871     if (icnt2 >= 16) {
15872       // IndexOf for constant substrings with size >= 16 elements
15873       // which don't need to be loaded through stack.
15874       __ string_indexofC8($str1$$Register, $str2$$Register,
15875                           $cnt1$$Register, $cnt2$$Register,
15876                           icnt2, $result$$Register,
15877                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15878     } else {
15879       // Small strings are loaded through stack if they cross page boundary.
15880       __ string_indexof($str1$$Register, $str2$$Register,
15881                         $cnt1$$Register, $cnt2$$Register,
15882                         icnt2, $result$$Register,
15883                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15884     }
15885   %}
15886   ins_pipe( pipe_slow );
15887 %}
15888 
15889 // fast search of substring with known size.
15890 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15891                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15892 %{
15893   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15894   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15895   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15896 
15897   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15898   ins_encode %{
15899     int icnt2 = (int)$int_cnt2$$constant;
15900     if (icnt2 >= 8) {
15901       // IndexOf for constant substrings with size >= 8 elements
15902       // which don't need to be loaded through stack.
15903       __ string_indexofC8($str1$$Register, $str2$$Register,
15904                           $cnt1$$Register, $cnt2$$Register,
15905                           icnt2, $result$$Register,
15906                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15907     } else {
15908       // Small strings are loaded through stack if they cross page boundary.
15909       __ string_indexof($str1$$Register, $str2$$Register,
15910                         $cnt1$$Register, $cnt2$$Register,
15911                         icnt2, $result$$Register,
15912                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15913     }
15914   %}
15915   ins_pipe( pipe_slow );
15916 %}
15917 
15918 // fast search of substring with known size.
15919 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15920                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15921 %{
15922   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15923   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15924   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15925 
15926   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15927   ins_encode %{
15928     int icnt2 = (int)$int_cnt2$$constant;
15929     if (icnt2 >= 8) {
15930       // IndexOf for constant substrings with size >= 8 elements
15931       // which don't need to be loaded through stack.
15932       __ string_indexofC8($str1$$Register, $str2$$Register,
15933                           $cnt1$$Register, $cnt2$$Register,
15934                           icnt2, $result$$Register,
15935                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15936     } else {
15937       // Small strings are loaded through stack if they cross page boundary.
15938       __ string_indexof($str1$$Register, $str2$$Register,
15939                         $cnt1$$Register, $cnt2$$Register,
15940                         icnt2, $result$$Register,
15941                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15942     }
15943   %}
15944   ins_pipe( pipe_slow );
15945 %}
15946 
15947 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15948                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15949 %{
15950   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15951   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15952   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15953 
15954   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15955   ins_encode %{
15956     __ string_indexof($str1$$Register, $str2$$Register,
15957                       $cnt1$$Register, $cnt2$$Register,
15958                       (-1), $result$$Register,
15959                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15960   %}
15961   ins_pipe( pipe_slow );
15962 %}
15963 
15964 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15965                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15966 %{
15967   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15968   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15969   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15970 
15971   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15972   ins_encode %{
15973     __ string_indexof($str1$$Register, $str2$$Register,
15974                       $cnt1$$Register, $cnt2$$Register,
15975                       (-1), $result$$Register,
15976                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15977   %}
15978   ins_pipe( pipe_slow );
15979 %}
15980 
15981 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15982                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15983 %{
15984   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15985   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15986   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15987 
15988   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15989   ins_encode %{
15990     __ string_indexof($str1$$Register, $str2$$Register,
15991                       $cnt1$$Register, $cnt2$$Register,
15992                       (-1), $result$$Register,
15993                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15994   %}
15995   ins_pipe( pipe_slow );
15996 %}
15997 
15998 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15999                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
16000 %{
16001   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
16002   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
16003   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
16004   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
16005   ins_encode %{
16006     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
16007                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
16008   %}
16009   ins_pipe( pipe_slow );
16010 %}
16011 
16012 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
16013                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
16014 %{
16015   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
16016   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
16017   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
16018   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
16019   ins_encode %{
16020     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
16021                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
16022   %}
16023   ins_pipe( pipe_slow );
16024 %}
16025 
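// Both char-search rules above follow the same broadcast-and-scan shape,
// roughly (a sketch, not the exact stub code):
//
//   vec = broadcast(ch);                                  // tmp_vec1
//   for each 16/32-byte chunk of str1:
//     mask = movemask(cmpeq(load(chunk), vec));           // tmp_vec2/3
//     if (mask != 0) return chunk_pos + tzcnt(mask) / element_size;
//   return -1;
//
// which is why three vector temporaries and a scalar temp are consumed.
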
16026 // fast string equals
16027 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
16028                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16029 %{
16030   predicate(!VM_Version::supports_avx512vlbw());
16031   match(Set result (StrEquals (Binary str1 str2) cnt));
16032   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
16033 
16034   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
16035   ins_encode %{
16036     __ arrays_equals(false, $str1$$Register, $str2$$Register,
16037                      $cnt$$Register, $result$$Register, $tmp3$$Register,
16038                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
16039   %}
16040   ins_pipe( pipe_slow );
16041 %}
16042 
16043 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
16044                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
16045 %{
16046   predicate(VM_Version::supports_avx512vlbw());
16047   match(Set result (StrEquals (Binary str1 str2) cnt));
16048   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
16049 
16050   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
16051   ins_encode %{
16052     __ arrays_equals(false, $str1$$Register, $str2$$Register,
16053                      $cnt$$Register, $result$$Register, $tmp3$$Register,
16054                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
16055   %}
16056   ins_pipe( pipe_slow );
16057 %}
16058 
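// StrEquals and AryEq share one stub: the first argument of arrays_equals
// selects array mode (true: the stub fetches both lengths from the array
// headers itself, with $tmp3 passed in the count slot as scratch) versus
// string mode (false: the caller supplies $cnt), and the bool commented
// /* char */ picks 2-byte over 1-byte elements.
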
16059 // fast array equals
16060 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16061                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16062 %{
16063   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
16064   match(Set result (AryEq ary1 ary2));
16065   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16066 
16067   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16068   ins_encode %{
16069     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16070                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16071                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
16072   %}
16073   ins_pipe( pipe_slow );
16074 %}
16075 
16076 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16077                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16078 %{
16079   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
16080   match(Set result (AryEq ary1 ary2));
16081   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16082 
16083   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16084   ins_encode %{
16085     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16086                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16087                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
16088   %}
16089   ins_pipe( pipe_slow );
16090 %}
16091 
16092 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16093                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16094 %{
16095   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16096   match(Set result (AryEq ary1 ary2));
16097   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16098 
16099   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16100   ins_encode %{
16101     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16102                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16103                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
16104   %}
16105   ins_pipe( pipe_slow );
16106 %}
16107 
16108 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16109                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16110 %{
16111   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16112   match(Set result (AryEq ary1 ary2));
16113   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16114 
16115   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16116   ins_encode %{
16117     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16118                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16119                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
16120   %}
16121   ins_pipe( pipe_slow );
16122 %}
16123 
16124 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
16125                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
16126                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
16127                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
16128                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
16129 %{
16130   predicate(UseAVX >= 2);
16131   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
16132   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
16133          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
16134          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
16135          USE basic_type, KILL cr);
16136 
16137   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
16138   ins_encode %{
16139     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
16140                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
16141                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
16142                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
16143                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
16144                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
16145                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
16146   %}
16147   ins_pipe( pipe_slow );
16148 %}
16149 
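// The vectorized hash keeps Java's polynomial semantics: after n elements,
//   result = 31^n * result_in + sum_{i < n} 31^(n-1-i) * a[i],
// so accumulating several lanes per iteration needs precomputed powers of 31
// -- which is where the unusually long list of vector temporaries goes.
// $basic_type selects the element width to load; see the arrays_hashcode
// stub in the C2 macro assembler for the actual schedule.
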
16150 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
16151                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16152 %{
16153   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16154   match(Set result (CountPositives ary1 len));
16155   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16156 
16157   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
16158   ins_encode %{
16159     __ count_positives($ary1$$Register, $len$$Register,
16160                        $result$$Register, $tmp3$$Register,
16161                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
16162   %}
16163   ins_pipe( pipe_slow );
16164 %}
16165 
16166 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
16167                               legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
16168 %{
16169   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16170   match(Set result (CountPositives ary1 len));
16171   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16172 
16173   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
16174   ins_encode %{
16175     __ count_positives($ary1$$Register, $len$$Register,
16176                        $result$$Register, $tmp3$$Register,
16177                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
16178   %}
16179   ins_pipe( pipe_slow );
16180 %}
16181 
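// count_positives returns, in $result, the number of leading bytes with the
// sign bit clear ($len when every byte is non-negative), which is how e.g.
// the string-coding fast paths detect pure-ASCII input.
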
16182 // fast char[] to byte[] compression
16183 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16184                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16185   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16186   match(Set result (StrCompressedCopy src (Binary dst len)));
16187   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
16188          USE_KILL len, KILL tmp5, KILL cr);
16189 
16190   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
16191   ins_encode %{
16192     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16193                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16194                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16195                            knoreg, knoreg);
16196   %}
16197   ins_pipe( pipe_slow );
16198 %}
16199 
16200 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16201                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16202   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16203   match(Set result (StrCompressedCopy src (Binary dst len)));
16204   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
16205          USE_KILL len, KILL tmp5, KILL cr);
16206 
16207   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
16208   ins_encode %{
16209     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16210                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16211                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16212                            $ktmp1$$KRegister, $ktmp2$$KRegister);
16213   %}
16214   ins_pipe( pipe_slow );
16215 %}

16216 // fast byte[] to char[] inflation
16217 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16218                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
16219   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16220   match(Set dummy (StrInflatedCopy src (Binary dst len)));
16221   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16222 
16223   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
16224   ins_encode %{
16225     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16226                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
16227   %}
16228   ins_pipe( pipe_slow );
16229 %}
16230 
16231 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16232                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
16233   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16234   match(Set dummy (StrInflatedCopy src (Binary dst len)));
16235   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16236 
16237   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
16238   ins_encode %{
16239     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16240                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
16241   %}
16242   ins_pipe( pipe_slow );
16243 %}
16244 
16245 // encode char[] to byte[] in ISO_8859_1
16246 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16247                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16248                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16249   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
16250   match(Set result (EncodeISOArray src (Binary dst len)));
16251   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16252 
16253   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI" %}
16254   ins_encode %{
16255     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16256                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16257                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
16258   %}
16259   ins_pipe( pipe_slow );
16260 %}
16261 
16262 // encode char[] to byte[] in ASCII
16263 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16264                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16265                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16266   predicate(((EncodeISOArrayNode*)n)->is_ascii());
16267   match(Set result (EncodeISOArray src (Binary dst len)));
16268   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16269 
16270   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI" %}
16271   ins_encode %{
16272     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16273                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16274                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16275   %}
16276   ins_pipe( pipe_slow );
16277 %}
16278 
16279 //----------Overflow Math Instructions-----------------------------------------
16280 
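// These rules produce only a flags result; C2 pairs them with a branch on
// the overflow condition (jo/jno).  Note what each shape clobbers: add and
// imul destroy their first input (hence USE_KILL on the RAX-bound op1),
// subtraction overflow is probed with a plain cmp that writes no register,
// negation reuses a destructive neg on RAX, and the multiply-by-immediate
// forms write into a TEMP so both inputs stay live.
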
16281 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16282 %{
16283   match(Set cr (OverflowAddI op1 op2));
16284   effect(DEF cr, USE_KILL op1, USE op2);
16285 
16286   format %{ "addl    $op1, $op2\t# overflow check int" %}
16287 
16288   ins_encode %{
16289     __ addl($op1$$Register, $op2$$Register);
16290   %}
16291   ins_pipe(ialu_reg_reg);
16292 %}
16293 
16294 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16295 %{
16296   match(Set cr (OverflowAddI op1 op2));
16297   effect(DEF cr, USE_KILL op1, USE op2);
16298 
16299   format %{ "addl    $op1, $op2\t# overflow check int" %}
16300 
16301   ins_encode %{
16302     __ addl($op1$$Register, $op2$$constant);
16303   %}
16304   ins_pipe(ialu_reg_reg);
16305 %}
16306 
16307 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16308 %{
16309   match(Set cr (OverflowAddL op1 op2));
16310   effect(DEF cr, USE_KILL op1, USE op2);
16311 
16312   format %{ "addq    $op1, $op2\t# overflow check long" %}
16313   ins_encode %{
16314     __ addq($op1$$Register, $op2$$Register);
16315   %}
16316   ins_pipe(ialu_reg_reg);
16317 %}
16318 
16319 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16320 %{
16321   match(Set cr (OverflowAddL op1 op2));
16322   effect(DEF cr, USE_KILL op1, USE op2);
16323 
16324   format %{ "addq    $op1, $op2\t# overflow check long" %}
16325   ins_encode %{
16326     __ addq($op1$$Register, $op2$$constant);
16327   %}
16328   ins_pipe(ialu_reg_reg);
16329 %}
16330 
16331 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16332 %{
16333   match(Set cr (OverflowSubI op1 op2));
16334 
16335   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16336   ins_encode %{
16337     __ cmpl($op1$$Register, $op2$$Register);
16338   %}
16339   ins_pipe(ialu_reg_reg);
16340 %}
16341 
16342 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16343 %{
16344   match(Set cr (OverflowSubI op1 op2));
16345 
16346   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16347   ins_encode %{
16348     __ cmpl($op1$$Register, $op2$$constant);
16349   %}
16350   ins_pipe(ialu_reg_reg);
16351 %}
16352 
16353 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16354 %{
16355   match(Set cr (OverflowSubL op1 op2));
16356 
16357   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16358   ins_encode %{
16359     __ cmpq($op1$$Register, $op2$$Register);
16360   %}
16361   ins_pipe(ialu_reg_reg);
16362 %}
16363 
16364 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16365 %{
16366   match(Set cr (OverflowSubL op1 op2));
16367 
16368   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16369   ins_encode %{
16370     __ cmpq($op1$$Register, $op2$$constant);
16371   %}
16372   ins_pipe(ialu_reg_reg);
16373 %}
16374 
16375 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16376 %{
16377   match(Set cr (OverflowSubI zero op2));
16378   effect(DEF cr, USE_KILL op2);
16379 
16380   format %{ "negl    $op2\t# overflow check int" %}
16381   ins_encode %{
16382     __ negl($op2$$Register);
16383   %}
16384   ins_pipe(ialu_reg_reg);
16385 %}
16386 
16387 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16388 %{
16389   match(Set cr (OverflowSubL zero op2));
16390   effect(DEF cr, USE_KILL op2);
16391 
16392   format %{ "negq    $op2\t# overflow check long" %}
16393   ins_encode %{
16394     __ negq($op2$$Register);
16395   %}
16396   ins_pipe(ialu_reg_reg);
16397 %}
16398 
16399 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16400 %{
16401   match(Set cr (OverflowMulI op1 op2));
16402   effect(DEF cr, USE_KILL op1, USE op2);
16403 
16404   format %{ "imull    $op1, $op2\t# overflow check int" %}
16405   ins_encode %{
16406     __ imull($op1$$Register, $op2$$Register);
16407   %}
16408   ins_pipe(ialu_reg_reg_alu0);
16409 %}
16410 
16411 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16412 %{
16413   match(Set cr (OverflowMulI op1 op2));
16414   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16415 
16416   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
16417   ins_encode %{
16418     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16419   %}
16420   ins_pipe(ialu_reg_reg_alu0);
16421 %}
16422 
16423 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16424 %{
16425   match(Set cr (OverflowMulL op1 op2));
16426   effect(DEF cr, USE_KILL op1, USE op2);
16427 
16428   format %{ "imulq    $op1, $op2\t# overflow check long" %}
16429   ins_encode %{
16430     __ imulq($op1$$Register, $op2$$Register);
16431   %}
16432   ins_pipe(ialu_reg_reg_alu0);
16433 %}
16434 
16435 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16436 %{
16437   match(Set cr (OverflowMulL op1 op2));
16438   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16439 
16440   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
16441   ins_encode %{
16442     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16443   %}
16444   ins_pipe(ialu_reg_reg_alu0);
16445 %}
16446 
16447 
16448 //----------Control Flow Instructions------------------------------------------
16449 // Signed compare Instructions
16450 
16451 // XXX more variants!!
16452 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16453 %{
16454   match(Set cr (CmpI op1 op2));
16455   effect(DEF cr, USE op1, USE op2);
16456 
16457   format %{ "cmpl    $op1, $op2" %}
16458   ins_encode %{
16459     __ cmpl($op1$$Register, $op2$$Register);
16460   %}
16461   ins_pipe(ialu_cr_reg_reg);
16462 %}
16463 
16464 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16465 %{
16466   match(Set cr (CmpI op1 op2));
16467 
16468   format %{ "cmpl    $op1, $op2" %}
16469   ins_encode %{
16470     __ cmpl($op1$$Register, $op2$$constant);
16471   %}
16472   ins_pipe(ialu_cr_reg_imm);
16473 %}
16474 
16475 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16476 %{
16477   match(Set cr (CmpI op1 (LoadI op2)));
16478 
16479   ins_cost(500); // XXX
16480   format %{ "cmpl    $op1, $op2" %}
16481   ins_encode %{
16482     __ cmpl($op1$$Register, $op2$$Address);
16483   %}
16484   ins_pipe(ialu_cr_reg_mem);
16485 %}
16486 
16487 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16488 %{
16489   match(Set cr (CmpI src zero));
16490 
16491   format %{ "testl   $src, $src" %}
16492   ins_encode %{
16493     __ testl($src$$Register, $src$$Register);
16494   %}
16495   ins_pipe(ialu_cr_reg_imm);
16496 %}
16497 
16498 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16499 %{
16500   match(Set cr (CmpI (AndI src con) zero));
16501 
16502   format %{ "testl   $src, $con" %}
16503   ins_encode %{
16504     __ testl($src$$Register, $con$$constant);
16505   %}
16506   ins_pipe(ialu_cr_reg_imm);
16507 %}
16508 
16509 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16510 %{
16511   match(Set cr (CmpI (AndI src1 src2) zero));
16512 
16513   format %{ "testl   $src1, $src2" %}
16514   ins_encode %{
16515     __ testl($src1$$Register, $src2$$Register);
16516   %}
16517   ins_pipe(ialu_cr_reg_imm);
16518 %}
16519 
16520 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16521 %{
16522   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16523 
16524   format %{ "testl   $src, $mem" %}
16525   ins_encode %{
16526     __ testl($src$$Register, $mem$$Address);
16527   %}
16528   ins_pipe(ialu_cr_reg_mem);
16529 %}
16530 
16531 // Unsigned compare Instructions; really, same as signed except they
16532 // produce an rFlagsRegU instead of rFlagsReg.
16533 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16534 %{
16535   match(Set cr (CmpU op1 op2));
16536 
16537   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16538   ins_encode %{
16539     __ cmpl($op1$$Register, $op2$$Register);
16540   %}
16541   ins_pipe(ialu_cr_reg_reg);
16542 %}
16543 
16544 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16545 %{
16546   match(Set cr (CmpU op1 op2));
16547 
16548   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16549   ins_encode %{
16550     __ cmpl($op1$$Register, $op2$$constant);
16551   %}
16552   ins_pipe(ialu_cr_reg_imm);
16553 %}
16554 
16555 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16556 %{
16557   match(Set cr (CmpU op1 (LoadI op2)));
16558 
16559   ins_cost(500); // XXX
16560   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16561   ins_encode %{
16562     __ cmpl($op1$$Register, $op2$$Address);
16563   %}
16564   ins_pipe(ialu_cr_reg_mem);
16565 %}
16566 
16567 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16568 %{
16569   match(Set cr (CmpU src zero));
16570 
16571   format %{ "testl   $src, $src\t# unsigned" %}
16572   ins_encode %{
16573     __ testl($src$$Register, $src$$Register);
16574   %}
16575   ins_pipe(ialu_cr_reg_imm);
16576 %}
16577 
16578 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16579 %{
16580   match(Set cr (CmpP op1 op2));
16581 
16582   format %{ "cmpq    $op1, $op2\t# ptr" %}
16583   ins_encode %{
16584     __ cmpq($op1$$Register, $op2$$Register);
16585   %}
16586   ins_pipe(ialu_cr_reg_reg);
16587 %}
16588 
16589 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16590 %{
16591   match(Set cr (CmpP op1 (LoadP op2)));
16592   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16593 
16594   ins_cost(500); // XXX
16595   format %{ "cmpq    $op1, $op2\t# ptr" %}
16596   ins_encode %{
16597     __ cmpq($op1$$Register, $op2$$Address);
16598   %}
16599   ins_pipe(ialu_cr_reg_mem);
16600 %}
16601 
16602 // XXX this is generalized by compP_rReg_mem???
16603 // Compare raw pointer (used in out-of-heap check).
16604 // Only works because non-oop pointers must be raw pointers
16605 // and raw pointers have no anti-dependencies.
16606 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16607 %{
16608   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16609             n->in(2)->as_Load()->barrier_data() == 0);
16610   match(Set cr (CmpP op1 (LoadP op2)));
16611 
16612   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16613   ins_encode %{
16614     __ cmpq($op1$$Register, $op2$$Address);
16615   %}
16616   ins_pipe(ialu_cr_reg_mem);
16617 %}
16618 
16619 // This will generate a signed flags result. This should be OK since
16620 // any compare to a zero should be eq/neq.
16621 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16622 %{
16623   match(Set cr (CmpP src zero));
16624 
16625   format %{ "testq   $src, $src\t# ptr" %}
16626   ins_encode %{
16627     __ testq($src$$Register, $src$$Register);
16628   %}
16629   ins_pipe(ialu_cr_reg_imm);
16630 %}
16631 
16632 // This will generate a signed flags result. This should be OK since
16633 // any compare to a zero should be eq/neq.
16634 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16635 %{
16636   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16637             n->in(1)->as_Load()->barrier_data() == 0);
16638   match(Set cr (CmpP (LoadP op) zero));
16639 
16640   ins_cost(500); // XXX
16641   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16642   ins_encode %{
16643     __ testq($op$$Address, 0xFFFFFFFF);
16644   %}
16645   ins_pipe(ialu_cr_reg_imm);
16646 %}
16647 
16648 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16649 %{
16650   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16651             n->in(1)->as_Load()->barrier_data() == 0);
16652   match(Set cr (CmpP (LoadP mem) zero));
16653 
16654   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16655   ins_encode %{
16656     __ cmpq(r12, $mem$$Address);
16657   %}
16658   ins_pipe(ialu_cr_reg_mem);
16659 %}
16660 
16661 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16662 %{
16663   match(Set cr (CmpN op1 op2));
16664 
16665   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16666   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16667   ins_pipe(ialu_cr_reg_reg);
16668 %}
16669 
16670 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16671 %{
16672   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16673   match(Set cr (CmpN src (LoadN mem)));
16674 
16675   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16676   ins_encode %{
16677     __ cmpl($src$$Register, $mem$$Address);
16678   %}
16679   ins_pipe(ialu_cr_reg_mem);
16680 %}
16681 
16682 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16683   match(Set cr (CmpN op1 op2));
16684 
16685   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16686   ins_encode %{
16687     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16688   %}
16689   ins_pipe(ialu_cr_reg_imm);
16690 %}
16691 
16692 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16693 %{
16694   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16695   match(Set cr (CmpN src (LoadN mem)));
16696 
16697   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16698   ins_encode %{
16699     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16700   %}
16701   ins_pipe(ialu_cr_reg_mem);
16702 %}
16703 
16704 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16705   match(Set cr (CmpN op1 op2));
16706 
16707   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16708   ins_encode %{
16709     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16710   %}
16711   ins_pipe(ialu_cr_reg_imm);
16712 %}
16713 
16714 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16715 %{
16716   predicate(!UseCompactObjectHeaders);
16717   match(Set cr (CmpN src (LoadNKlass mem)));
16718 
16719   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16720   ins_encode %{
16721     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16722   %}
16723   ins_pipe(ialu_cr_reg_mem);
16724 %}
16725 
16726 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16727   match(Set cr (CmpN src zero));
16728 
16729   format %{ "testl   $src, $src\t# compressed ptr" %}
16730   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16731   ins_pipe(ialu_cr_reg_imm);
16732 %}
16733 
16734 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16735 %{
16736   predicate(CompressedOops::base() != nullptr &&
16737             n->in(1)->as_Load()->barrier_data() == 0);
16738   match(Set cr (CmpN (LoadN mem) zero));
16739 
16740   ins_cost(500); // XXX
16741   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16742   ins_encode %{
    __ testl($mem$$Address, (int)0xFFFFFFFF);
16744   %}
16745   ins_pipe(ialu_cr_reg_mem);
16746 %}
16747 
16748 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16749 %{
16750   predicate(CompressedOops::base() == nullptr &&
16751             n->in(1)->as_Load()->barrier_data() == 0);
16752   match(Set cr (CmpN (LoadN mem) zero));
16753 
16754   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16755   ins_encode %{
16756     __ cmpl(r12, $mem$$Address);
16757   %}
16758   ins_pipe(ialu_cr_reg_mem);
16759 %}
16760 
16761 // Yanked all unsigned pointer compare operations.
16762 // Pointer compares are done with CmpP which is already unsigned.
16763 
16764 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16765 %{
16766   match(Set cr (CmpL op1 op2));
16767 
16768   format %{ "cmpq    $op1, $op2" %}
16769   ins_encode %{
16770     __ cmpq($op1$$Register, $op2$$Register);
16771   %}
16772   ins_pipe(ialu_cr_reg_reg);
16773 %}
16774 
16775 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16776 %{
16777   match(Set cr (CmpL op1 op2));
16778 
16779   format %{ "cmpq    $op1, $op2" %}
16780   ins_encode %{
16781     __ cmpq($op1$$Register, $op2$$constant);
16782   %}
16783   ins_pipe(ialu_cr_reg_imm);
16784 %}
16785 
16786 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16787 %{
16788   match(Set cr (CmpL op1 (LoadL op2)));
16789 
16790   format %{ "cmpq    $op1, $op2" %}
16791   ins_encode %{
16792     __ cmpq($op1$$Register, $op2$$Address);
16793   %}
16794   ins_pipe(ialu_cr_reg_mem);
16795 %}
16796 
16797 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16798 %{
16799   match(Set cr (CmpL src zero));
16800 
16801   format %{ "testq   $src, $src" %}
16802   ins_encode %{
16803     __ testq($src$$Register, $src$$Register);
16804   %}
16805   ins_pipe(ialu_cr_reg_imm);
16806 %}
16807 
16808 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16809 %{
16810   match(Set cr (CmpL (AndL src con) zero));
16811 
16812   format %{ "testq   $src, $con\t# long" %}
16813   ins_encode %{
16814     __ testq($src$$Register, $con$$constant);
16815   %}
16816   ins_pipe(ialu_cr_reg_imm);
16817 %}
16818 
16819 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16820 %{
16821   match(Set cr (CmpL (AndL src1 src2) zero));
16822 
16823   format %{ "testq   $src1, $src2\t# long" %}
16824   ins_encode %{
16825     __ testq($src1$$Register, $src2$$Register);
16826   %}
16827   ins_pipe(ialu_cr_reg_imm);
16828 %}
16829 
16830 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16831 %{
16832   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16833 
16834   format %{ "testq   $src, $mem" %}
16835   ins_encode %{
16836     __ testq($src$$Register, $mem$$Address);
16837   %}
16838   ins_pipe(ialu_cr_reg_mem);
16839 %}
16840 
16841 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16842 %{
16843   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16844 
16845   format %{ "testq   $src, $mem" %}
16846   ins_encode %{
16847     __ testq($src$$Register, $mem$$Address);
16848   %}
16849   ins_pipe(ialu_cr_reg_mem);
16850 %}
16851 
16852 // Manifest a CmpU result in an integer register.  Very painful.
16853 // This is the test to avoid.
16854 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16855 %{
16856   match(Set dst (CmpU3 src1 src2));
16857   effect(KILL flags);
16858 
16859   ins_cost(275); // XXX
16860   format %{ "cmpl    $src1, $src2\t# CmpL3\n\t"
16861             "movl    $dst, -1\n\t"
16862             "jb,u    done\n\t"
16863             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16864     "done:" %}
16865   ins_encode %{
16866     Label done;
16867     __ cmpl($src1$$Register, $src2$$Register);
16868     __ movl($dst$$Register, -1);
16869     __ jccb(Assembler::below, done);
16870     __ setcc(Assembler::notZero, $dst$$Register);
16871     __ bind(done);
16872   %}
16873   ins_pipe(pipe_slow);
16874 %}
16875 
16876 // Manifest a CmpL result in an integer register.  Very painful.
16877 // This is the test to avoid.
16878 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16879 %{
16880   match(Set dst (CmpL3 src1 src2));
16881   effect(KILL flags);
16882 
16883   ins_cost(275); // XXX
16884   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16885             "movl    $dst, -1\n\t"
16886             "jl,s    done\n\t"
16887             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16888     "done:" %}
16889   ins_encode %{
16890     Label done;
16891     __ cmpq($src1$$Register, $src2$$Register);
16892     __ movl($dst$$Register, -1);
16893     __ jccb(Assembler::less, done);
16894     __ setcc(Assembler::notZero, $dst$$Register);
16895     __ bind(done);
16896   %}
16897   ins_pipe(pipe_slow);
16898 %}
16899 
16900 // Manifest a CmpUL result in an integer register.  Very painful.
16901 // This is the test to avoid.
16902 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16903 %{
16904   match(Set dst (CmpUL3 src1 src2));
16905   effect(KILL flags);
16906 
16907   ins_cost(275); // XXX
16908   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16909             "movl    $dst, -1\n\t"
16910             "jb,u    done\n\t"
16911             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16912     "done:" %}
16913   ins_encode %{
16914     Label done;
16915     __ cmpq($src1$$Register, $src2$$Register);
16916     __ movl($dst$$Register, -1);
16917     __ jccb(Assembler::below, done);
16918     __ setcc(Assembler::notZero, $dst$$Register);
16919     __ bind(done);
16920   %}
16921   ins_pipe(pipe_slow);
16922 %}
16923 
// Unsigned long compare instructions; really the same as the signed long
// compares, except that they produce an rFlagsRegU instead of an rFlagsReg.
16926 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16927 %{
16928   match(Set cr (CmpUL op1 op2));
16929 
16930   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16931   ins_encode %{
16932     __ cmpq($op1$$Register, $op2$$Register);
16933   %}
16934   ins_pipe(ialu_cr_reg_reg);
16935 %}
16936 
16937 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16938 %{
16939   match(Set cr (CmpUL op1 op2));
16940 
16941   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16942   ins_encode %{
16943     __ cmpq($op1$$Register, $op2$$constant);
16944   %}
16945   ins_pipe(ialu_cr_reg_imm);
16946 %}
16947 
16948 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16949 %{
16950   match(Set cr (CmpUL op1 (LoadL op2)));
16951 
16952   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16953   ins_encode %{
16954     __ cmpq($op1$$Register, $op2$$Address);
16955   %}
16956   ins_pipe(ialu_cr_reg_mem);
16957 %}
16958 
16959 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16960 %{
16961   match(Set cr (CmpUL src zero));
16962 
16963   format %{ "testq   $src, $src\t# unsigned" %}
16964   ins_encode %{
16965     __ testq($src$$Register, $src$$Register);
16966   %}
16967   ins_pipe(ialu_cr_reg_imm);
16968 %}
16969 
16970 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16971 %{
16972   match(Set cr (CmpI (LoadB mem) imm));
16973 
16974   ins_cost(125);
16975   format %{ "cmpb    $mem, $imm" %}
16976   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16977   ins_pipe(ialu_cr_reg_mem);
16978 %}
16979 
16980 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16981 %{
16982   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16983 
16984   ins_cost(125);
16985   format %{ "testb   $mem, $imm\t# ubyte" %}
16986   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16987   ins_pipe(ialu_cr_reg_mem);
16988 %}
16989 
16990 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16991 %{
16992   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16993 
16994   ins_cost(125);
16995   format %{ "testb   $mem, $imm\t# byte" %}
16996   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16997   ins_pipe(ialu_cr_reg_mem);
16998 %}
16999 
17000 //----------Max and Min--------------------------------------------------------
17001 // Min Instructions
17002 
17003 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
17004 %{
17005   predicate(!UseAPX);
17006   effect(USE_DEF dst, USE src, USE cr);
17007 
17008   format %{ "cmovlgt $dst, $src\t# min" %}
17009   ins_encode %{
17010     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
17011   %}
17012   ins_pipe(pipe_cmov_reg);
17013 %}
17014 
17015 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
17016 %{
17017   predicate(UseAPX);
17018   effect(DEF dst, USE src1, USE src2, USE cr);
17019 
17020   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
17021   ins_encode %{
17022     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
17023   %}
17024   ins_pipe(pipe_cmov_reg);
17025 %}
17026 
17027 instruct minI_rReg(rRegI dst, rRegI src)
17028 %{
17029   predicate(!UseAPX);
17030   match(Set dst (MinI dst src));
17031 
17032   ins_cost(200);
17033   expand %{
17034     rFlagsReg cr;
17035     compI_rReg(cr, dst, src);
17036     cmovI_reg_g(dst, src, cr);
17037   %}
17038 %}
17039 
17040 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
17041 %{
17042   predicate(UseAPX);
17043   match(Set dst (MinI src1 src2));
17044   effect(DEF dst, USE src1, USE src2);
17045   flag(PD::Flag_ndd_demotable_opr1);
17046 
17047   ins_cost(200);
17048   expand %{
17049     rFlagsReg cr;
17050     compI_rReg(cr, src1, src2);
17051     cmovI_reg_g_ndd(dst, src1, src2, cr);
17052   %}
17053 %}
17054 
17055 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
17056 %{
17057   predicate(!UseAPX);
17058   effect(USE_DEF dst, USE src, USE cr);
17059 
17060   format %{ "cmovllt $dst, $src\t# max" %}
17061   ins_encode %{
17062     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
17063   %}
17064   ins_pipe(pipe_cmov_reg);
17065 %}
17066 
17067 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
17068 %{
17069   predicate(UseAPX);
17070   effect(DEF dst, USE src1, USE src2, USE cr);
17071 
17072   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
17073   ins_encode %{
17074     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
17075   %}
17076   ins_pipe(pipe_cmov_reg);
17077 %}
17078 
17079 instruct maxI_rReg(rRegI dst, rRegI src)
17080 %{
17081   predicate(!UseAPX);
17082   match(Set dst (MaxI dst src));
17083 
17084   ins_cost(200);
17085   expand %{
17086     rFlagsReg cr;
17087     compI_rReg(cr, dst, src);
17088     cmovI_reg_l(dst, src, cr);
17089   %}
17090 %}
17091 
17092 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
17093 %{
17094   predicate(UseAPX);
17095   match(Set dst (MaxI src1 src2));
17096   effect(DEF dst, USE src1, USE src2);
17097   flag(PD::Flag_ndd_demotable_opr1);
17098 
17099   ins_cost(200);
17100   expand %{
17101     rFlagsReg cr;
17102     compI_rReg(cr, src1, src2);
17103     cmovI_reg_l_ndd(dst, src1, src2, cr);
17104   %}
17105 %}
17106 
17107 // ============================================================================
17108 // Branch Instructions
17109 
17110 // Jump Direct - Label defines a relative address from JMP+1
17111 instruct jmpDir(label labl)
17112 %{
17113   match(Goto);
17114   effect(USE labl);
17115 
17116   ins_cost(300);
17117   format %{ "jmp     $labl" %}
17118   size(5);
17119   ins_encode %{
17120     Label* L = $labl$$label;
17121     __ jmp(*L, false); // Always long jump
17122   %}
17123   ins_pipe(pipe_jmp);
17124 %}
17125 
17126 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17127 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
17128 %{
17129   match(If cop cr);
17130   effect(USE labl);
17131 
17132   ins_cost(300);
17133   format %{ "j$cop     $labl" %}
17134   size(6);
17135   ins_encode %{
17136     Label* L = $labl$$label;
17137     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17138   %}
17139   ins_pipe(pipe_jcc);
17140 %}
17141 
17142 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17143 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
17144 %{
17145   match(CountedLoopEnd cop cr);
17146   effect(USE labl);
17147 
17148   ins_cost(300);
17149   format %{ "j$cop     $labl\t# loop end" %}
17150   size(6);
17151   ins_encode %{
17152     Label* L = $labl$$label;
17153     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17154   %}
17155   ins_pipe(pipe_jcc);
17156 %}
17157 
17158 // Jump Direct Conditional - using unsigned comparison
17159 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17160   match(If cop cmp);
17161   effect(USE labl);
17162 
17163   ins_cost(300);
17164   format %{ "j$cop,u   $labl" %}
17165   size(6);
17166   ins_encode %{
17167     Label* L = $labl$$label;
17168     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17169   %}
17170   ins_pipe(pipe_jcc);
17171 %}
17172 
17173 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17174   match(If cop cmp);
17175   effect(USE labl);
17176 
17177   ins_cost(200);
17178   format %{ "j$cop,u   $labl" %}
17179   size(6);
17180   ins_encode %{
17181     Label* L = $labl$$label;
17182     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17183   %}
17184   ins_pipe(pipe_jcc);
17185 %}
17186 
17187 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17188   match(If cop cmp);
17189   effect(USE labl);
17190 
17191   ins_cost(200);
17192   format %{ $$template
17193     if ($cop$$cmpcode == Assembler::notEqual) {
17194       $$emit$$"jp,u    $labl\n\t"
17195       $$emit$$"j$cop,u   $labl"
17196     } else {
17197       $$emit$$"jp,u    done\n\t"
17198       $$emit$$"j$cop,u   $labl\n\t"
17199       $$emit$$"done:"
17200     }
17201   %}
17202   ins_encode %{
17203     Label* l = $labl$$label;
17204     if ($cop$$cmpcode == Assembler::notEqual) {
17205       __ jcc(Assembler::parity, *l, false);
17206       __ jcc(Assembler::notEqual, *l, false);
17207     } else if ($cop$$cmpcode == Assembler::equal) {
17208       Label done;
17209       __ jccb(Assembler::parity, done);
17210       __ jcc(Assembler::equal, *l, false);
17211       __ bind(done);
17212     } else {
17213        ShouldNotReachHere();
17214     }
17215   %}
17216   ins_pipe(pipe_jcc);
17217 %}
17218 
17219 // Jump Direct Conditional - using signed and unsigned comparison
17220 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17221   match(If cop cmp);
17222   effect(USE labl);
17223 
17224   ins_cost(200);
17225   format %{ "j$cop,su   $labl" %}
17226   size(6);
17227   ins_encode %{
17228     Label* L = $labl$$label;
17229     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17230   %}
17231   ins_pipe(pipe_jcc);
17232 %}
17233 
17234 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden
// internal cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
17240 
17241 instruct partialSubtypeCheck(rdi_RegP result,
17242                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
17243                              rFlagsReg cr)
17244 %{
17245   match(Set result (PartialSubtypeCheck sub super));
17246   predicate(!UseSecondarySupersTable);
17247   effect(KILL rcx, KILL cr);
17248 
17249   ins_cost(1100);  // slightly larger than the next version
17250   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
17251             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
17252             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
17253             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
17254             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
17255             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
17256             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
17257     "miss:\t" %}
17258 
17259   ins_encode %{
17260     Label miss;
17261     // NB: Callers may assume that, when $result is a valid register,
17262     // check_klass_subtype_slow_path_linear sets it to a nonzero
17263     // value.
17264     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
17265                                             $rcx$$Register, $result$$Register,
17266                                             nullptr, &miss,
17267                                             /*set_cond_codes:*/ true);
17268     __ xorptr($result$$Register, $result$$Register);
17269     __ bind(miss);
17270   %}
17271 
17272   ins_pipe(pipe_slow);
17273 %}
17274 
17275 // ============================================================================
17276 // Two versions of hashtable-based partialSubtypeCheck, both used when
// we need to search for a superclass in the secondary supers array.
17278 // The first is used when we don't know _a priori_ the class being
17279 // searched for. The second, far more common, is used when we do know:
17280 // this is used for instanceof, checkcast, and any case where C2 can
17281 // determine it by constant propagation.
17282 
17283 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17284                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17285                                        rFlagsReg cr)
17286 %{
17287   match(Set result (PartialSubtypeCheck sub super));
17288   predicate(UseSecondarySupersTable);
17289   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17290 
17291   ins_cost(1000);
17292   format %{ "partialSubtypeCheck $result, $sub, $super" %}
17293 
17294   ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
17297   %}
17298 
17299   ins_pipe(pipe_slow);
17300 %}
17301 
17302 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17303                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17304                                        rFlagsReg cr)
17305 %{
17306   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17307   predicate(UseSecondarySupersTable);
17308   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17309 
17310   ins_cost(700);  // smaller than the next version
17311   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17312 
17313   ins_encode %{
17314     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17315     if (InlineSecondarySupersTest) {
17316       __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17317                                        $temp3$$Register, $temp4$$Register, $result$$Register,
17318                                        super_klass_slot);
17319     } else {
17320       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17321     }
17322   %}
17323 
17324   ins_pipe(pipe_slow);
17325 %}
17326 
17327 // ============================================================================
17328 // Branch Instructions -- short offset versions
17329 //
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset.  These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching.  Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler decides whether the
// short variant can be used via the is_short_branch_offset() predicate in
// the machine-specific code section of the file.
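//
// For scale: the short forms below encode in 2 bytes (rel8), versus 5 bytes
// for the long jmp (rel32) and 6 bytes for the long jcc (0x0F 0x8x + rel32),
// per the size() attributes on the rules above and below.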
17338 
17339 // Jump Direct - Label defines a relative address from JMP+1
17340 instruct jmpDir_short(label labl) %{
17341   match(Goto);
17342   effect(USE labl);
17343 
17344   ins_cost(300);
17345   format %{ "jmp,s   $labl" %}
17346   size(2);
17347   ins_encode %{
17348     Label* L = $labl$$label;
17349     __ jmpb(*L);
17350   %}
17351   ins_pipe(pipe_jmp);
17352   ins_short_branch(1);
17353 %}
17354 
17355 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17356 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17357   match(If cop cr);
17358   effect(USE labl);
17359 
17360   ins_cost(300);
17361   format %{ "j$cop,s   $labl" %}
17362   size(2);
17363   ins_encode %{
17364     Label* L = $labl$$label;
17365     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17366   %}
17367   ins_pipe(pipe_jcc);
17368   ins_short_branch(1);
17369 %}
17370 
17371 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17372 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17373   match(CountedLoopEnd cop cr);
17374   effect(USE labl);
17375 
17376   ins_cost(300);
17377   format %{ "j$cop,s   $labl\t# loop end" %}
17378   size(2);
17379   ins_encode %{
17380     Label* L = $labl$$label;
17381     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17382   %}
17383   ins_pipe(pipe_jcc);
17384   ins_short_branch(1);
17385 %}
17386 
17387 // Jump Direct Conditional - using unsigned comparison
17388 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17389   match(If cop cmp);
17390   effect(USE labl);
17391 
17392   ins_cost(300);
17393   format %{ "j$cop,us  $labl" %}
17394   size(2);
17395   ins_encode %{
17396     Label* L = $labl$$label;
17397     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17398   %}
17399   ins_pipe(pipe_jcc);
17400   ins_short_branch(1);
17401 %}
17402 
17403 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17404   match(If cop cmp);
17405   effect(USE labl);
17406 
17407   ins_cost(300);
17408   format %{ "j$cop,us  $labl" %}
17409   size(2);
17410   ins_encode %{
17411     Label* L = $labl$$label;
17412     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17413   %}
17414   ins_pipe(pipe_jcc);
17415   ins_short_branch(1);
17416 %}
17417 
17418 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17419   match(If cop cmp);
17420   effect(USE labl);
17421 
17422   ins_cost(300);
17423   format %{ $$template
17424     if ($cop$$cmpcode == Assembler::notEqual) {
17425       $$emit$$"jp,u,s  $labl\n\t"
17426       $$emit$$"j$cop,u,s  $labl"
17427     } else {
17428       $$emit$$"jp,u,s  done\n\t"
17429       $$emit$$"j$cop,u,s  $labl\n\t"
17430       $$emit$$"done:"
17431     }
17432   %}
17433   size(4);
17434   ins_encode %{
17435     Label* l = $labl$$label;
17436     if ($cop$$cmpcode == Assembler::notEqual) {
17437       __ jccb(Assembler::parity, *l);
17438       __ jccb(Assembler::notEqual, *l);
17439     } else if ($cop$$cmpcode == Assembler::equal) {
17440       Label done;
17441       __ jccb(Assembler::parity, done);
17442       __ jccb(Assembler::equal, *l);
17443       __ bind(done);
17444     } else {
17445        ShouldNotReachHere();
17446     }
17447   %}
17448   ins_pipe(pipe_jcc);
17449   ins_short_branch(1);
17450 %}
17451 
17452 // Jump Direct Conditional - using signed and unsigned comparison
17453 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17454   match(If cop cmp);
17455   effect(USE labl);
17456 
17457   ins_cost(300);
17458   format %{ "j$cop,sus  $labl" %}
17459   size(2);
17460   ins_encode %{
17461     Label* L = $labl$$label;
17462     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17463   %}
17464   ins_pipe(pipe_jcc);
17465   ins_short_branch(1);
17466 %}
17467 
17468 // ============================================================================
17469 // inlined locking and unlocking
17470 
17471 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17472   match(Set cr (FastLock object box));
17473   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17474   ins_cost(300);
17475   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17476   ins_encode %{
17477     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17478   %}
17479   ins_pipe(pipe_slow);
17480 %}
17481 
17482 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17483   match(Set cr (FastUnlock object rax_reg));
17484   effect(TEMP tmp, USE_KILL rax_reg);
17485   ins_cost(300);
17486   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17487   ins_encode %{
17488     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17489   %}
17490   ins_pipe(pipe_slow);
17491 %}
17492 
17493 
17494 // ============================================================================
17495 // Safepoint Instructions
17496 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17497 %{
17498   match(SafePoint poll);
17499   effect(KILL cr, USE poll);
17500 
17501   format %{ "testl   rax, [$poll]\t"
17502             "# Safepoint: poll for GC" %}
17503   ins_cost(125);
17504   ins_encode %{
17505     __ relocate(relocInfo::poll_type);
17506     address pre_pc = __ pc();
17507     __ testl(rax, Address($poll$$Register, 0));
17508     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17509   %}
17510   ins_pipe(ialu_reg_mem);
17511 %}
17512 
17513 instruct mask_all_evexL(kReg dst, rRegL src) %{
17514   match(Set dst (MaskAll src));
17515   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17516   ins_encode %{
17517     int mask_len = Matcher::vector_length(this);
17518     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17519   %}
17520   ins_pipe( pipe_slow );
17521 %}
17522 
17523 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17524   predicate(Matcher::vector_length(n) > 32);
17525   match(Set dst (MaskAll src));
17526   effect(TEMP tmp);
17527   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17528   ins_encode %{
17529     int mask_len = Matcher::vector_length(this);
17530     __ movslq($tmp$$Register, $src$$Register);
17531     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17532   %}
17533   ins_pipe( pipe_slow );
17534 %}
17535 
17536 // ============================================================================
17537 // Procedure Call/Return Instructions
17538 // Call Java Static Instruction
17539 // Note: If this code changes, the corresponding ret_addr_offset() and
17540 //       compute_padding() functions will have to be adjusted.
17541 instruct CallStaticJavaDirect(method meth) %{
17542   match(CallStaticJava);
17543   effect(USE meth);
17544 
17545   ins_cost(300);
17546   format %{ "call,static " %}
17547   opcode(0xE8); /* E8 cd */
17548   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17549   ins_pipe(pipe_slow);
17550   ins_alignment(4);
17551 %}
17552 
17553 // Call Java Dynamic Instruction
17554 // Note: If this code changes, the corresponding ret_addr_offset() and
17555 //       compute_padding() functions will have to be adjusted.
17556 instruct CallDynamicJavaDirect(method meth)
17557 %{
17558   match(CallDynamicJava);
17559   effect(USE meth);
17560 
17561   ins_cost(300);
17562   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17563             "call,dynamic " %}
17564   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17565   ins_pipe(pipe_slow);
17566   ins_alignment(4);
17567 %}
17568 
17569 // Call Runtime Instruction
17570 instruct CallRuntimeDirect(method meth)
17571 %{
17572   match(CallRuntime);
17573   effect(USE meth);
17574 
17575   ins_cost(300);
17576   format %{ "call,runtime " %}
17577   ins_encode(clear_avx, Java_To_Runtime(meth));
17578   ins_pipe(pipe_slow);
17579 %}
17580 
17581 // Call runtime without safepoint
17582 instruct CallLeafDirect(method meth)
17583 %{
17584   match(CallLeaf);
17585   effect(USE meth);
17586 
17587   ins_cost(300);
17588   format %{ "call_leaf,runtime " %}
17589   ins_encode(clear_avx, Java_To_Runtime(meth));
17590   ins_pipe(pipe_slow);
17591 %}
17592 
17593 // Call runtime without safepoint and with vector arguments
17594 instruct CallLeafDirectVector(method meth)
17595 %{
17596   match(CallLeafVector);
17597   effect(USE meth);
17598 
17599   ins_cost(300);
17600   format %{ "call_leaf,vector " %}
17601   ins_encode(Java_To_Runtime(meth));
17602   ins_pipe(pipe_slow);
17603 %}
17604 
17605 // Call runtime without safepoint
17606 // entry point is null, target holds the address to call
17607 instruct CallLeafNoFPInDirect(rRegP target)
17608 %{
17609   predicate(n->as_Call()->entry_point() == nullptr);
17610   match(CallLeafNoFP target);
17611 
17612   ins_cost(300);
17613   format %{ "call_leaf_nofp,runtime indirect " %}
17614   ins_encode %{
17615      __ call($target$$Register);
17616   %}
17617 
17618   ins_pipe(pipe_slow);
17619 %}
17620 
17621 // Call runtime without safepoint
17622 instruct CallLeafNoFPDirect(method meth)
17623 %{
17624   predicate(n->as_Call()->entry_point() != nullptr);
17625   match(CallLeafNoFP);
17626   effect(USE meth);
17627 
17628   ins_cost(300);
17629   format %{ "call_leaf_nofp,runtime " %}
17630   ins_encode(clear_avx, Java_To_Runtime(meth));
17631   ins_pipe(pipe_slow);
17632 %}
17633 
17634 // Return Instruction
17635 // Remove the return address & jump to it.
// Note: we always emit a nop after a ret to make sure there is room
// for safepoint patching.
17638 instruct Ret()
17639 %{
17640   match(Return);
17641 
17642   format %{ "ret" %}
17643   ins_encode %{
17644     __ ret(0);
17645   %}
17646   ins_pipe(pipe_jmp);
17647 %}
17648 
17649 // Tail Call; Jump from runtime stub to Java code.
17650 // Also known as an 'interprocedural jump'.
17651 // Target of jump will eventually return to caller.
17652 // TailJump below removes the return address.
17653 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17654 // emitted just above the TailCall which has reset rbp to the caller state.
17655 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17656 %{
17657   match(TailCall jump_target method_ptr);
17658 
17659   ins_cost(300);
17660   format %{ "jmp     $jump_target\t# rbx holds method" %}
17661   ins_encode %{
17662     __ jmp($jump_target$$Register);
17663   %}
17664   ins_pipe(pipe_jmp);
17665 %}
17666 
17667 // Tail Jump; remove the return address; jump to target.
17668 // TailCall above leaves the return address around.
17669 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17670 %{
17671   match(TailJump jump_target ex_oop);
17672 
17673   ins_cost(300);
17674   format %{ "popq    rdx\t# pop return address\n\t"
17675             "jmp     $jump_target" %}
17676   ins_encode %{
17677     __ popq(as_Register(RDX_enc));
17678     __ jmp($jump_target$$Register);
17679   %}
17680   ins_pipe(pipe_jmp);
17681 %}
17682 
17683 // Forward exception.
17684 instruct ForwardExceptionjmp()
17685 %{
17686   match(ForwardException);
17687 
17688   format %{ "jmp     forward_exception_stub" %}
17689   ins_encode %{
17690     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17691   %}
17692   ins_pipe(pipe_jmp);
17693 %}
17694 
17695 // Create exception oop: created by stack-crawling runtime code.
17696 // Created exception is now available to this handler, and is setup
17697 // just prior to jumping to this handler.  No code emitted.
17698 instruct CreateException(rax_RegP ex_oop)
17699 %{
17700   match(Set ex_oop (CreateEx));
17701 
17702   size(0);
17703   // use the following format syntax
17704   format %{ "# exception oop is in rax; no code emitted" %}
17705   ins_encode();
17706   ins_pipe(empty);
17707 %}
17708 
17709 // Rethrow exception:
17710 // The exception oop will come in the first argument position.
17711 // Then JUMP (not call) to the rethrow stub code.
17712 instruct RethrowException()
17713 %{
17714   match(Rethrow);
17715 
17716   // use the following format syntax
17717   format %{ "jmp     rethrow_stub" %}
17718   ins_encode %{
17719     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17720   %}
17721   ins_pipe(pipe_jmp);
17722 %}
17723 
17724 // ============================================================================
17725 // This name is KNOWN by the ADLC and cannot be changed.
17726 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17727 // for this guy.
17728 instruct tlsLoadP(r15_RegP dst) %{
17729   match(Set dst (ThreadLocal));
17730   effect(DEF dst);
17731 
17732   size(0);
17733   format %{ "# TLS is in R15" %}
17734   ins_encode( /*empty encoding*/ );
17735   ins_pipe(ialu_reg_reg);
17736 %}
17737 
17738 instruct addF_reg(regF dst, regF src) %{
17739   predicate(UseAVX == 0);
17740   match(Set dst (AddF dst src));
17741 
17742   format %{ "addss   $dst, $src" %}
17743   ins_cost(150);
17744   ins_encode %{
17745     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17746   %}
17747   ins_pipe(pipe_slow);
17748 %}
17749 
17750 instruct addF_mem(regF dst, memory src) %{
17751   predicate(UseAVX == 0);
17752   match(Set dst (AddF dst (LoadF src)));
17753 
17754   format %{ "addss   $dst, $src" %}
17755   ins_cost(150);
17756   ins_encode %{
17757     __ addss($dst$$XMMRegister, $src$$Address);
17758   %}
17759   ins_pipe(pipe_slow);
17760 %}
17761 
17762 instruct addF_imm(regF dst, immF con) %{
17763   predicate(UseAVX == 0);
17764   match(Set dst (AddF dst con));
17765   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17766   ins_cost(150);
17767   ins_encode %{
17768     __ addss($dst$$XMMRegister, $constantaddress($con));
17769   %}
17770   ins_pipe(pipe_slow);
17771 %}
17772 
17773 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17774   predicate(UseAVX > 0);
17775   match(Set dst (AddF src1 src2));
17776 
17777   format %{ "vaddss  $dst, $src1, $src2" %}
17778   ins_cost(150);
17779   ins_encode %{
17780     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17781   %}
17782   ins_pipe(pipe_slow);
17783 %}
17784 
17785 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17786   predicate(UseAVX > 0);
17787   match(Set dst (AddF src1 (LoadF src2)));
17788 
17789   format %{ "vaddss  $dst, $src1, $src2" %}
17790   ins_cost(150);
17791   ins_encode %{
17792     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17793   %}
17794   ins_pipe(pipe_slow);
17795 %}
17796 
17797 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17798   predicate(UseAVX > 0);
17799   match(Set dst (AddF src con));
17800 
17801   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17802   ins_cost(150);
17803   ins_encode %{
17804     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17805   %}
17806   ins_pipe(pipe_slow);
17807 %}
17808 
17809 instruct addD_reg(regD dst, regD src) %{
17810   predicate(UseAVX == 0);
17811   match(Set dst (AddD dst src));
17812 
17813   format %{ "addsd   $dst, $src" %}
17814   ins_cost(150);
17815   ins_encode %{
17816     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17817   %}
17818   ins_pipe(pipe_slow);
17819 %}
17820 
17821 instruct addD_mem(regD dst, memory src) %{
17822   predicate(UseAVX == 0);
17823   match(Set dst (AddD dst (LoadD src)));
17824 
17825   format %{ "addsd   $dst, $src" %}
17826   ins_cost(150);
17827   ins_encode %{
17828     __ addsd($dst$$XMMRegister, $src$$Address);
17829   %}
17830   ins_pipe(pipe_slow);
17831 %}
17832 
17833 instruct addD_imm(regD dst, immD con) %{
17834   predicate(UseAVX == 0);
17835   match(Set dst (AddD dst con));
17836   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17837   ins_cost(150);
17838   ins_encode %{
17839     __ addsd($dst$$XMMRegister, $constantaddress($con));
17840   %}
17841   ins_pipe(pipe_slow);
17842 %}
17843 
17844 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17845   predicate(UseAVX > 0);
17846   match(Set dst (AddD src1 src2));
17847 
17848   format %{ "vaddsd  $dst, $src1, $src2" %}
17849   ins_cost(150);
17850   ins_encode %{
17851     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17852   %}
17853   ins_pipe(pipe_slow);
17854 %}
17855 
17856 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17857   predicate(UseAVX > 0);
17858   match(Set dst (AddD src1 (LoadD src2)));
17859 
17860   format %{ "vaddsd  $dst, $src1, $src2" %}
17861   ins_cost(150);
17862   ins_encode %{
17863     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17864   %}
17865   ins_pipe(pipe_slow);
17866 %}
17867 
17868 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17869   predicate(UseAVX > 0);
17870   match(Set dst (AddD src con));
17871 
17872   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17873   ins_cost(150);
17874   ins_encode %{
17875     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17876   %}
17877   ins_pipe(pipe_slow);
17878 %}
17879 
17880 instruct subF_reg(regF dst, regF src) %{
17881   predicate(UseAVX == 0);
17882   match(Set dst (SubF dst src));
17883 
17884   format %{ "subss   $dst, $src" %}
17885   ins_cost(150);
17886   ins_encode %{
17887     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17888   %}
17889   ins_pipe(pipe_slow);
17890 %}
17891 
17892 instruct subF_mem(regF dst, memory src) %{
17893   predicate(UseAVX == 0);
17894   match(Set dst (SubF dst (LoadF src)));
17895 
17896   format %{ "subss   $dst, $src" %}
17897   ins_cost(150);
17898   ins_encode %{
17899     __ subss($dst$$XMMRegister, $src$$Address);
17900   %}
17901   ins_pipe(pipe_slow);
17902 %}
17903 
17904 instruct subF_imm(regF dst, immF con) %{
17905   predicate(UseAVX == 0);
17906   match(Set dst (SubF dst con));
17907   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17908   ins_cost(150);
17909   ins_encode %{
17910     __ subss($dst$$XMMRegister, $constantaddress($con));
17911   %}
17912   ins_pipe(pipe_slow);
17913 %}
17914 
17915 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17916   predicate(UseAVX > 0);
17917   match(Set dst (SubF src1 src2));
17918 
17919   format %{ "vsubss  $dst, $src1, $src2" %}
17920   ins_cost(150);
17921   ins_encode %{
17922     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17923   %}
17924   ins_pipe(pipe_slow);
17925 %}
17926 
17927 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17928   predicate(UseAVX > 0);
17929   match(Set dst (SubF src1 (LoadF src2)));
17930 
17931   format %{ "vsubss  $dst, $src1, $src2" %}
17932   ins_cost(150);
17933   ins_encode %{
17934     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17935   %}
17936   ins_pipe(pipe_slow);
17937 %}
17938 
17939 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17940   predicate(UseAVX > 0);
17941   match(Set dst (SubF src con));
17942 
17943   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17944   ins_cost(150);
17945   ins_encode %{
17946     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17947   %}
17948   ins_pipe(pipe_slow);
17949 %}
17950 
17951 instruct subD_reg(regD dst, regD src) %{
17952   predicate(UseAVX == 0);
17953   match(Set dst (SubD dst src));
17954 
17955   format %{ "subsd   $dst, $src" %}
17956   ins_cost(150);
17957   ins_encode %{
17958     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17959   %}
17960   ins_pipe(pipe_slow);
17961 %}
17962 
17963 instruct subD_mem(regD dst, memory src) %{
17964   predicate(UseAVX == 0);
17965   match(Set dst (SubD dst (LoadD src)));
17966 
17967   format %{ "subsd   $dst, $src" %}
17968   ins_cost(150);
17969   ins_encode %{
17970     __ subsd($dst$$XMMRegister, $src$$Address);
17971   %}
17972   ins_pipe(pipe_slow);
17973 %}
17974 
17975 instruct subD_imm(regD dst, immD con) %{
17976   predicate(UseAVX == 0);
17977   match(Set dst (SubD dst con));
17978   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17979   ins_cost(150);
17980   ins_encode %{
17981     __ subsd($dst$$XMMRegister, $constantaddress($con));
17982   %}
17983   ins_pipe(pipe_slow);
17984 %}
17985 
17986 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17987   predicate(UseAVX > 0);
17988   match(Set dst (SubD src1 src2));
17989 
17990   format %{ "vsubsd  $dst, $src1, $src2" %}
17991   ins_cost(150);
17992   ins_encode %{
17993     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17994   %}
17995   ins_pipe(pipe_slow);
17996 %}
17997 
17998 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17999   predicate(UseAVX > 0);
18000   match(Set dst (SubD src1 (LoadD src2)));
18001 
18002   format %{ "vsubsd  $dst, $src1, $src2" %}
18003   ins_cost(150);
18004   ins_encode %{
18005     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18006   %}
18007   ins_pipe(pipe_slow);
18008 %}
18009 
18010 instruct subD_reg_imm(regD dst, regD src, immD con) %{
18011   predicate(UseAVX > 0);
18012   match(Set dst (SubD src con));
18013 
18014   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18015   ins_cost(150);
18016   ins_encode %{
18017     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18018   %}
18019   ins_pipe(pipe_slow);
18020 %}
18021 
18022 instruct mulF_reg(regF dst, regF src) %{
18023   predicate(UseAVX == 0);
18024   match(Set dst (MulF dst src));
18025 
18026   format %{ "mulss   $dst, $src" %}
18027   ins_cost(150);
18028   ins_encode %{
18029     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
18030   %}
18031   ins_pipe(pipe_slow);
18032 %}
18033 
18034 instruct mulF_mem(regF dst, memory src) %{
18035   predicate(UseAVX == 0);
18036   match(Set dst (MulF dst (LoadF src)));
18037 
18038   format %{ "mulss   $dst, $src" %}
18039   ins_cost(150);
18040   ins_encode %{
18041     __ mulss($dst$$XMMRegister, $src$$Address);
18042   %}
18043   ins_pipe(pipe_slow);
18044 %}
18045 
18046 instruct mulF_imm(regF dst, immF con) %{
18047   predicate(UseAVX == 0);
18048   match(Set dst (MulF dst con));
18049   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18050   ins_cost(150);
18051   ins_encode %{
18052     __ mulss($dst$$XMMRegister, $constantaddress($con));
18053   %}
18054   ins_pipe(pipe_slow);
18055 %}
18056 
18057 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
18058   predicate(UseAVX > 0);
18059   match(Set dst (MulF src1 src2));
18060 
18061   format %{ "vmulss  $dst, $src1, $src2" %}
18062   ins_cost(150);
18063   ins_encode %{
18064     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18065   %}
18066   ins_pipe(pipe_slow);
18067 %}
18068 
18069 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
18070   predicate(UseAVX > 0);
18071   match(Set dst (MulF src1 (LoadF src2)));
18072 
18073   format %{ "vmulss  $dst, $src1, $src2" %}
18074   ins_cost(150);
18075   ins_encode %{
18076     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18077   %}
18078   ins_pipe(pipe_slow);
18079 %}
18080 
18081 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
18082   predicate(UseAVX > 0);
18083   match(Set dst (MulF src con));
18084 
18085   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18086   ins_cost(150);
18087   ins_encode %{
18088     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18089   %}
18090   ins_pipe(pipe_slow);
18091 %}
18092 
18093 instruct mulD_reg(regD dst, regD src) %{
18094   predicate(UseAVX == 0);
18095   match(Set dst (MulD dst src));
18096 
18097   format %{ "mulsd   $dst, $src" %}
18098   ins_cost(150);
18099   ins_encode %{
18100     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
18101   %}
18102   ins_pipe(pipe_slow);
18103 %}
18104 
18105 instruct mulD_mem(regD dst, memory src) %{
18106   predicate(UseAVX == 0);
18107   match(Set dst (MulD dst (LoadD src)));
18108 
18109   format %{ "mulsd   $dst, $src" %}
18110   ins_cost(150);
18111   ins_encode %{
18112     __ mulsd($dst$$XMMRegister, $src$$Address);
18113   %}
18114   ins_pipe(pipe_slow);
18115 %}
18116 
18117 instruct mulD_imm(regD dst, immD con) %{
18118   predicate(UseAVX == 0);
18119   match(Set dst (MulD dst con));
18120   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18121   ins_cost(150);
18122   ins_encode %{
18123     __ mulsd($dst$$XMMRegister, $constantaddress($con));
18124   %}
18125   ins_pipe(pipe_slow);
18126 %}
18127 
18128 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
18129   predicate(UseAVX > 0);
18130   match(Set dst (MulD src1 src2));
18131 
18132   format %{ "vmulsd  $dst, $src1, $src2" %}
18133   ins_cost(150);
18134   ins_encode %{
18135     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18136   %}
18137   ins_pipe(pipe_slow);
18138 %}
18139 
18140 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
18141   predicate(UseAVX > 0);
18142   match(Set dst (MulD src1 (LoadD src2)));
18143 
18144   format %{ "vmulsd  $dst, $src1, $src2" %}
18145   ins_cost(150);
18146   ins_encode %{
18147     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18148   %}
18149   ins_pipe(pipe_slow);
18150 %}
18151 
18152 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
18153   predicate(UseAVX > 0);
18154   match(Set dst (MulD src con));
18155 
18156   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18157   ins_cost(150);
18158   ins_encode %{
18159     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18160   %}
18161   ins_pipe(pipe_slow);
18162 %}
18163 
18164 instruct divF_reg(regF dst, regF src) %{
18165   predicate(UseAVX == 0);
18166   match(Set dst (DivF dst src));
18167 
18168   format %{ "divss   $dst, $src" %}
18169   ins_cost(150);
18170   ins_encode %{
18171     __ divss($dst$$XMMRegister, $src$$XMMRegister);
18172   %}
18173   ins_pipe(pipe_slow);
18174 %}
18175 
18176 instruct divF_mem(regF dst, memory src) %{
18177   predicate(UseAVX == 0);
18178   match(Set dst (DivF dst (LoadF src)));
18179 
18180   format %{ "divss   $dst, $src" %}
18181   ins_cost(150);
18182   ins_encode %{
18183     __ divss($dst$$XMMRegister, $src$$Address);
18184   %}
18185   ins_pipe(pipe_slow);
18186 %}
18187 
18188 instruct divF_imm(regF dst, immF con) %{
18189   predicate(UseAVX == 0);
18190   match(Set dst (DivF dst con));
18191   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18192   ins_cost(150);
18193   ins_encode %{
18194     __ divss($dst$$XMMRegister, $constantaddress($con));
18195   %}
18196   ins_pipe(pipe_slow);
18197 %}
18198 
18199 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
18200   predicate(UseAVX > 0);
18201   match(Set dst (DivF src1 src2));
18202 
18203   format %{ "vdivss  $dst, $src1, $src2" %}
18204   ins_cost(150);
18205   ins_encode %{
18206     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18207   %}
18208   ins_pipe(pipe_slow);
18209 %}
18210 
18211 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
18212   predicate(UseAVX > 0);
18213   match(Set dst (DivF src1 (LoadF src2)));
18214 
18215   format %{ "vdivss  $dst, $src1, $src2" %}
18216   ins_cost(150);
18217   ins_encode %{
18218     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18219   %}
18220   ins_pipe(pipe_slow);
18221 %}
18222 
18223 instruct divF_reg_imm(regF dst, regF src, immF con) %{
18224   predicate(UseAVX > 0);
18225   match(Set dst (DivF src con));
18226 
18227   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18228   ins_cost(150);
18229   ins_encode %{
18230     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18231   %}
18232   ins_pipe(pipe_slow);
18233 %}
18234 
18235 instruct divD_reg(regD dst, regD src) %{
18236   predicate(UseAVX == 0);
18237   match(Set dst (DivD dst src));
18238 
18239   format %{ "divsd   $dst, $src" %}
18240   ins_cost(150);
18241   ins_encode %{
18242     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
18243   %}
18244   ins_pipe(pipe_slow);
18245 %}
18246 
18247 instruct divD_mem(regD dst, memory src) %{
18248   predicate(UseAVX == 0);
18249   match(Set dst (DivD dst (LoadD src)));
18250 
18251   format %{ "divsd   $dst, $src" %}
18252   ins_cost(150);
18253   ins_encode %{
18254     __ divsd($dst$$XMMRegister, $src$$Address);
18255   %}
18256   ins_pipe(pipe_slow);
18257 %}
18258 
18259 instruct divD_imm(regD dst, immD con) %{
18260   predicate(UseAVX == 0);
18261   match(Set dst (DivD dst con));
18262   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18263   ins_cost(150);
18264   ins_encode %{
18265     __ divsd($dst$$XMMRegister, $constantaddress($con));
18266   %}
18267   ins_pipe(pipe_slow);
18268 %}
18269 
18270 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
18271   predicate(UseAVX > 0);
18272   match(Set dst (DivD src1 src2));
18273 
18274   format %{ "vdivsd  $dst, $src1, $src2" %}
18275   ins_cost(150);
18276   ins_encode %{
18277     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18278   %}
18279   ins_pipe(pipe_slow);
18280 %}
18281 
18282 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
18283   predicate(UseAVX > 0);
18284   match(Set dst (DivD src1 (LoadD src2)));
18285 
18286   format %{ "vdivsd  $dst, $src1, $src2" %}
18287   ins_cost(150);
18288   ins_encode %{
18289     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18290   %}
18291   ins_pipe(pipe_slow);
18292 %}
18293 
18294 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18295   predicate(UseAVX > 0);
18296   match(Set dst (DivD src con));
18297 
18298   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18299   ins_cost(150);
18300   ins_encode %{
18301     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18302   %}
18303   ins_pipe(pipe_slow);
18304 %}
18305 
18306 instruct absF_reg(regF dst) %{
18307   predicate(UseAVX == 0);
18308   match(Set dst (AbsF dst));
18309   ins_cost(150);
18310   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
18311   ins_encode %{
18312     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18313   %}
18314   ins_pipe(pipe_slow);
18315 %}
18316 
18317 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18318   predicate(UseAVX > 0);
18319   match(Set dst (AbsF src));
18320   ins_cost(150);
18321   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18322   ins_encode %{
18323     int vlen_enc = Assembler::AVX_128bit;
18324     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18325               ExternalAddress(float_signmask()), vlen_enc);
18326   %}
18327   ins_pipe(pipe_slow);
18328 %}
18329 
18330 instruct absD_reg(regD dst) %{
18331   predicate(UseAVX == 0);
18332   match(Set dst (AbsD dst));
18333   ins_cost(150);
18334   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
18335             "# abs double by sign masking" %}
18336   ins_encode %{
18337     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18338   %}
18339   ins_pipe(pipe_slow);
18340 %}
18341 
18342 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18343   predicate(UseAVX > 0);
18344   match(Set dst (AbsD src));
18345   ins_cost(150);
18346   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
18347             "# abs double by sign masking" %}
18348   ins_encode %{
18349     int vlen_enc = Assembler::AVX_128bit;
18350     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18351               ExternalAddress(double_signmask()), vlen_enc);
18352   %}
18353   ins_pipe(pipe_slow);
18354 %}
18355 
18356 instruct negF_reg(regF dst) %{
18357   predicate(UseAVX == 0);
18358   match(Set dst (NegF dst));
18359   ins_cost(150);
18360   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
18361   ins_encode %{
18362     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18363   %}
18364   ins_pipe(pipe_slow);
18365 %}
18366 
18367 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18368   predicate(UseAVX > 0);
18369   match(Set dst (NegF src));
18370   ins_cost(150);
18371   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18372   ins_encode %{
18373     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18374                  ExternalAddress(float_signflip()));
18375   %}
18376   ins_pipe(pipe_slow);
18377 %}
18378 
18379 instruct negD_reg(regD dst) %{
18380   predicate(UseAVX == 0);
18381   match(Set dst (NegD dst));
18382   ins_cost(150);
18383   format %{ "xorpd   $dst, [0x8000000000000000]\t"
18384             "# neg double by sign flipping" %}
18385   ins_encode %{
18386     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18387   %}
18388   ins_pipe(pipe_slow);
18389 %}
18390 
18391 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18392   predicate(UseAVX > 0);
18393   match(Set dst (NegD src));
18394   ins_cost(150);
18395   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
18396             "# neg double by sign flipping" %}
18397   ins_encode %{
18398     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18399                  ExternalAddress(double_signflip()));
18400   %}
18401   ins_pipe(pipe_slow);
18402 %}
18403 
// The sqrtss instruction needs its destination register to be pre-initialized for best
// performance, so only the rule where the input is pre-loaded into the dst register is
// defined below.
18406 instruct sqrtF_reg(regF dst) %{
18407   match(Set dst (SqrtF dst));
18408   format %{ "sqrtss  $dst, $dst" %}
18409   ins_encode %{
18410     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18411   %}
18412   ins_pipe(pipe_slow);
18413 %}
18414 
// The sqrtsd instruction needs its destination register to be pre-initialized for best
// performance, so only the rule where the input is pre-loaded into the dst register is
// defined below.
18417 instruct sqrtD_reg(regD dst) %{
18418   match(Set dst (SqrtD dst));
18419   format %{ "sqrtsd  $dst, $dst" %}
18420   ins_encode %{
18421     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18422   %}
18423   ins_pipe(pipe_slow);
18424 %}
18425 
18426 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18427   effect(TEMP tmp);
18428   match(Set dst (ConvF2HF src));
18429   ins_cost(125);
18430   format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18431   ins_encode %{
18432     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18433   %}
18434   ins_pipe( pipe_slow );
18435 %}
18436 
18437 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18438   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18439   effect(TEMP ktmp, TEMP rtmp);
18440   match(Set mem (StoreC mem (ConvF2HF src)));
18441   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18442   ins_encode %{
18443     __ movl($rtmp$$Register, 0x1);
18444     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18445     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18446   %}
18447   ins_pipe( pipe_slow );
18448 %}
18449 
18450 instruct vconvF2HF(vec dst, vec src) %{
18451   match(Set dst (VectorCastF2HF src));
18452   format %{ "vector_conv_F2HF $dst $src" %}
18453   ins_encode %{
18454     int vlen_enc = vector_length_encoding(this, $src);
18455     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18456   %}
18457   ins_pipe( pipe_slow );
18458 %}
18459 
18460 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18461   predicate(n->as_StoreVector()->memory_size() >= 16);
18462   match(Set mem (StoreVector mem (VectorCastF2HF src)));
18463   format %{ "vcvtps2ph $mem,$src" %}
18464   ins_encode %{
18465     int vlen_enc = vector_length_encoding(this, $src);
18466     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18467   %}
18468   ins_pipe( pipe_slow );
18469 %}
18470 
18471 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18472   match(Set dst (ConvHF2F src));
18473   format %{ "vcvtph2ps $dst,$src" %}
18474   ins_encode %{
18475     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18476   %}
18477   ins_pipe( pipe_slow );
18478 %}
18479 
18480 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18481   match(Set dst (VectorCastHF2F (LoadVector mem)));
18482   format %{ "vcvtph2ps $dst,$mem" %}
18483   ins_encode %{
18484     int vlen_enc = vector_length_encoding(this);
18485     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18486   %}
18487   ins_pipe( pipe_slow );
18488 %}
18489 
18490 instruct vconvHF2F(vec dst, vec src) %{
18491   match(Set dst (VectorCastHF2F src));
18492   ins_cost(125);
18493   format %{ "vector_conv_HF2F $dst,$src" %}
18494   ins_encode %{
18495     int vlen_enc = vector_length_encoding(this);
18496     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18497   %}
18498   ins_pipe( pipe_slow );
18499 %}
18500 
18501 // ---------------------------------------- VectorReinterpret ------------------------------------
18502 instruct reinterpret_mask(kReg dst) %{
18503   predicate(n->bottom_type()->isa_vectmask() &&
18504             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18505   match(Set dst (VectorReinterpret dst));
18506   ins_cost(125);
18507   format %{ "vector_reinterpret $dst\t!" %}
18508   ins_encode %{
18509     // empty
18510   %}
18511   ins_pipe( pipe_slow );
18512 %}
18513 
18514 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18515   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18516             n->bottom_type()->isa_vectmask() &&
18517             n->in(1)->bottom_type()->isa_vectmask() &&
18518             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // src and dst sizes in bytes match
18520   match(Set dst (VectorReinterpret src));
18521   effect(TEMP xtmp);
18522   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18523   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_SHORT);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18530   %}
18531   ins_pipe( pipe_slow );
18532 %}
18533 
18534 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18535   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18536             n->bottom_type()->isa_vectmask() &&
18537             n->in(1)->bottom_type()->isa_vectmask() &&
18538             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18539              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // src and dst sizes in bytes match
18541   match(Set dst (VectorReinterpret src));
18542   effect(TEMP xtmp);
18543   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18544   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_INT);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18551   %}
18552   ins_pipe( pipe_slow );
18553 %}
18554 
18555 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18556   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18557             n->bottom_type()->isa_vectmask() &&
18558             n->in(1)->bottom_type()->isa_vectmask() &&
18559             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18560              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // src and dst sizes in bytes match
18562   match(Set dst (VectorReinterpret src));
18563   effect(TEMP xtmp);
18564   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18565   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_LONG);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18572   %}
18573   ins_pipe( pipe_slow );
18574 %}
18575 
18576 instruct reinterpret(vec dst) %{
18577   predicate(!n->bottom_type()->isa_vectmask() &&
18578             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18579   match(Set dst (VectorReinterpret dst));
18580   ins_cost(125);
18581   format %{ "vector_reinterpret $dst\t!" %}
18582   ins_encode %{
18583     // empty
18584   %}
18585   ins_pipe( pipe_slow );
18586 %}
18587 
18588 instruct reinterpret_expand(vec dst, vec src) %{
18589   predicate(UseAVX == 0 &&
18590             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18591   match(Set dst (VectorReinterpret src));
18592   ins_cost(125);
18593   effect(TEMP dst);
18594   format %{ "vector_reinterpret_expand $dst,$src" %}
18595   ins_encode %{
18596     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18597     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18598 
18599     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18600     if (src_vlen_in_bytes == 4) {
18601       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18602     } else {
18603       assert(src_vlen_in_bytes == 8, "");
18604       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18605     }
18606     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18607   %}
18608   ins_pipe( pipe_slow );
18609 %}
18610 
18611 instruct vreinterpret_expand4(legVec dst, vec src) %{
18612   predicate(UseAVX > 0 &&
18613             !n->bottom_type()->isa_vectmask() &&
18614             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18615             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18616   match(Set dst (VectorReinterpret src));
18617   ins_cost(125);
18618   format %{ "vector_reinterpret_expand $dst,$src" %}
18619   ins_encode %{
18620     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18621   %}
18622   ins_pipe( pipe_slow );
18623 %}
18624 
18625 
18626 instruct vreinterpret_expand(legVec dst, vec src) %{
18627   predicate(UseAVX > 0 &&
18628             !n->bottom_type()->isa_vectmask() &&
18629             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18630             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18631   match(Set dst (VectorReinterpret src));
18632   ins_cost(125);
18633   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18634   ins_encode %{
18635     switch (Matcher::vector_length_in_bytes(this, $src)) {
18636       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18637       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18638       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18639       default: ShouldNotReachHere();
18640     }
18641   %}
18642   ins_pipe( pipe_slow );
18643 %}
18644 
18645 instruct reinterpret_shrink(vec dst, legVec src) %{
18646   predicate(!n->bottom_type()->isa_vectmask() &&
18647             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18648   match(Set dst (VectorReinterpret src));
18649   ins_cost(125);
18650   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18651   ins_encode %{
18652     switch (Matcher::vector_length_in_bytes(this)) {
18653       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18654       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18655       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18656       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18657       default: ShouldNotReachHere();
18658     }
18659   %}
18660   ins_pipe( pipe_slow );
18661 %}
18662 
18663 // ----------------------------------------------------------------------------------------------------
18664 
18665 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18666   match(Set dst (RoundDoubleMode src rmode));
18667   format %{ "roundsd $dst,$src" %}
18668   ins_cost(150);
18669   ins_encode %{
18670     assert(UseSSE >= 4, "required");
18671     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18672       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18673     }
18674     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18675   %}
18676   ins_pipe(pipe_slow);
18677 %}
18678 
18679 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18680   match(Set dst (RoundDoubleMode con rmode));
18681   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18682   ins_cost(150);
18683   ins_encode %{
18684     assert(UseSSE >= 4, "required");
18685     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18686   %}
18687   ins_pipe(pipe_slow);
18688 %}
18689 
18690 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18691   predicate(Matcher::vector_length(n) < 8);
18692   match(Set dst (RoundDoubleModeV src rmode));
18693   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18694   ins_encode %{
18695     assert(UseAVX > 0, "required");
18696     int vlen_enc = vector_length_encoding(this);
18697     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18698   %}
18699   ins_pipe( pipe_slow );
18700 %}
18701 
18702 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18703   predicate(Matcher::vector_length(n) == 8);
18704   match(Set dst (RoundDoubleModeV src rmode));
18705   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18706   ins_encode %{
18707     assert(UseAVX > 2, "required");
18708     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18709   %}
18710   ins_pipe( pipe_slow );
18711 %}
18712 
18713 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18714   predicate(Matcher::vector_length(n) < 8);
18715   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18716   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18717   ins_encode %{
18718     assert(UseAVX > 0, "required");
18719     int vlen_enc = vector_length_encoding(this);
18720     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18721   %}
18722   ins_pipe( pipe_slow );
18723 %}
18724 
18725 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18726   predicate(Matcher::vector_length(n) == 8);
18727   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18728   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18729   ins_encode %{
18730     assert(UseAVX > 2, "required");
18731     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18732   %}
18733   ins_pipe( pipe_slow );
18734 %}
18735 
18736 instruct onspinwait() %{
18737   match(OnSpinWait);
18738   ins_cost(200);
18739 
18740   format %{
18741     $$template
18742     $$emit$$"pause\t! membar_onspinwait"
18743   %}
18744   ins_encode %{
18745     __ pause();
18746   %}
18747   ins_pipe(pipe_slow);
18748 %}
18749 
18750 // a * b + c
18751 instruct fmaD_reg(regD a, regD b, regD c) %{
18752   match(Set c (FmaD  c (Binary a b)));
18753   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18754   ins_cost(150);
18755   ins_encode %{
18756     assert(UseFMA, "Needs FMA instructions support.");
18757     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18758   %}
18759   ins_pipe( pipe_slow );
18760 %}
18761 
18762 // a * b + c
18763 instruct fmaF_reg(regF a, regF b, regF c) %{
18764   match(Set c (FmaF  c (Binary a b)));
18765   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18766   ins_cost(150);
18767   ins_encode %{
18768     assert(UseFMA, "Needs FMA instructions support.");
18769     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18770   %}
18771   ins_pipe( pipe_slow );
18772 %}
18773 
18774 // ====================VECTOR INSTRUCTIONS=====================================
18775 
18776 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18777 instruct MoveVec2Leg(legVec dst, vec src) %{
18778   match(Set dst src);
18779   format %{ "" %}
18780   ins_encode %{
18781     ShouldNotReachHere();
18782   %}
18783   ins_pipe( fpu_reg_reg );
18784 %}
18785 
18786 instruct MoveLeg2Vec(vec dst, legVec src) %{
18787   match(Set dst src);
18788   format %{ "" %}
18789   ins_encode %{
18790     ShouldNotReachHere();
18791   %}
18792   ins_pipe( fpu_reg_reg );
18793 %}
18794 
18795 // ============================================================================
18796 
18797 // Load vectors generic operand pattern
18798 instruct loadV(vec dst, memory mem) %{
18799   match(Set dst (LoadVector mem));
18800   ins_cost(125);
18801   format %{ "load_vector $dst,$mem" %}
18802   ins_encode %{
18803     BasicType bt = Matcher::vector_element_basic_type(this);
18804     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18805   %}
18806   ins_pipe( pipe_slow );
18807 %}
18808 
18809 // Store vectors generic operand pattern.
18810 instruct storeV(memory mem, vec src) %{
18811   match(Set mem (StoreVector mem src));
18812   ins_cost(145);
18813   format %{ "store_vector $mem,$src\n\t" %}
18814   ins_encode %{
18815     switch (Matcher::vector_length_in_bytes(this, $src)) {
18816       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18817       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18818       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18819       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18820       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18821       default: ShouldNotReachHere();
18822     }
18823   %}
18824   ins_pipe( pipe_slow );
18825 %}
18826 
18827 // ---------------------------------------- Gather ------------------------------------
18828 
18829 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18830 
18831 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18832   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18833             Matcher::vector_length_in_bytes(n) <= 32);
18834   match(Set dst (LoadVectorGather mem idx));
18835   effect(TEMP dst, TEMP tmp, TEMP mask);
18836   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18837   ins_encode %{
18838     int vlen_enc = vector_length_encoding(this);
18839     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18840     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18841     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18842     __ lea($tmp$$Register, $mem$$Address);
18843     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18844   %}
18845   ins_pipe( pipe_slow );
18846 %}
18847 
18848 
18849 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18850   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18851             !is_subword_type(Matcher::vector_element_basic_type(n)));
18852   match(Set dst (LoadVectorGather mem idx));
18853   effect(TEMP dst, TEMP tmp, TEMP ktmp);
18854   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
18855   ins_encode %{
18856     int vlen_enc = vector_length_encoding(this);
18857     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18858     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18859     __ lea($tmp$$Register, $mem$$Address);
18860     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18861   %}
18862   ins_pipe( pipe_slow );
18863 %}
18864 
18865 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18866   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18867             !is_subword_type(Matcher::vector_element_basic_type(n)));
18868   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18869   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
18870   format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
18871   ins_encode %{
18872     assert(UseAVX > 2, "sanity");
18873     int vlen_enc = vector_length_encoding(this);
18874     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18875     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is first copied to a temporary
    // (see the sketch after this rule).
18878     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18879     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18880     __ lea($tmp$$Register, $mem$$Address);
18881     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18882   %}
18883   ins_pipe( pipe_slow );
18884 %}
18885 
18886 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18887   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18888   match(Set dst (LoadVectorGather mem idx_base));
18889   effect(TEMP tmp, TEMP rtmp);
18890   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18891   ins_encode %{
18892     int vlen_enc = vector_length_encoding(this);
18893     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18894     __ lea($tmp$$Register, $mem$$Address);
18895     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18896   %}
18897   ins_pipe( pipe_slow );
18898 %}
18899 
18900 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18901                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18902   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18903   match(Set dst (LoadVectorGather mem idx_base));
18904   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18905   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18906   ins_encode %{
18907     int vlen_enc = vector_length_encoding(this);
18908     int vector_len = Matcher::vector_length(this);
18909     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18910     __ lea($tmp$$Register, $mem$$Address);
18911     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18912     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18913                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18914   %}
18915   ins_pipe( pipe_slow );
18916 %}
18917 
18918 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18919   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18920   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18921   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18922   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18923   ins_encode %{
18924     int vlen_enc = vector_length_encoding(this);
18925     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18926     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18927     __ lea($tmp$$Register, $mem$$Address);
18928     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18929     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18930   %}
18931   ins_pipe( pipe_slow );
18932 %}
18933 
18934 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18935                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18936   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18937   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18938   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18939   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18940   ins_encode %{
18941     int vlen_enc = vector_length_encoding(this);
18942     int vector_len = Matcher::vector_length(this);
18943     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18944     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18945     __ lea($tmp$$Register, $mem$$Address);
18946     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18947     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18948     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18949                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18950   %}
18951   ins_pipe( pipe_slow );
18952 %}
18953 
18954 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18955   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18956   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18957   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18958   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18959   ins_encode %{
18960     int vlen_enc = vector_length_encoding(this);
18961     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18962     __ lea($tmp$$Register, $mem$$Address);
18963     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
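    // vpmovmskb yields one mask bit per byte; a T_SHORT mask lane is 16 bits
    // of all-ones or all-zeros and so contributes two identical bits. pext
    // with 0x55555555 keeps every other bit, recovering one bit per lane.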
18964     if (elem_bt == T_SHORT) {
18965       __ movl($mask_idx$$Register, 0x55555555);
18966       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18967     }
18968     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18969     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18970   %}
18971   ins_pipe( pipe_slow );
18972 %}
18973 
18974 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18975                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18976   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18977   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18978   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18979   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18980   ins_encode %{
18981     int vlen_enc = vector_length_encoding(this);
18982     int vector_len = Matcher::vector_length(this);
18983     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18984     __ lea($tmp$$Register, $mem$$Address);
18985     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18986     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18987     if (elem_bt == T_SHORT) {
18988       __ movl($mask_idx$$Register, 0x55555555);
18989       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18990     }
18991     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18992     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18993                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18994   %}
18995   ins_pipe( pipe_slow );
18996 %}
18997 
18998 // ====================Scatter=======================================
18999 
19000 // Scatter INT, LONG, FLOAT, DOUBLE
19001 
19002 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
19003   predicate(UseAVX > 2);
19004   match(Set mem (StoreVectorScatter mem (Binary src idx)));
19005   effect(TEMP tmp, TEMP ktmp);
19006   format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
19007   ins_encode %{
19008     int vlen_enc = vector_length_encoding(this, $src);
19009     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
19010 
19011     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
19012     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
19013 
19014     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
19015     __ lea($tmp$$Register, $mem$$Address);
19016     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
19017   %}
19018   ins_pipe( pipe_slow );
19019 %}
19020 
19021 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
19022   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
19023   effect(TEMP tmp, TEMP ktmp);
19024   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
19025   ins_encode %{
19026     int vlen_enc = vector_length_encoding(this, $src);
19027     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
19028     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
19029     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The scatter instruction partially updates the opmask register used
    // for predication, hence the mask operand is first copied to a temporary.
19032     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
19033     __ lea($tmp$$Register, $mem$$Address);
19034     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
19035   %}
19036   ins_pipe( pipe_slow );
19037 %}
19038 
19039 // ====================REPLICATE=======================================
19040 
19041 // Replicate byte scalar to be vector
19042 instruct vReplB_reg(vec dst, rRegI src) %{
19043   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
19044   match(Set dst (Replicate src));
19045   format %{ "replicateB $dst,$src" %}
19046   ins_encode %{
19047     uint vlen = Matcher::vector_length(this);
19048     if (UseAVX >= 2) {
19049       int vlen_enc = vector_length_encoding(this);
19050       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19051         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
19052         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
19053       } else {
19054         __ movdl($dst$$XMMRegister, $src$$Register);
19055         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19056       }
19057     } else {
      assert(UseAVX < 2, "");
19059       __ movdl($dst$$XMMRegister, $src$$Register);
19060       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
19061       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19062       if (vlen >= 16) {
19063         assert(vlen == 16, "");
19064         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19065       }
19066     }
19067   %}
19068   ins_pipe( pipe_slow );
19069 %}
19070 
19071 instruct ReplB_mem(vec dst, memory mem) %{
19072   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
19073   match(Set dst (Replicate (LoadB mem)));
19074   format %{ "replicateB $dst,$mem" %}
19075   ins_encode %{
19076     int vlen_enc = vector_length_encoding(this);
19077     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
19078   %}
19079   ins_pipe( pipe_slow );
19080 %}
19081 
19082 // ====================ReplicateS=======================================
19083 
19084 instruct vReplS_reg(vec dst, rRegI src) %{
19085   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
19086   match(Set dst (Replicate src));
19087   format %{ "replicateS $dst,$src" %}
19088   ins_encode %{
19089     uint vlen = Matcher::vector_length(this);
19090     int vlen_enc = vector_length_encoding(this);
19091     if (UseAVX >= 2) {
19092       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19093         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
19094         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
19095       } else {
19096         __ movdl($dst$$XMMRegister, $src$$Register);
19097         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19098       }
19099     } else {
19100       assert(UseAVX < 2, "");
19101       __ movdl($dst$$XMMRegister, $src$$Register);
19102       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19103       if (vlen >= 8) {
19104         assert(vlen == 8, "");
19105         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19106       }
19107     }
19108   %}
19109   ins_pipe( pipe_slow );
19110 %}
19111 
19112 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
19113   match(Set dst (Replicate con));
19114   effect(TEMP rtmp);
19115   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
19116   ins_encode %{
19117     int vlen_enc = vector_length_encoding(this);
19118     BasicType bt = Matcher::vector_element_basic_type(this);
19119     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
19120     __ movl($rtmp$$Register, $con$$constant);
19121     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19122   %}
19123   ins_pipe( pipe_slow );
19124 %}
19125 
19126 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
19127   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
19128   match(Set dst (Replicate src));
19129   effect(TEMP rtmp);
19130   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
19131   ins_encode %{
19132     int vlen_enc = vector_length_encoding(this);
19133     __ vmovw($rtmp$$Register, $src$$XMMRegister);
19134     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19135   %}
19136   ins_pipe( pipe_slow );
19137 %}
19138 
19139 instruct ReplS_mem(vec dst, memory mem) %{
19140   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
19141   match(Set dst (Replicate (LoadS mem)));
19142   format %{ "replicateS $dst,$mem" %}
19143   ins_encode %{
19144     int vlen_enc = vector_length_encoding(this);
19145     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
19146   %}
19147   ins_pipe( pipe_slow );
19148 %}
19149 
19150 // ====================ReplicateI=======================================
19151 
19152 instruct ReplI_reg(vec dst, rRegI src) %{
19153   predicate(Matcher::vector_element_basic_type(n) == T_INT);
19154   match(Set dst (Replicate src));
19155   format %{ "replicateI $dst,$src" %}
19156   ins_encode %{
19157     uint vlen = Matcher::vector_length(this);
19158     int vlen_enc = vector_length_encoding(this);
19159     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19160       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
19161     } else if (VM_Version::supports_avx2()) {
19162       __ movdl($dst$$XMMRegister, $src$$Register);
19163       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19164     } else {
19165       __ movdl($dst$$XMMRegister, $src$$Register);
19166       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19167     }
19168   %}
19169   ins_pipe( pipe_slow );
19170 %}
19171 
19172 instruct ReplI_mem(vec dst, memory mem) %{
19173   predicate(Matcher::vector_element_basic_type(n) == T_INT);
19174   match(Set dst (Replicate (LoadI mem)));
19175   format %{ "replicateI $dst,$mem" %}
19176   ins_encode %{
19177     int vlen_enc = vector_length_encoding(this);
19178     if (VM_Version::supports_avx2()) {
19179       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19180     } else if (VM_Version::supports_avx()) {
19181       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19182     } else {
19183       __ movdl($dst$$XMMRegister, $mem$$Address);
19184       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19185     }
19186   %}
19187   ins_pipe( pipe_slow );
19188 %}
19189 
19190 instruct ReplI_imm(vec dst, immI con) %{
19191   predicate(Matcher::is_non_long_integral_vector(n));
19192   match(Set dst (Replicate con));
19193   format %{ "replicateI $dst,$con" %}
19194   ins_encode %{
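    // The last argument of vreplicate_imm below is the replication count: the
    // expression keeps the constant-table entry at 4 bytes when AVX broadcast
    // loads are available, 8 bytes with SSE3, and a full 16 bytes otherwise.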
19195     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
19196                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
19197                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
19198     BasicType bt = Matcher::vector_element_basic_type(this);
19199     int vlen = Matcher::vector_length_in_bytes(this);
19200     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
19201   %}
19202   ins_pipe( pipe_slow );
19203 %}
19204 
19205 // Replicate scalar zero to be vector
19206 instruct ReplI_zero(vec dst, immI_0 zero) %{
19207   predicate(Matcher::is_non_long_integral_vector(n));
19208   match(Set dst (Replicate zero));
19209   format %{ "replicateI $dst,$zero" %}
19210   ins_encode %{
19211     int vlen_enc = vector_length_encoding(this);
19212     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19213       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19214     } else {
19215       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19216     }
19217   %}
19218   ins_pipe( fpu_reg_reg );
19219 %}
19220 
19221 instruct ReplI_M1(vec dst, immI_M1 con) %{
19222   predicate(Matcher::is_non_long_integral_vector(n));
19223   match(Set dst (Replicate con));
19224   format %{ "vallones $dst" %}
19225   ins_encode %{
19226     int vector_len = vector_length_encoding(this);
19227     __ vallones($dst$$XMMRegister, vector_len);
19228   %}
19229   ins_pipe( pipe_slow );
19230 %}
19231 
19232 // ====================ReplicateL=======================================
19233 
19234 // Replicate long (8 byte) scalar to be vector
19235 instruct ReplL_reg(vec dst, rRegL src) %{
19236   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19237   match(Set dst (Replicate src));
19238   format %{ "replicateL $dst,$src" %}
19239   ins_encode %{
19240     int vlen = Matcher::vector_length(this);
19241     int vlen_enc = vector_length_encoding(this);
19242     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19243       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
19244     } else if (VM_Version::supports_avx2()) {
19245       __ movdq($dst$$XMMRegister, $src$$Register);
19246       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19247     } else {
19248       __ movdq($dst$$XMMRegister, $src$$Register);
19249       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19250     }
19251   %}
19252   ins_pipe( pipe_slow );
19253 %}
19254 
19255 instruct ReplL_mem(vec dst, memory mem) %{
19256   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19257   match(Set dst (Replicate (LoadL mem)));
19258   format %{ "replicateL $dst,$mem" %}
19259   ins_encode %{
19260     int vlen_enc = vector_length_encoding(this);
19261     if (VM_Version::supports_avx2()) {
19262       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
19263     } else if (VM_Version::supports_sse3()) {
19264       __ movddup($dst$$XMMRegister, $mem$$Address);
19265     } else {
19266       __ movq($dst$$XMMRegister, $mem$$Address);
19267       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19268     }
19269   %}
19270   ins_pipe( pipe_slow );
19271 %}
19272 
19273 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
19274 instruct ReplL_imm(vec dst, immL con) %{
19275   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19276   match(Set dst (Replicate con));
19277   format %{ "replicateL $dst,$con" %}
19278   ins_encode %{
19279     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19280     int vlen = Matcher::vector_length_in_bytes(this);
19281     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
19282   %}
19283   ins_pipe( pipe_slow );
19284 %}
19285 
19286 instruct ReplL_zero(vec dst, immL0 zero) %{
19287   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19288   match(Set dst (Replicate zero));
19289   format %{ "replicateL $dst,$zero" %}
19290   ins_encode %{
19291     int vlen_enc = vector_length_encoding(this);
19292     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19293       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19294     } else {
19295       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19296     }
19297   %}
19298   ins_pipe( fpu_reg_reg );
19299 %}
19300 
19301 instruct ReplL_M1(vec dst, immL_M1 con) %{
19302   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19303   match(Set dst (Replicate con));
19304   format %{ "vallones $dst" %}
19305   ins_encode %{
19306     int vector_len = vector_length_encoding(this);
19307     __ vallones($dst$$XMMRegister, vector_len);
19308   %}
19309   ins_pipe( pipe_slow );
19310 %}
19311 
19312 // ====================ReplicateF=======================================
19313 
19314 instruct vReplF_reg(vec dst, vlRegF src) %{
19315   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19316   match(Set dst (Replicate src));
19317   format %{ "replicateF $dst,$src" %}
19318   ins_encode %{
19319     uint vlen = Matcher::vector_length(this);
19320     int vlen_enc = vector_length_encoding(this);
19321     if (vlen <= 4) {
19322       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19323     } else if (VM_Version::supports_avx2()) {
19324       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19325     } else {
19326       assert(vlen == 8, "sanity");
19327       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19328       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19329     }
19330   %}
19331   ins_pipe( pipe_slow );
19332 %}
19333 
19334 instruct ReplF_reg(vec dst, vlRegF src) %{
19335   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19336   match(Set dst (Replicate src));
19337   format %{ "replicateF $dst,$src" %}
19338   ins_encode %{
19339     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19340   %}
19341   ins_pipe( pipe_slow );
19342 %}
19343 
19344 instruct ReplF_mem(vec dst, memory mem) %{
19345   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19346   match(Set dst (Replicate (LoadF mem)));
19347   format %{ "replicateF $dst,$mem" %}
19348   ins_encode %{
19349     int vlen_enc = vector_length_encoding(this);
19350     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19351   %}
19352   ins_pipe( pipe_slow );
19353 %}
19354 
19355 // Replicate float scalar immediate to be vector by loading from const table.
19356 instruct ReplF_imm(vec dst, immF con) %{
19357   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19358   match(Set dst (Replicate con));
19359   format %{ "replicateF $dst,$con" %}
19360   ins_encode %{
19361     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19362                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19363     int vlen = Matcher::vector_length_in_bytes(this);
19364     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19365   %}
19366   ins_pipe( pipe_slow );
19367 %}
19368 
19369 instruct ReplF_zero(vec dst, immF0 zero) %{
19370   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19371   match(Set dst (Replicate zero));
19372   format %{ "replicateF $dst,$zero" %}
19373   ins_encode %{
19374     int vlen_enc = vector_length_encoding(this);
19375     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19376       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19377     } else {
19378       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19379     }
19380   %}
19381   ins_pipe( fpu_reg_reg );
19382 %}
19383 
19384 // ====================ReplicateD=======================================
19385 
19386 // Replicate double (8 bytes) scalar to be vector
19387 instruct vReplD_reg(vec dst, vlRegD src) %{
19388   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19389   match(Set dst (Replicate src));
19390   format %{ "replicateD $dst,$src" %}
19391   ins_encode %{
19392     uint vlen = Matcher::vector_length(this);
19393     int vlen_enc = vector_length_encoding(this);
19394     if (vlen <= 2) {
19395       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19396     } else if (VM_Version::supports_avx2()) {
19397       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19398     } else {
19399       assert(vlen == 4, "sanity");
19400       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19401       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19402     }
19403   %}
19404   ins_pipe( pipe_slow );
19405 %}
19406 
19407 instruct ReplD_reg(vec dst, vlRegD src) %{
19408   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19409   match(Set dst (Replicate src));
19410   format %{ "replicateD $dst,$src" %}
19411   ins_encode %{
19412     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19413   %}
19414   ins_pipe( pipe_slow );
19415 %}
19416 
19417 instruct ReplD_mem(vec dst, memory mem) %{
19418   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19419   match(Set dst (Replicate (LoadD mem)));
19420   format %{ "replicateD $dst,$mem" %}
19421   ins_encode %{
19422     if (Matcher::vector_length(this) >= 4) {
19423       int vlen_enc = vector_length_encoding(this);
19424       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19425     } else {
19426       __ movddup($dst$$XMMRegister, $mem$$Address);
19427     }
19428   %}
19429   ins_pipe( pipe_slow );
19430 %}
19431 
19432 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19433 instruct ReplD_imm(vec dst, immD con) %{
19434   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19435   match(Set dst (Replicate con));
19436   format %{ "replicateD $dst,$con" %}
19437   ins_encode %{
19438     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19439     int vlen = Matcher::vector_length_in_bytes(this);
19440     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19441   %}
19442   ins_pipe( pipe_slow );
19443 %}
19444 
19445 instruct ReplD_zero(vec dst, immD0 zero) %{
19446   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19447   match(Set dst (Replicate zero));
19448   format %{ "replicateD $dst,$zero" %}
19449   ins_encode %{
19450     int vlen_enc = vector_length_encoding(this);
19451     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19452       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19453     } else {
19454       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19455     }
19456   %}
19457   ins_pipe( fpu_reg_reg );
19458 %}
19459 
19460 // ====================VECTOR INSERT=======================================
19461 
19462 instruct insert(vec dst, rRegI val, immU8 idx) %{
19463   predicate(Matcher::vector_length_in_bytes(n) < 32);
19464   match(Set dst (VectorInsert (Binary dst val) idx));
19465   format %{ "vector_insert $dst,$val,$idx" %}
19466   ins_encode %{
19467     assert(UseSSE >= 4, "required");
19468     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19469 
19470     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19471 
19472     assert(is_integral_type(elem_bt), "");
19473     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19474 
19475     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19476   %}
19477   ins_pipe( pipe_slow );
19478 %}
19479 
19480 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19481   predicate(Matcher::vector_length_in_bytes(n) == 32);
19482   match(Set dst (VectorInsert (Binary src val) idx));
19483   effect(TEMP vtmp);
19484   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19485   ins_encode %{
19487     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19488     int elem_per_lane = 16/type2aelembytes(elem_bt);
19489     int log2epr = log2(elem_per_lane);
19490 
19491     assert(is_integral_type(elem_bt), "sanity");
19492     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19493 
19494     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19495     uint y_idx = ($idx$$constant >> log2epr) & 1;
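    // Example: for a 256-bit vector of ints, elem_per_lane = 4 and log2epr = 2,
    // so idx 5 selects 128-bit lane y_idx = (5 >> 2) & 1 = 1 and element
    // x_idx = 5 & 3 = 1 within that lane.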
19496     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19497     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19498     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19499   %}
19500   ins_pipe( pipe_slow );
19501 %}
19502 
19503 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19504   predicate(Matcher::vector_length_in_bytes(n) == 64);
19505   match(Set dst (VectorInsert (Binary src val) idx));
19506   effect(TEMP vtmp);
19507   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19508   ins_encode %{
19509     assert(UseAVX > 2, "sanity");
19510 
19511     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19512     int elem_per_lane = 16/type2aelembytes(elem_bt);
19513     int log2epr = log2(elem_per_lane);
19514 
19515     assert(is_integral_type(elem_bt), "");
19516     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19517 
19518     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19519     uint y_idx = ($idx$$constant >> log2epr) & 3;
19520     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19521     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19522     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19523   %}
19524   ins_pipe( pipe_slow );
19525 %}
19526 
19527 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19528   predicate(Matcher::vector_length(n) == 2);
19529   match(Set dst (VectorInsert (Binary dst val) idx));
19530   format %{ "vector_insert $dst,$val,$idx" %}
19531   ins_encode %{
19532     assert(UseSSE >= 4, "required");
19533     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19534     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19535 
19536     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19537   %}
19538   ins_pipe( pipe_slow );
19539 %}
19540 
19541 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19542   predicate(Matcher::vector_length(n) == 4);
19543   match(Set dst (VectorInsert (Binary src val) idx));
19544   effect(TEMP vtmp);
19545   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19546   ins_encode %{
19547     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19548     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19549 
19550     uint x_idx = $idx$$constant & right_n_bits(1);
19551     uint y_idx = ($idx$$constant >> 1) & 1;
19553     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19554     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19555     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19556   %}
19557   ins_pipe( pipe_slow );
19558 %}
19559 
19560 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19561   predicate(Matcher::vector_length(n) == 8);
19562   match(Set dst (VectorInsert (Binary src val) idx));
19563   effect(TEMP vtmp);
19564   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19565   ins_encode %{
19566     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19567     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19568 
19569     uint x_idx = $idx$$constant & right_n_bits(1);
19570     uint y_idx = ($idx$$constant >> 1) & 3;
19571     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19572     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19573     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19574   %}
19575   ins_pipe( pipe_slow );
19576 %}
19577 
19578 instruct insertF(vec dst, regF val, immU8 idx) %{
19579   predicate(Matcher::vector_length(n) < 8);
19580   match(Set dst (VectorInsert (Binary dst val) idx));
19581   format %{ "vector_insert $dst,$val,$idx" %}
19582   ins_encode %{
19583     assert(UseSSE >= 4, "sanity");
19584 
19585     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19586     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19587 
19588     uint x_idx = $idx$$constant & right_n_bits(2);
19589     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19590   %}
19591   ins_pipe( pipe_slow );
19592 %}
19593 
19594 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19595   predicate(Matcher::vector_length(n) >= 8);
19596   match(Set dst (VectorInsert (Binary src val) idx));
19597   effect(TEMP vtmp);
19598   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19599   ins_encode %{
19600     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19601     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19602 
19603     int vlen = Matcher::vector_length(this);
19604     uint x_idx = $idx$$constant & right_n_bits(2);
19605     if (vlen == 8) {
19606       uint y_idx = ($idx$$constant >> 2) & 1;
19608       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19609       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19610       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19611     } else {
19612       assert(vlen == 16, "sanity");
19613       uint y_idx = ($idx$$constant >> 2) & 3;
19614       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19615       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19616       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19617     }
19618   %}
19619   ins_pipe( pipe_slow );
19620 %}
19621 
19622 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19623   predicate(Matcher::vector_length(n) == 2);
19624   match(Set dst (VectorInsert (Binary dst val) idx));
19625   effect(TEMP tmp);
19626   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19627   ins_encode %{
19628     assert(UseSSE >= 4, "sanity");
19629     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19630     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
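    // pinsrq takes a general-purpose register source, so the double's bit
    // pattern is bounced through $tmp.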
19631 
19632     __ movq($tmp$$Register, $val$$XMMRegister);
19633     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19634   %}
19635   ins_pipe( pipe_slow );
19636 %}
19637 
19638 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19639   predicate(Matcher::vector_length(n) == 4);
19640   match(Set dst (VectorInsert (Binary src val) idx));
19641   effect(TEMP vtmp, TEMP tmp);
19642   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19643   ins_encode %{
19644     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19645     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19646 
19647     uint x_idx = $idx$$constant & right_n_bits(1);
19648     uint y_idx = ($idx$$constant >> 1) & 1;
19650     __ movq($tmp$$Register, $val$$XMMRegister);
19651     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19652     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19653     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19654   %}
19655   ins_pipe( pipe_slow );
19656 %}
19657 
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19659   predicate(Matcher::vector_length(n) == 8);
19660   match(Set dst (VectorInsert (Binary src val) idx));
19661   effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19663   ins_encode %{
19664     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19665     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19666 
19667     uint x_idx = $idx$$constant & right_n_bits(1);
19668     uint y_idx = ($idx$$constant >> 1) & 3;
19669     __ movq($tmp$$Register, $val$$XMMRegister);
19670     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19671     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19672     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19673   %}
19674   ins_pipe( pipe_slow );
19675 %}
19676 
19677 // ====================REDUCTION ARITHMETIC=======================================
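
// A reduction folds all lanes of the vector input into one scalar and
// combines it with the scalar input. Conceptually, for AddReductionVI:
//   int r = src1;
//   for (int i = 0; i < vlen; i++) r += src2[i];
// (illustrative pseudocode only; the actual lowering is in the reduce*
// routines of the C2 MacroAssembler).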
19678 
19679 // =======================Int Reduction==========================================
19680 
19681 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19682   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19683   match(Set dst (AddReductionVI src1 src2));
19684   match(Set dst (MulReductionVI src1 src2));
19685   match(Set dst (AndReductionV  src1 src2));
19686   match(Set dst ( OrReductionV  src1 src2));
19687   match(Set dst (XorReductionV  src1 src2));
19688   match(Set dst (MinReductionV  src1 src2));
19689   match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
19692   effect(TEMP vtmp1, TEMP vtmp2);
19693   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19694   ins_encode %{
19695     int opcode = this->ideal_Opcode();
19696     int vlen = Matcher::vector_length(this, $src2);
19697     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19698   %}
19699   ins_pipe( pipe_slow );
19700 %}
19701 
19702 // =======================Long Reduction==========================================
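
// Without AVX-512DQ, 64-bit element instructions such as vpextrq/vpinsrq
// have no EVEX encoding and cannot address XMM16-XMM31, so the legVec
// operands below keep the allocator in XMM0-XMM15; the _avx512dq variant
// may use the full register bank.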
19703 
19704 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19705   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19706   match(Set dst (AddReductionVL src1 src2));
19707   match(Set dst (MulReductionVL src1 src2));
19708   match(Set dst (AndReductionV  src1 src2));
19709   match(Set dst ( OrReductionV  src1 src2));
19710   match(Set dst (XorReductionV  src1 src2));
19711   match(Set dst (MinReductionV  src1 src2));
19712   match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
19715   effect(TEMP vtmp1, TEMP vtmp2);
19716   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19717   ins_encode %{
19718     int opcode = this->ideal_Opcode();
19719     int vlen = Matcher::vector_length(this, $src2);
19720     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19721   %}
19722   ins_pipe( pipe_slow );
19723 %}
19724 
19725 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19726   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19727   match(Set dst (AddReductionVL src1 src2));
19728   match(Set dst (MulReductionVL src1 src2));
19729   match(Set dst (AndReductionV  src1 src2));
19730   match(Set dst ( OrReductionV  src1 src2));
19731   match(Set dst (XorReductionV  src1 src2));
19732   match(Set dst (MinReductionV  src1 src2));
19733   match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
19736   effect(TEMP vtmp1, TEMP vtmp2);
19737   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19738   ins_encode %{
19739     int opcode = this->ideal_Opcode();
19740     int vlen = Matcher::vector_length(this, $src2);
19741     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19742   %}
19743   ins_pipe( pipe_slow );
19744 %}
19745 
19746 // =======================Float Reduction==========================================
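
// Floating-point add/mul are not associative, so the lane-combination order
// is observable: e.g. (1e30f + -1e30f) + 1.0f == 1.0f, while
// 1e30f + (-1e30f + 1.0f) == 0.0f. The strictly ordered rules below fold
// lanes in sequence; the unordered_* rules may use a faster tree-shaped
// reduction, since the VectorAPI does not mandate an order.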
19747 
19748 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19749   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19750   match(Set dst (AddReductionVF dst src));
19751   match(Set dst (MulReductionVF dst src));
19752   effect(TEMP dst, TEMP vtmp);
19753   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19754   ins_encode %{
19755     int opcode = this->ideal_Opcode();
19756     int vlen = Matcher::vector_length(this, $src);
19757     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19758   %}
19759   ins_pipe( pipe_slow );
19760 %}
19761 
19762 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19763   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19764   match(Set dst (AddReductionVF dst src));
19765   match(Set dst (MulReductionVF dst src));
19766   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19767   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19768   ins_encode %{
19769     int opcode = this->ideal_Opcode();
19770     int vlen = Matcher::vector_length(this, $src);
19771     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19772   %}
19773   ins_pipe( pipe_slow );
19774 %}
19775 
19776 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19777   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19778   match(Set dst (AddReductionVF dst src));
19779   match(Set dst (MulReductionVF dst src));
19780   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19781   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19782   ins_encode %{
19783     int opcode = this->ideal_Opcode();
19784     int vlen = Matcher::vector_length(this, $src);
19785     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19786   %}
19787   ins_pipe( pipe_slow );
19788 %}
19789 
19790 
19791 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19792   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19793   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19794   // src1 contains reduction identity
19795   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19796   match(Set dst (AddReductionVF src1 src2));
19797   match(Set dst (MulReductionVF src1 src2));
19798   effect(TEMP dst);
19799   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19800   ins_encode %{
19801     int opcode = this->ideal_Opcode();
19802     int vlen = Matcher::vector_length(this, $src2);
19803     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19804   %}
19805   ins_pipe( pipe_slow );
19806 %}
19807 
19808 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19809   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19810   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19811   // src1 contains reduction identity
19812   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19813   match(Set dst (AddReductionVF src1 src2));
19814   match(Set dst (MulReductionVF src1 src2));
19815   effect(TEMP dst, TEMP vtmp);
19816   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19817   ins_encode %{
19818     int opcode = this->ideal_Opcode();
19819     int vlen = Matcher::vector_length(this, $src2);
19820     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19821   %}
19822   ins_pipe( pipe_slow );
19823 %}
19824 
19825 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19826   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19827   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19828   // src1 contains reduction identity
19829   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19830   match(Set dst (AddReductionVF src1 src2));
19831   match(Set dst (MulReductionVF src1 src2));
19832   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19833   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19834   ins_encode %{
19835     int opcode = this->ideal_Opcode();
19836     int vlen = Matcher::vector_length(this, $src2);
19837     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19838   %}
19839   ins_pipe( pipe_slow );
19840 %}
19841 
19842 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19843   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19844   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19845   // src1 contains reduction identity
19846   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19847   match(Set dst (AddReductionVF src1 src2));
19848   match(Set dst (MulReductionVF src1 src2));
19849   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19850   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19851   ins_encode %{
19852     int opcode = this->ideal_Opcode();
19853     int vlen = Matcher::vector_length(this, $src2);
19854     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19855   %}
19856   ins_pipe( pipe_slow );
19857 %}
19858 
19859 // =======================Double Reduction==========================================
19860 
19861 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19862   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19863   match(Set dst (AddReductionVD dst src));
19864   match(Set dst (MulReductionVD dst src));
19865   effect(TEMP dst, TEMP vtmp);
19866   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19867   ins_encode %{
19868     int opcode = this->ideal_Opcode();
19869     int vlen = Matcher::vector_length(this, $src);
19870     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19871 %}
19872   ins_pipe( pipe_slow );
19873 %}
19874 
19875 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19876   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19877   match(Set dst (AddReductionVD dst src));
19878   match(Set dst (MulReductionVD dst src));
19879   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19880   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19881   ins_encode %{
19882     int opcode = this->ideal_Opcode();
19883     int vlen = Matcher::vector_length(this, $src);
19884     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19885   %}
19886   ins_pipe( pipe_slow );
19887 %}
19888 
19889 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19890   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19891   match(Set dst (AddReductionVD dst src));
19892   match(Set dst (MulReductionVD dst src));
19893   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19894   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19895   ins_encode %{
19896     int opcode = this->ideal_Opcode();
19897     int vlen = Matcher::vector_length(this, $src);
19898     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19899   %}
19900   ins_pipe( pipe_slow );
19901 %}
19902 
19903 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19904   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19905   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19906   // src1 contains reduction identity
19907   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19908   match(Set dst (AddReductionVD src1 src2));
19909   match(Set dst (MulReductionVD src1 src2));
19910   effect(TEMP dst);
19911   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19912   ins_encode %{
19913     int opcode = this->ideal_Opcode();
19914     int vlen = Matcher::vector_length(this, $src2);
19915     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19916 %}
19917   ins_pipe( pipe_slow );
19918 %}
19919 
19920 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19921   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19922   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19923   // src1 contains reduction identity
19924   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19925   match(Set dst (AddReductionVD src1 src2));
19926   match(Set dst (MulReductionVD src1 src2));
19927   effect(TEMP dst, TEMP vtmp);
19928   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19929   ins_encode %{
19930     int opcode = this->ideal_Opcode();
19931     int vlen = Matcher::vector_length(this, $src2);
19932     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19933   %}
19934   ins_pipe( pipe_slow );
19935 %}
19936 
19937 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19938   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19939   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19940   // src1 contains reduction identity
19941   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19942   match(Set dst (AddReductionVD src1 src2));
19943   match(Set dst (MulReductionVD src1 src2));
19944   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19945   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19946   ins_encode %{
19947     int opcode = this->ideal_Opcode();
19948     int vlen = Matcher::vector_length(this, $src2);
19949     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19950   %}
19951   ins_pipe( pipe_slow );
19952 %}
19953 
19954 // =======================Byte Reduction==========================================
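
// Same register-bank split as the long reduction above, keyed on AVX-512BW
// because byte/word instructions need BW for their EVEX encodings.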
19955 
19956 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19957   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19958   match(Set dst (AddReductionVI src1 src2));
19959   match(Set dst (AndReductionV  src1 src2));
19960   match(Set dst ( OrReductionV  src1 src2));
19961   match(Set dst (XorReductionV  src1 src2));
19962   match(Set dst (MinReductionV  src1 src2));
19963   match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
19966   effect(TEMP vtmp1, TEMP vtmp2);
19967   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19968   ins_encode %{
19969     int opcode = this->ideal_Opcode();
19970     int vlen = Matcher::vector_length(this, $src2);
19971     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19972   %}
19973   ins_pipe( pipe_slow );
19974 %}
19975 
19976 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19977   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19978   match(Set dst (AddReductionVI src1 src2));
19979   match(Set dst (AndReductionV  src1 src2));
19980   match(Set dst ( OrReductionV  src1 src2));
19981   match(Set dst (XorReductionV  src1 src2));
19982   match(Set dst (MinReductionV  src1 src2));
19983   match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
19986   effect(TEMP vtmp1, TEMP vtmp2);
19987   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19988   ins_encode %{
19989     int opcode = this->ideal_Opcode();
19990     int vlen = Matcher::vector_length(this, $src2);
19991     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19992   %}
19993   ins_pipe( pipe_slow );
19994 %}
19995 
19996 // =======================Short Reduction==========================================
19997 
19998 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19999   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
20000   match(Set dst (AddReductionVI src1 src2));
20001   match(Set dst (MulReductionVI src1 src2));
20002   match(Set dst (AndReductionV  src1 src2));
20003   match(Set dst ( OrReductionV  src1 src2));
20004   match(Set dst (XorReductionV  src1 src2));
20005   match(Set dst (MinReductionV  src1 src2));
20006   match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
20009   effect(TEMP vtmp1, TEMP vtmp2);
20010   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
20011   ins_encode %{
20012     int opcode = this->ideal_Opcode();
20013     int vlen = Matcher::vector_length(this, $src2);
20014     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20015   %}
20016   ins_pipe( pipe_slow );
20017 %}
20018 
// =======================Byte Mul Reduction=====================================
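
// MulReductionVI over bytes is matched here rather than in reductionB above
// because x86 has no packed byte multiply; mulreduceB widens to 16-bit lanes
// internally. The 64-byte case has its own rule, presumably so the lowering
// can stay within the legVec (XMM0-XMM15) bank.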
20020 
20021 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
20022   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
20023             Matcher::vector_length(n->in(2)) <= 32); // src2
20024   match(Set dst (MulReductionVI src1 src2));
20025   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
20026   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
20027   ins_encode %{
20028     int opcode = this->ideal_Opcode();
20029     int vlen = Matcher::vector_length(this, $src2);
20030     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20031   %}
20032   ins_pipe( pipe_slow );
20033 %}
20034 
20035 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
20036   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
20037             Matcher::vector_length(n->in(2)) == 64); // src2
20038   match(Set dst (MulReductionVI src1 src2));
20039   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
20040   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
20041   ins_encode %{
20042     int opcode = this->ideal_Opcode();
20043     int vlen = Matcher::vector_length(this, $src2);
20044     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20045   %}
20046   ins_pipe( pipe_slow );
20047 %}
20048 
20049 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
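// Java Math.min/max semantics (NaN propagation, min(-0.0f, +0.0f) == -0.0f)
// do not map onto legacy minps/maxps, hence the blend-heavy sequences and
// the extra temps below. The _avx10_2 variants can rely on the AVX10.2
// minmax instructions, which implement these semantics directly, so they
// need only one or two temps and do not clobber the flags.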
20051 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
20052                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
20053   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20054             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20055              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20056             Matcher::vector_length(n->in(2)) == 2);
20057   match(Set dst (MinReductionV src1 src2));
20058   match(Set dst (MaxReductionV src1 src2));
20059   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
20060   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
20061   ins_encode %{
20062     assert(UseAVX > 0, "sanity");
20063 
20064     int opcode = this->ideal_Opcode();
20065     int vlen = Matcher::vector_length(this, $src2);
20066     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
20067                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
20068   %}
20069   ins_pipe( pipe_slow );
20070 %}
20071 
20072 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
20073                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
20074   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20075             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20076              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20077             Matcher::vector_length(n->in(2)) >= 4);
20078   match(Set dst (MinReductionV src1 src2));
20079   match(Set dst (MaxReductionV src1 src2));
20080   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
20081   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
20082   ins_encode %{
20083     assert(UseAVX > 0, "sanity");
20084 
20085     int opcode = this->ideal_Opcode();
20086     int vlen = Matcher::vector_length(this, $src2);
20087     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
20088                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
20089   %}
20090   ins_pipe( pipe_slow );
20091 %}
20092 
20093 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
20094                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
20095   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20096             Matcher::vector_length(n->in(2)) == 2);
20097   match(Set dst (MinReductionV dst src));
20098   match(Set dst (MaxReductionV dst src));
20099   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
20100   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
20101   ins_encode %{
20102     assert(UseAVX > 0, "sanity");
20103 
20104     int opcode = this->ideal_Opcode();
20105     int vlen = Matcher::vector_length(this, $src);
20106     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
20107                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
20108   %}
20109   ins_pipe( pipe_slow );
20110 %}
20111 
20112 
20113 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
20114                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
20115   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20116             Matcher::vector_length(n->in(2)) >= 4);
20117   match(Set dst (MinReductionV dst src));
20118   match(Set dst (MaxReductionV dst src));
20119   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
20120   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
20121   ins_encode %{
20122     assert(UseAVX > 0, "sanity");
20123 
20124     int opcode = this->ideal_Opcode();
20125     int vlen = Matcher::vector_length(this, $src);
20126     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
20127                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
20128   %}
20129   ins_pipe( pipe_slow );
20130 %}
20131 
20132 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
20133   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20134             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20135              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20136             Matcher::vector_length(n->in(2)) == 2);
20137   match(Set dst (MinReductionV src1 src2));
20138   match(Set dst (MaxReductionV src1 src2));
20139   effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
20141   ins_encode %{
20142     int opcode = this->ideal_Opcode();
20143     int vlen = Matcher::vector_length(this, $src2);
20144     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20145                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20146   %}
20147   ins_pipe( pipe_slow );
20148 %}
20149 
20150 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
20151   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20152             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20153              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20154             Matcher::vector_length(n->in(2)) >= 4);
20155   match(Set dst (MinReductionV src1 src2));
20156   match(Set dst (MaxReductionV src1 src2));
20157   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
20159   ins_encode %{
20160     int opcode = this->ideal_Opcode();
20161     int vlen = Matcher::vector_length(this, $src2);
20162     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20163                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20164   %}
20165   ins_pipe( pipe_slow );
20166 %}
20167 
20168 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
20169   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20170             Matcher::vector_length(n->in(2)) == 2);
20171   match(Set dst (MinReductionV dst src));
20172   match(Set dst (MaxReductionV dst src));
20173   effect(TEMP dst, TEMP xtmp1);
20174   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
20175   ins_encode %{
20176     int opcode = this->ideal_Opcode();
20177     int vlen = Matcher::vector_length(this, $src);
20178     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20179                          $xtmp1$$XMMRegister);
20180   %}
20181   ins_pipe( pipe_slow );
20182 %}
20183 
20184 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
20185   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20186             Matcher::vector_length(n->in(2)) >= 4);
20187   match(Set dst (MinReductionV dst src));
20188   match(Set dst (MaxReductionV dst src));
20189   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
20191   ins_encode %{
20192     int opcode = this->ideal_Opcode();
20193     int vlen = Matcher::vector_length(this, $src);
20194     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20195                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20196   %}
20197   ins_pipe( pipe_slow );
20198 %}
20199 
//--------------------Min/Max Double Reduction --------------------
20201 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20202                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20203   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20204             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20205              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20206             Matcher::vector_length(n->in(2)) == 2);
20207   match(Set dst (MinReductionV src1 src2));
20208   match(Set dst (MaxReductionV src1 src2));
20209   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20210   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20211   ins_encode %{
20212     assert(UseAVX > 0, "sanity");
20213 
20214     int opcode = this->ideal_Opcode();
20215     int vlen = Matcher::vector_length(this, $src2);
20216     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20217                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20218   %}
20219   ins_pipe( pipe_slow );
20220 %}
20221 
20222 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20223                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20224   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20225             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20226              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20227             Matcher::vector_length(n->in(2)) >= 4);
20228   match(Set dst (MinReductionV src1 src2));
20229   match(Set dst (MaxReductionV src1 src2));
20230   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20231   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20232   ins_encode %{
20233     assert(UseAVX > 0, "sanity");
20234 
20235     int opcode = this->ideal_Opcode();
20236     int vlen = Matcher::vector_length(this, $src2);
20237     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20238                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20239   %}
20240   ins_pipe( pipe_slow );
20241 %}
20242 
20243 
20244 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
20245                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20246   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20247             Matcher::vector_length(n->in(2)) == 2);
20248   match(Set dst (MinReductionV dst src));
20249   match(Set dst (MaxReductionV dst src));
20250   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20251   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20252   ins_encode %{
20253     assert(UseAVX > 0, "sanity");
20254 
20255     int opcode = this->ideal_Opcode();
20256     int vlen = Matcher::vector_length(this, $src);
20257     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20258                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20259   %}
20260   ins_pipe( pipe_slow );
20261 %}
20262 
20263 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
20264                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20265   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20266             Matcher::vector_length(n->in(2)) >= 4);
20267   match(Set dst (MinReductionV dst src));
20268   match(Set dst (MaxReductionV dst src));
20269   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20270   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20271   ins_encode %{
20272     assert(UseAVX > 0, "sanity");
20273 
20274     int opcode = this->ideal_Opcode();
20275     int vlen = Matcher::vector_length(this, $src);
20276     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20277                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20278   %}
20279   ins_pipe( pipe_slow );
20280 %}
20281 
20282 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
20283   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20284             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20285              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20286             Matcher::vector_length(n->in(2)) == 2);
20287   match(Set dst (MinReductionV src1 src2));
20288   match(Set dst (MaxReductionV src1 src2));
20289   effect(TEMP dst, TEMP xtmp1);
20290   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20291   ins_encode %{
20292     int opcode = this->ideal_Opcode();
20293     int vlen = Matcher::vector_length(this, $src2);
20294     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20295                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
20296   %}
20297   ins_pipe( pipe_slow );
20298 %}
20299 
20300 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20301   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20302             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20303              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20304             Matcher::vector_length(n->in(2)) >= 4);
20305   match(Set dst (MinReductionV src1 src2));
20306   match(Set dst (MaxReductionV src1 src2));
20307   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20308   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20309   ins_encode %{
20310     int opcode = this->ideal_Opcode();
20311     int vlen = Matcher::vector_length(this, $src2);
20312     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20313                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20314   %}
20315   ins_pipe( pipe_slow );
20316 %}
20317 
20318 
20319 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20320   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20321             Matcher::vector_length(n->in(2)) == 2);
20322   match(Set dst (MinReductionV dst src));
20323   match(Set dst (MaxReductionV dst src));
20324   effect(TEMP dst, TEMP xtmp1);
20325   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20326   ins_encode %{
20327     int opcode = this->ideal_Opcode();
20328     int vlen = Matcher::vector_length(this, $src);
20329     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20330                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20331   %}
20332   ins_pipe( pipe_slow );
20333 %}
20334 
20335 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20336   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20337             Matcher::vector_length(n->in(2)) >= 4);
20338   match(Set dst (MinReductionV dst src));
20339   match(Set dst (MaxReductionV dst src));
20340   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20341   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20342   ins_encode %{
20343     int opcode = this->ideal_Opcode();
20344     int vlen = Matcher::vector_length(this, $src);
20345     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20346                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20347   %}
20348   ins_pipe( pipe_slow );
20349 %}
20350 
20351 // ====================VECTOR ARITHMETIC=======================================
20352 
20353 // --------------------------------- ADD --------------------------------------
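
// Each operation below follows the same three-rule pattern: a two-operand
// SSE form (dst = dst OP src) for UseAVX == 0, a three-operand AVX register
// form, and an AVX form that folds the vector load. The memory forms require
// more than 8 bytes of vector, since a folded XMM memory operand reads a
// full 16 bytes and could over-read for sub-128-bit vectors.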
20354 
20355 // Bytes vector add
20356 instruct vaddB(vec dst, vec src) %{
20357   predicate(UseAVX == 0);
20358   match(Set dst (AddVB dst src));
20359   format %{ "paddb   $dst,$src\t! add packedB" %}
20360   ins_encode %{
20361     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20362   %}
20363   ins_pipe( pipe_slow );
20364 %}
20365 
20366 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20367   predicate(UseAVX > 0);
20368   match(Set dst (AddVB src1 src2));
20369   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
20370   ins_encode %{
20371     int vlen_enc = vector_length_encoding(this);
20372     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20373   %}
20374   ins_pipe( pipe_slow );
20375 %}
20376 
20377 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20378   predicate((UseAVX > 0) &&
20379             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20380   match(Set dst (AddVB src (LoadVector mem)));
20381   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
20382   ins_encode %{
20383     int vlen_enc = vector_length_encoding(this);
20384     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20385   %}
20386   ins_pipe( pipe_slow );
20387 %}
20388 
20389 // Shorts/Chars vector add
20390 instruct vaddS(vec dst, vec src) %{
20391   predicate(UseAVX == 0);
20392   match(Set dst (AddVS dst src));
20393   format %{ "paddw   $dst,$src\t! add packedS" %}
20394   ins_encode %{
20395     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20396   %}
20397   ins_pipe( pipe_slow );
20398 %}
20399 
20400 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20401   predicate(UseAVX > 0);
20402   match(Set dst (AddVS src1 src2));
20403   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
20404   ins_encode %{
20405     int vlen_enc = vector_length_encoding(this);
20406     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20407   %}
20408   ins_pipe( pipe_slow );
20409 %}
20410 
20411 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20412   predicate((UseAVX > 0) &&
20413             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20414   match(Set dst (AddVS src (LoadVector mem)));
20415   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
20416   ins_encode %{
20417     int vlen_enc = vector_length_encoding(this);
20418     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20419   %}
20420   ins_pipe( pipe_slow );
20421 %}
20422 
20423 // Integers vector add
20424 instruct vaddI(vec dst, vec src) %{
20425   predicate(UseAVX == 0);
20426   match(Set dst (AddVI dst src));
20427   format %{ "paddd   $dst,$src\t! add packedI" %}
20428   ins_encode %{
20429     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20430   %}
20431   ins_pipe( pipe_slow );
20432 %}
20433 
20434 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20435   predicate(UseAVX > 0);
20436   match(Set dst (AddVI src1 src2));
20437   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
20438   ins_encode %{
20439     int vlen_enc = vector_length_encoding(this);
20440     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20441   %}
20442   ins_pipe( pipe_slow );
20443 %}
20444 
20445 
20446 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20447   predicate((UseAVX > 0) &&
20448             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20449   match(Set dst (AddVI src (LoadVector mem)));
20450   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
20451   ins_encode %{
20452     int vlen_enc = vector_length_encoding(this);
20453     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20454   %}
20455   ins_pipe( pipe_slow );
20456 %}
20457 
20458 // Longs vector add
20459 instruct vaddL(vec dst, vec src) %{
20460   predicate(UseAVX == 0);
20461   match(Set dst (AddVL dst src));
20462   format %{ "paddq   $dst,$src\t! add packedL" %}
20463   ins_encode %{
20464     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20465   %}
20466   ins_pipe( pipe_slow );
20467 %}
20468 
20469 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20470   predicate(UseAVX > 0);
20471   match(Set dst (AddVL src1 src2));
20472   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
20473   ins_encode %{
20474     int vlen_enc = vector_length_encoding(this);
20475     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20476   %}
20477   ins_pipe( pipe_slow );
20478 %}
20479 
20480 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20481   predicate((UseAVX > 0) &&
20482             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20483   match(Set dst (AddVL src (LoadVector mem)));
20484   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
20485   ins_encode %{
20486     int vlen_enc = vector_length_encoding(this);
20487     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20488   %}
20489   ins_pipe( pipe_slow );
20490 %}
20491 
20492 // Floats vector add
20493 instruct vaddF(vec dst, vec src) %{
20494   predicate(UseAVX == 0);
20495   match(Set dst (AddVF dst src));
20496   format %{ "addps   $dst,$src\t! add packedF" %}
20497   ins_encode %{
20498     __ addps($dst$$XMMRegister, $src$$XMMRegister);
20499   %}
20500   ins_pipe( pipe_slow );
20501 %}
20502 
20503 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20504   predicate(UseAVX > 0);
20505   match(Set dst (AddVF src1 src2));
20506   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
20507   ins_encode %{
20508     int vlen_enc = vector_length_encoding(this);
20509     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20510   %}
20511   ins_pipe( pipe_slow );
20512 %}
20513 
20514 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20515   predicate((UseAVX > 0) &&
20516             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20517   match(Set dst (AddVF src (LoadVector mem)));
20518   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
20519   ins_encode %{
20520     int vlen_enc = vector_length_encoding(this);
20521     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20522   %}
20523   ins_pipe( pipe_slow );
20524 %}
20525 
20526 // Doubles vector add
20527 instruct vaddD(vec dst, vec src) %{
20528   predicate(UseAVX == 0);
20529   match(Set dst (AddVD dst src));
20530   format %{ "addpd   $dst,$src\t! add packedD" %}
20531   ins_encode %{
20532     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20533   %}
20534   ins_pipe( pipe_slow );
20535 %}
20536 
20537 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20538   predicate(UseAVX > 0);
20539   match(Set dst (AddVD src1 src2));
20540   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
20541   ins_encode %{
20542     int vlen_enc = vector_length_encoding(this);
20543     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20544   %}
20545   ins_pipe( pipe_slow );
20546 %}
20547 
20548 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20549   predicate((UseAVX > 0) &&
20550             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20551   match(Set dst (AddVD src (LoadVector mem)));
20552   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
20553   ins_encode %{
20554     int vlen_enc = vector_length_encoding(this);
20555     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20556   %}
20557   ins_pipe( pipe_slow );
20558 %}
20559 
20560 // --------------------------------- SUB --------------------------------------
20561 
20562 // Bytes vector sub
20563 instruct vsubB(vec dst, vec src) %{
20564   predicate(UseAVX == 0);
20565   match(Set dst (SubVB dst src));
20566   format %{ "psubb   $dst,$src\t! sub packedB" %}
20567   ins_encode %{
20568     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20569   %}
20570   ins_pipe( pipe_slow );
20571 %}
20572 
20573 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20574   predicate(UseAVX > 0);
20575   match(Set dst (SubVB src1 src2));
20576   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20577   ins_encode %{
20578     int vlen_enc = vector_length_encoding(this);
20579     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20580   %}
20581   ins_pipe( pipe_slow );
20582 %}
20583 
20584 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20585   predicate((UseAVX > 0) &&
20586             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20587   match(Set dst (SubVB src (LoadVector mem)));
20588   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20589   ins_encode %{
20590     int vlen_enc = vector_length_encoding(this);
20591     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20592   %}
20593   ins_pipe( pipe_slow );
20594 %}
20595 
20596 // Shorts/Chars vector sub
20597 instruct vsubS(vec dst, vec src) %{
20598   predicate(UseAVX == 0);
20599   match(Set dst (SubVS dst src));
20600   format %{ "psubw   $dst,$src\t! sub packedS" %}
20601   ins_encode %{
20602     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20603   %}
20604   ins_pipe( pipe_slow );
20605 %}
20606 
20607 
20608 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20609   predicate(UseAVX > 0);
20610   match(Set dst (SubVS src1 src2));
20611   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20612   ins_encode %{
20613     int vlen_enc = vector_length_encoding(this);
20614     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20615   %}
20616   ins_pipe( pipe_slow );
20617 %}
20618 
20619 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20620   predicate((UseAVX > 0) &&
20621             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20622   match(Set dst (SubVS src (LoadVector mem)));
20623   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20624   ins_encode %{
20625     int vlen_enc = vector_length_encoding(this);
20626     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20627   %}
20628   ins_pipe( pipe_slow );
20629 %}
20630 
20631 // Integers vector sub
20632 instruct vsubI(vec dst, vec src) %{
20633   predicate(UseAVX == 0);
20634   match(Set dst (SubVI dst src));
20635   format %{ "psubd   $dst,$src\t! sub packedI" %}
20636   ins_encode %{
20637     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20638   %}
20639   ins_pipe( pipe_slow );
20640 %}
20641 
20642 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20643   predicate(UseAVX > 0);
20644   match(Set dst (SubVI src1 src2));
20645   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20646   ins_encode %{
20647     int vlen_enc = vector_length_encoding(this);
20648     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20649   %}
20650   ins_pipe( pipe_slow );
20651 %}
20652 
20653 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20654   predicate((UseAVX > 0) &&
20655             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20656   match(Set dst (SubVI src (LoadVector mem)));
20657   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20658   ins_encode %{
20659     int vlen_enc = vector_length_encoding(this);
20660     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20661   %}
20662   ins_pipe( pipe_slow );
20663 %}
20664 
20665 // Longs vector sub
20666 instruct vsubL(vec dst, vec src) %{
20667   predicate(UseAVX == 0);
20668   match(Set dst (SubVL dst src));
20669   format %{ "psubq   $dst,$src\t! sub packedL" %}
20670   ins_encode %{
20671     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20672   %}
20673   ins_pipe( pipe_slow );
20674 %}
20675 
20676 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20677   predicate(UseAVX > 0);
20678   match(Set dst (SubVL src1 src2));
20679   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20680   ins_encode %{
20681     int vlen_enc = vector_length_encoding(this);
20682     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20683   %}
20684   ins_pipe( pipe_slow );
20685 %}
20686 
20687 
20688 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20689   predicate((UseAVX > 0) &&
20690             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20691   match(Set dst (SubVL src (LoadVector mem)));
20692   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20693   ins_encode %{
20694     int vlen_enc = vector_length_encoding(this);
20695     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20696   %}
20697   ins_pipe( pipe_slow );
20698 %}
20699 
20700 // Floats vector sub
20701 instruct vsubF(vec dst, vec src) %{
20702   predicate(UseAVX == 0);
20703   match(Set dst (SubVF dst src));
20704   format %{ "subps   $dst,$src\t! sub packedF" %}
20705   ins_encode %{
20706     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20707   %}
20708   ins_pipe( pipe_slow );
20709 %}
20710 
20711 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20712   predicate(UseAVX > 0);
20713   match(Set dst (SubVF src1 src2));
20714   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20715   ins_encode %{
20716     int vlen_enc = vector_length_encoding(this);
20717     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20718   %}
20719   ins_pipe( pipe_slow );
20720 %}
20721 
20722 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20723   predicate((UseAVX > 0) &&
20724             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20725   match(Set dst (SubVF src (LoadVector mem)));
20726   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20727   ins_encode %{
20728     int vlen_enc = vector_length_encoding(this);
20729     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20730   %}
20731   ins_pipe( pipe_slow );
20732 %}
20733 
20734 // Doubles vector sub
20735 instruct vsubD(vec dst, vec src) %{
20736   predicate(UseAVX == 0);
20737   match(Set dst (SubVD dst src));
20738   format %{ "subpd   $dst,$src\t! sub packedD" %}
20739   ins_encode %{
20740     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20741   %}
20742   ins_pipe( pipe_slow );
20743 %}
20744 
20745 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20746   predicate(UseAVX > 0);
20747   match(Set dst (SubVD src1 src2));
20748   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20749   ins_encode %{
20750     int vlen_enc = vector_length_encoding(this);
20751     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20752   %}
20753   ins_pipe( pipe_slow );
20754 %}
20755 
20756 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20757   predicate((UseAVX > 0) &&
20758             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20759   match(Set dst (SubVD src (LoadVector mem)));
20760   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20761   ins_encode %{
20762     int vlen_enc = vector_length_encoding(this);
20763     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20764   %}
20765   ins_pipe( pipe_slow );
20766 %}
20767 
20768 // --------------------------------- MUL --------------------------------------
20769 
// Bytes vector mul
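// x86 has no packed byte multiply, so all three rules below emulate it with
// 16-bit multiplies: sign-extend bytes to words, pmullw, mask each product
// to its low byte, and pack back. The wider forms handle odd- and
// even-indexed bytes separately and OR the halves together.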
20771 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20772   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20773   match(Set dst (MulVB src1 src2));
20774   effect(TEMP dst, TEMP xtmp);
20775   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20776   ins_encode %{
20777     assert(UseSSE > 3, "required");
20778     __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20779     __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20780     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
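    // Shift left then right by 8 to zero the upper byte of each 16-bit lane,
    // so packuswb below packs the low bytes without saturating.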
20781     __ psllw($dst$$XMMRegister, 8);
20782     __ psrlw($dst$$XMMRegister, 8);
20783     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20784   %}
20785   ins_pipe( pipe_slow );
20786 %}
20787 
20788 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20789   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20790   match(Set dst (MulVB src1 src2));
20791   effect(TEMP dst, TEMP xtmp);
20792   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20793   ins_encode %{
20794     assert(UseSSE > 3, "required");
20795     // Odd-index elements
20796     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20797     __ psrlw($dst$$XMMRegister, 8);
20798     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20799     __ psrlw($xtmp$$XMMRegister, 8);
20800     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20801     __ psllw($dst$$XMMRegister, 8);
20802     // Even-index elements
20803     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20804     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20805     __ psllw($xtmp$$XMMRegister, 8);
20806     __ psrlw($xtmp$$XMMRegister, 8);
20807     // Combine
20808     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20809   %}
20810   ins_pipe( pipe_slow );
20811 %}
20812 
20813 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20814   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20815   match(Set dst (MulVB src1 src2));
20816   effect(TEMP xtmp1, TEMP xtmp2);
20817   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20818   ins_encode %{
20819     int vlen_enc = vector_length_encoding(this);
20820     // Odd-index elements
20821     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20822     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20823     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20824     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20825     // Even-index elements
20826     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20827     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20828     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20829     // Combine
20830     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20831   %}
20832   ins_pipe( pipe_slow );
20833 %}
20834 
20835 // Shorts/Chars vector mul
20836 instruct vmulS(vec dst, vec src) %{
20837   predicate(UseAVX == 0);
20838   match(Set dst (MulVS dst src));
20839   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20840   ins_encode %{
20841     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20842   %}
20843   ins_pipe( pipe_slow );
20844 %}
20845 
20846 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20847   predicate(UseAVX > 0);
20848   match(Set dst (MulVS src1 src2));
20849   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20850   ins_encode %{
20851     int vlen_enc = vector_length_encoding(this);
20852     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20853   %}
20854   ins_pipe( pipe_slow );
20855 %}
20856 
20857 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20858   predicate((UseAVX > 0) &&
20859             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20860   match(Set dst (MulVS src (LoadVector mem)));
20861   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20862   ins_encode %{
20863     int vlen_enc = vector_length_encoding(this);
20864     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20865   %}
20866   ins_pipe( pipe_slow );
20867 %}
20868 
20869 // Integers vector mul
20870 instruct vmulI(vec dst, vec src) %{
20871   predicate(UseAVX == 0);
20872   match(Set dst (MulVI dst src));
20873   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20874   ins_encode %{
20875     assert(UseSSE > 3, "required");
20876     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20877   %}
20878   ins_pipe( pipe_slow );
20879 %}
20880 
20881 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20882   predicate(UseAVX > 0);
20883   match(Set dst (MulVI src1 src2));
20884   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20885   ins_encode %{
20886     int vlen_enc = vector_length_encoding(this);
20887     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20888   %}
20889   ins_pipe( pipe_slow );
20890 %}
20891 
20892 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20893   predicate((UseAVX > 0) &&
20894             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20895   match(Set dst (MulVI src (LoadVector mem)));
20896   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20897   ins_encode %{
20898     int vlen_enc = vector_length_encoding(this);
20899     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20900   %}
20901   ins_pipe( pipe_slow );
20902 %}
20903 
20904 // Longs vector mul
20905 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20906   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20907              VM_Version::supports_avx512dq()) ||
20908             VM_Version::supports_avx512vldq());
20909   match(Set dst (MulVL src1 src2));
20910   ins_cost(500);
20911   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20912   ins_encode %{
20913     assert(UseAVX > 2, "required");
20914     int vlen_enc = vector_length_encoding(this);
20915     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20916   %}
20917   ins_pipe( pipe_slow );
20918 %}
20919 
20920 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20921   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20922              VM_Version::supports_avx512dq()) ||
20923             (Matcher::vector_length_in_bytes(n) > 8 &&
20924              VM_Version::supports_avx512vldq()));
20925   match(Set dst (MulVL src (LoadVector mem)));
20926   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20927   ins_cost(500);
20928   ins_encode %{
20929     assert(UseAVX > 2, "required");
20930     int vlen_enc = vector_length_encoding(this);
20931     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20932   %}
20933   ins_pipe( pipe_slow );
20934 %}
20935 
20936 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20937   predicate(UseAVX == 0);
20938   match(Set dst (MulVL src1 src2));
20939   ins_cost(500);
20940   effect(TEMP dst, TEMP xtmp);
20941   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20942   ins_encode %{
20943     assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only their lower 32 bits are of interest
20945     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20946     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20947     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20948     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20949     __ psllq($dst$$XMMRegister, 32);
20950     // Get the lo-lo products
20951     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20952     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20953     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20954   %}
20955   ins_pipe( pipe_slow );
20956 %}
20957 
20958 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20959   predicate(UseAVX > 0 &&
20960             ((Matcher::vector_length_in_bytes(n) == 64 &&
20961               !VM_Version::supports_avx512dq()) ||
20962              (Matcher::vector_length_in_bytes(n) < 64 &&
20963               !VM_Version::supports_avx512vldq())));
20964   match(Set dst (MulVL src1 src2));
20965   effect(TEMP xtmp1, TEMP xtmp2);
20966   ins_cost(500);
20967   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20968   ins_encode %{
20969     int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only their lower 32 bits are of interest
20971     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20972     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20973     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20974     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20975     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20976     // Get the lo-lo products
20977     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20978     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20979   %}
20980   ins_pipe( pipe_slow );
20981 %}
20982 
20983 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20984   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20985   match(Set dst (MulVL src1 src2));
20986   ins_cost(100);
20987   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20988   ins_encode %{
20989     int vlen_enc = vector_length_encoding(this);
20990     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20991   %}
20992   ins_pipe( pipe_slow );
20993 %}
20994 
20995 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20996   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20997   match(Set dst (MulVL src1 src2));
20998   ins_cost(100);
20999   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
21000   ins_encode %{
21001     int vlen_enc = vector_length_encoding(this);
21002     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21003   %}
21004   ins_pipe( pipe_slow );
21005 %}
21006 
21007 // Floats vector mul
21008 instruct vmulF(vec dst, vec src) %{
21009   predicate(UseAVX == 0);
21010   match(Set dst (MulVF dst src));
21011   format %{ "mulps   $dst,$src\t! mul packedF" %}
21012   ins_encode %{
21013     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
21014   %}
21015   ins_pipe( pipe_slow );
21016 %}
21017 
21018 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
21019   predicate(UseAVX > 0);
21020   match(Set dst (MulVF src1 src2));
21021   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
21022   ins_encode %{
21023     int vlen_enc = vector_length_encoding(this);
21024     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21025   %}
21026   ins_pipe( pipe_slow );
21027 %}
21028 
21029 instruct vmulF_mem(vec dst, vec src, memory mem) %{
21030   predicate((UseAVX > 0) &&
21031             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21032   match(Set dst (MulVF src (LoadVector mem)));
21033   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
21034   ins_encode %{
21035     int vlen_enc = vector_length_encoding(this);
21036     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21037   %}
21038   ins_pipe( pipe_slow );
21039 %}
21040 
21041 // Doubles vector mul
21042 instruct vmulD(vec dst, vec src) %{
21043   predicate(UseAVX == 0);
21044   match(Set dst (MulVD dst src));
21045   format %{ "mulpd   $dst,$src\t! mul packedD" %}
21046   ins_encode %{
21047     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
21048   %}
21049   ins_pipe( pipe_slow );
21050 %}
21051 
21052 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
21053   predicate(UseAVX > 0);
21054   match(Set dst (MulVD src1 src2));
21055   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
21056   ins_encode %{
21057     int vlen_enc = vector_length_encoding(this);
21058     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21059   %}
21060   ins_pipe( pipe_slow );
21061 %}
21062 
21063 instruct vmulD_mem(vec dst, vec src, memory mem) %{
21064   predicate((UseAVX > 0) &&
21065             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21066   match(Set dst (MulVD src (LoadVector mem)));
21067   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
21068   ins_encode %{
21069     int vlen_enc = vector_length_encoding(this);
21070     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21071   %}
21072   ins_pipe( pipe_slow );
21073 %}
21074 
21075 // --------------------------------- DIV --------------------------------------
21076 
21077 // Floats vector div
21078 instruct vdivF(vec dst, vec src) %{
21079   predicate(UseAVX == 0);
21080   match(Set dst (DivVF dst src));
21081   format %{ "divps   $dst,$src\t! div packedF" %}
21082   ins_encode %{
21083     __ divps($dst$$XMMRegister, $src$$XMMRegister);
21084   %}
21085   ins_pipe( pipe_slow );
21086 %}
21087 
21088 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
21089   predicate(UseAVX > 0);
21090   match(Set dst (DivVF src1 src2));
21091   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
21092   ins_encode %{
21093     int vlen_enc = vector_length_encoding(this);
21094     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21095   %}
21096   ins_pipe( pipe_slow );
21097 %}
21098 
21099 instruct vdivF_mem(vec dst, vec src, memory mem) %{
21100   predicate((UseAVX > 0) &&
21101             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21102   match(Set dst (DivVF src (LoadVector mem)));
21103   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
21104   ins_encode %{
21105     int vlen_enc = vector_length_encoding(this);
21106     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21107   %}
21108   ins_pipe( pipe_slow );
21109 %}
21110 
21111 // Doubles vector div
21112 instruct vdivD(vec dst, vec src) %{
21113   predicate(UseAVX == 0);
21114   match(Set dst (DivVD dst src));
21115   format %{ "divpd   $dst,$src\t! div packedD" %}
21116   ins_encode %{
21117     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
21118   %}
21119   ins_pipe( pipe_slow );
21120 %}
21121 
21122 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
21123   predicate(UseAVX > 0);
21124   match(Set dst (DivVD src1 src2));
21125   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
21126   ins_encode %{
21127     int vlen_enc = vector_length_encoding(this);
21128     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21129   %}
21130   ins_pipe( pipe_slow );
21131 %}
21132 
21133 instruct vdivD_mem(vec dst, vec src, memory mem) %{
21134   predicate((UseAVX > 0) &&
21135             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21136   match(Set dst (DivVD src (LoadVector mem)));
21137   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
21138   ins_encode %{
21139     int vlen_enc = vector_length_encoding(this);
21140     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21141   %}
21142   ins_pipe( pipe_slow );
21143 %}
21144 
21145 // ------------------------------ MinMax ---------------------------------------
21146 
21147 // Byte, Short, Int vector Min/Max
21148 instruct minmax_reg_sse(vec dst, vec src) %{
21149   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21150             UseAVX == 0);
21151   match(Set dst (MinV dst src));
21152   match(Set dst (MaxV dst src));
21153   format %{ "vector_minmax  $dst,$src\t!  " %}
21154   ins_encode %{
21155     assert(UseSSE >= 4, "required");
21156 
21157     int opcode = this->ideal_Opcode();
21158     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21159     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
21160   %}
21161   ins_pipe( pipe_slow );
21162 %}
21163 
21164 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
21165   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21166             UseAVX > 0);
21167   match(Set dst (MinV src1 src2));
21168   match(Set dst (MaxV src1 src2));
21169   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
21170   ins_encode %{
21171     int opcode = this->ideal_Opcode();
21172     int vlen_enc = vector_length_encoding(this);
21173     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21174 
21175     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21176   %}
21177   ins_pipe( pipe_slow );
21178 %}
21179 
21180 // Long vector Min/Max
21181 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
21182   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
21183             UseAVX == 0);
21184   match(Set dst (MinV dst src));
  match(Set dst (MaxV dst src));
21186   effect(TEMP dst, TEMP tmp);
21187   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
21188   ins_encode %{
21189     assert(UseSSE >= 4, "required");
21190 
21191     int opcode = this->ideal_Opcode();
21192     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21193     assert(elem_bt == T_LONG, "sanity");
21194 
21195     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
21196   %}
21197   ins_pipe( pipe_slow );
21198 %}
21199 
21200 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
21201   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
21202             UseAVX > 0 && !VM_Version::supports_avx512vl());
21203   match(Set dst (MinV src1 src2));
21204   match(Set dst (MaxV src1 src2));
21205   effect(TEMP dst);
21206   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
21207   ins_encode %{
21208     int vlen_enc = vector_length_encoding(this);
21209     int opcode = this->ideal_Opcode();
21210     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21211     assert(elem_bt == T_LONG, "sanity");
21212 
21213     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21214   %}
21215   ins_pipe( pipe_slow );
21216 %}
21217 
21218 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
21219   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
21220             Matcher::vector_element_basic_type(n) == T_LONG);
21221   match(Set dst (MinV src1 src2));
21222   match(Set dst (MaxV src1 src2));
21223   format %{ "vector_minmaxL  $dst,$src1,src2\t! " %}
21224   ins_encode %{
21225     assert(UseAVX > 2, "required");
21226 
21227     int vlen_enc = vector_length_encoding(this);
21228     int opcode = this->ideal_Opcode();
21229     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21230     assert(elem_bt == T_LONG, "sanity");
21231 
21232     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21233   %}
21234   ins_pipe( pipe_slow );
21235 %}
21236 
21237 // Float/Double vector Min/Max
21238 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
21239   predicate(VM_Version::supports_avx10_2() &&
21240             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21241   match(Set dst (MinV a b));
21242   match(Set dst (MaxV a b));
21243   format %{ "vector_minmaxFP  $dst, $a, $b" %}
21244   ins_encode %{
21245     int vlen_enc = vector_length_encoding(this);
21246     int opcode = this->ideal_Opcode();
21247     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21248     __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21249   %}
21250   ins_pipe( pipe_slow );
21251 %}
21252 
21253 // Float/Double vector Min/Max
21254 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
21255   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
21256             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
21257             UseAVX > 0);
21258   match(Set dst (MinV a b));
21259   match(Set dst (MaxV a b));
21260   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
21261   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
21262   ins_encode %{
21263     assert(UseAVX > 0, "required");
21264 
21265     int opcode = this->ideal_Opcode();
21266     int vlen_enc = vector_length_encoding(this);
21267     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21268 
21269     __ vminmax_fp(opcode, elem_bt,
21270                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
21272   %}
21273   ins_pipe( pipe_slow );
21274 %}
21275 
21276 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
21277   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
21278             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21279   match(Set dst (MinV a b));
21280   match(Set dst (MaxV a b));
21281   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
21282   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
21283   ins_encode %{
21284     assert(UseAVX > 2, "required");
21285 
21286     int opcode = this->ideal_Opcode();
21287     int vlen_enc = vector_length_encoding(this);
21288     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21289 
21290     __ evminmax_fp(opcode, elem_bt,
21291                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
21293   %}
21294   ins_pipe( pipe_slow );
21295 %}
21296 
21297 // ------------------------------ Unsigned vector Min/Max ----------------------
21298 
21299 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21300   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21301   match(Set dst (UMinV a b));
21302   match(Set dst (UMaxV a b));
21303   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21304   ins_encode %{
21305     int opcode = this->ideal_Opcode();
21306     int vlen_enc = vector_length_encoding(this);
21307     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21308     assert(is_integral_type(elem_bt), "");
21309     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21310   %}
21311   ins_pipe( pipe_slow );
21312 %}
21313 
21314 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21315   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21316   match(Set dst (UMinV a (LoadVector b)));
21317   match(Set dst (UMaxV a (LoadVector b)));
21318   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21319   ins_encode %{
21320     int opcode = this->ideal_Opcode();
21321     int vlen_enc = vector_length_encoding(this);
21322     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21323     assert(is_integral_type(elem_bt), "");
21324     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21325   %}
21326   ins_pipe( pipe_slow );
21327 %}
21328 
21329 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21330   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21331   match(Set dst (UMinV a b));
21332   match(Set dst (UMaxV a b));
21333   effect(TEMP xtmp1, TEMP xtmp2);
21334   format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
21335   ins_encode %{
21336     int opcode = this->ideal_Opcode();
21337     int vlen_enc = vector_length_encoding(this);
21338     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21339   %}
21340   ins_pipe( pipe_slow );
21341 %}
21342 
21343 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21344   match(Set dst (UMinV (Binary dst src2) mask));
21345   match(Set dst (UMaxV (Binary dst src2) mask));
21346   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21347   ins_encode %{
21348     int vlen_enc = vector_length_encoding(this);
21349     BasicType bt = Matcher::vector_element_basic_type(this);
21350     int opc = this->ideal_Opcode();
21351     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21352                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21353   %}
21354   ins_pipe( pipe_slow );
21355 %}
21356 
21357 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21358   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21359   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21360   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21361   ins_encode %{
21362     int vlen_enc = vector_length_encoding(this);
21363     BasicType bt = Matcher::vector_element_basic_type(this);
21364     int opc = this->ideal_Opcode();
21365     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21366                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21367   %}
21368   ins_pipe( pipe_slow );
21369 %}
21370 
21371 // --------------------------------- Signum/CopySign ---------------------------
21372 
21373 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21374   match(Set dst (SignumF dst (Binary zero one)));
21375   effect(KILL cr);
21376   format %{ "signumF $dst, $dst" %}
21377   ins_encode %{
21378     int opcode = this->ideal_Opcode();
21379     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21380   %}
21381   ins_pipe( pipe_slow );
21382 %}
21383 
21384 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21385   match(Set dst (SignumD dst (Binary zero one)));
21386   effect(KILL cr);
21387   format %{ "signumD $dst, $dst" %}
21388   ins_encode %{
21389     int opcode = this->ideal_Opcode();
21390     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21391   %}
21392   ins_pipe( pipe_slow );
21393 %}
21394 
21395 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21396   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21397   match(Set dst (SignumVF src (Binary zero one)));
21398   match(Set dst (SignumVD src (Binary zero one)));
21399   effect(TEMP dst, TEMP xtmp1);
21400   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21401   ins_encode %{
21402     int opcode = this->ideal_Opcode();
21403     int vec_enc = vector_length_encoding(this);
21404     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21405                          $xtmp1$$XMMRegister, vec_enc);
21406   %}
21407   ins_pipe( pipe_slow );
21408 %}
21409 
21410 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21411   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21412   match(Set dst (SignumVF src (Binary zero one)));
21413   match(Set dst (SignumVD src (Binary zero one)));
21414   effect(TEMP dst, TEMP ktmp1);
21415   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21416   ins_encode %{
21417     int opcode = this->ideal_Opcode();
21418     int vec_enc = vector_length_encoding(this);
21419     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21420                           $ktmp1$$KRegister, vec_enc);
21421   %}
21422   ins_pipe( pipe_slow );
21423 %}
21424 
21425 // ---------------------------------------
// For copySign use 0xE4 as the truth-table immediate for vpternlog
21427 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21428 // C (xmm2) is set to 0x7FFFFFFF
21429 // Wherever xmm2 is 0, we want to pick from B (sign)
21430 // Wherever xmm2 is 1, we want to pick from A (src)
21431 //
21432 // A B C Result
21433 // 0 0 0 0
21434 // 0 0 1 0
21435 // 0 1 0 1
21436 // 0 1 1 0
21437 // 1 0 0 0
21438 // 1 0 1 1
21439 // 1 1 0 1
21440 // 1 1 1 1
21441 //
// Result read from high bit to low bit is 11100100 (binary) = 0xE4
21443 // ---------------------------------------
21444 
21445 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21446   match(Set dst (CopySignF dst src));
21447   effect(TEMP tmp1, TEMP tmp2);
21448   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21449   ins_encode %{
21450     __ movl($tmp2$$Register, 0x7FFFFFFF);
21451     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21452     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21453   %}
21454   ins_pipe( pipe_slow );
21455 %}
21456 
21457 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21458   match(Set dst (CopySignD dst (Binary src zero)));
21459   ins_cost(100);
21460   effect(TEMP tmp1, TEMP tmp2);
21461   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21462   ins_encode %{
21463     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21464     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21465     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21466   %}
21467   ins_pipe( pipe_slow );
21468 %}
21469 
21470 //----------------------------- CompressBits/ExpandBits ------------------------
21471 
21472 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21473   predicate(n->bottom_type()->isa_int());
21474   match(Set dst (CompressBits src mask));
21475   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21476   ins_encode %{
21477     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21478   %}
21479   ins_pipe( pipe_slow );
21480 %}
21481 
21482 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21483   predicate(n->bottom_type()->isa_int());
21484   match(Set dst (ExpandBits src mask));
21485   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21486   ins_encode %{
21487     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21488   %}
21489   ins_pipe( pipe_slow );
21490 %}
21491 
21492 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21493   predicate(n->bottom_type()->isa_int());
21494   match(Set dst (CompressBits src (LoadI mask)));
21495   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21496   ins_encode %{
21497     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21498   %}
21499   ins_pipe( pipe_slow );
21500 %}
21501 
21502 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21503   predicate(n->bottom_type()->isa_int());
21504   match(Set dst (ExpandBits src (LoadI mask)));
21505   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21506   ins_encode %{
21507     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21508   %}
21509   ins_pipe( pipe_slow );
21510 %}
21511 
21512 // --------------------------------- Sqrt --------------------------------------
21513 
21514 instruct vsqrtF_reg(vec dst, vec src) %{
21515   match(Set dst (SqrtVF src));
21516   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
21517   ins_encode %{
21518     assert(UseAVX > 0, "required");
21519     int vlen_enc = vector_length_encoding(this);
21520     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21521   %}
21522   ins_pipe( pipe_slow );
21523 %}
21524 
21525 instruct vsqrtF_mem(vec dst, memory mem) %{
21526   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21527   match(Set dst (SqrtVF (LoadVector mem)));
21528   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
21529   ins_encode %{
21530     assert(UseAVX > 0, "required");
21531     int vlen_enc = vector_length_encoding(this);
21532     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21533   %}
21534   ins_pipe( pipe_slow );
21535 %}
21536 
21537 // Floating point vector sqrt
21538 instruct vsqrtD_reg(vec dst, vec src) %{
21539   match(Set dst (SqrtVD src));
21540   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
21541   ins_encode %{
21542     assert(UseAVX > 0, "required");
21543     int vlen_enc = vector_length_encoding(this);
21544     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21545   %}
21546   ins_pipe( pipe_slow );
21547 %}
21548 
21549 instruct vsqrtD_mem(vec dst, memory mem) %{
21550   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21551   match(Set dst (SqrtVD (LoadVector mem)));
21552   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
21553   ins_encode %{
21554     assert(UseAVX > 0, "required");
21555     int vlen_enc = vector_length_encoding(this);
21556     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21557   %}
21558   ins_pipe( pipe_slow );
21559 %}
21560 
21561 // ------------------------------ Shift ---------------------------------------
21562 
21563 // Left and right shift count vectors are the same on x86
21564 // (only lowest bits of xmm reg are used for count).
21565 instruct vshiftcnt(vec dst, rRegI cnt) %{
21566   match(Set dst (LShiftCntV cnt));
21567   match(Set dst (RShiftCntV cnt));
21568   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21569   ins_encode %{
21570     __ movdl($dst$$XMMRegister, $cnt$$Register);
21571   %}
21572   ins_pipe( pipe_slow );
21573 %}
21574 
21575 // Byte vector shift
21576 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21577   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21578   match(Set dst ( LShiftVB src shift));
21579   match(Set dst ( RShiftVB src shift));
21580   match(Set dst (URShiftVB src shift));
21581   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21582   format %{"vector_byte_shift $dst,$src,$shift" %}
21583   ins_encode %{
21584     assert(UseSSE > 3, "required");
21585     int opcode = this->ideal_Opcode();
21586     bool sign = (opcode != Op_URShiftVB);
21587     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21588     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21589     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21590     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21591     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21592   %}
21593   ins_pipe( pipe_slow );
21594 %}
21595 
21596 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21597   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21598             UseAVX <= 1);
21599   match(Set dst ( LShiftVB src shift));
21600   match(Set dst ( RShiftVB src shift));
21601   match(Set dst (URShiftVB src shift));
21602   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21603   format %{"vector_byte_shift $dst,$src,$shift" %}
21604   ins_encode %{
21605     assert(UseSSE > 3, "required");
21606     int opcode = this->ideal_Opcode();
21607     bool sign = (opcode != Op_URShiftVB);
21608     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21609     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21610     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21611     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21612     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21613     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21614     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21615     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21616     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21617   %}
21618   ins_pipe( pipe_slow );
21619 %}
21620 
21621 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21622   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21623             UseAVX > 1);
21624   match(Set dst ( LShiftVB src shift));
21625   match(Set dst ( RShiftVB src shift));
21626   match(Set dst (URShiftVB src shift));
21627   effect(TEMP dst, TEMP tmp);
21628   format %{"vector_byte_shift $dst,$src,$shift" %}
21629   ins_encode %{
21630     int opcode = this->ideal_Opcode();
21631     bool sign = (opcode != Op_URShiftVB);
21632     int vlen_enc = Assembler::AVX_256bit;
21633     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21634     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21635     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21636     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21637     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21638   %}
21639   ins_pipe( pipe_slow );
21640 %}
21641 
21642 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21643   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21644   match(Set dst ( LShiftVB src shift));
21645   match(Set dst ( RShiftVB src shift));
21646   match(Set dst (URShiftVB src shift));
21647   effect(TEMP dst, TEMP tmp);
21648   format %{"vector_byte_shift $dst,$src,$shift" %}
21649   ins_encode %{
21650     assert(UseAVX > 1, "required");
21651     int opcode = this->ideal_Opcode();
21652     bool sign = (opcode != Op_URShiftVB);
21653     int vlen_enc = Assembler::AVX_256bit;
21654     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21655     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21656     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21657     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21658     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21659     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21660     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21661     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21662     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21663   %}
21664   ins_pipe( pipe_slow );
21665 %}
21666 
21667 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21668   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21669   match(Set dst ( LShiftVB src shift));
21670   match(Set dst  (RShiftVB src shift));
21671   match(Set dst (URShiftVB src shift));
21672   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21673   format %{"vector_byte_shift $dst,$src,$shift" %}
21674   ins_encode %{
21675     assert(UseAVX > 2, "required");
21676     int opcode = this->ideal_Opcode();
21677     bool sign = (opcode != Op_URShiftVB);
21678     int vlen_enc = Assembler::AVX_512bit;
21679     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21680     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21681     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21682     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21683     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21684     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21685     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21686     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21687     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21688     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21689     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21690     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21691   %}
21692   ins_pipe( pipe_slow );
21693 %}
21694 
// A logical right shift of a short vector would produce an incorrect Java
// result for negative data, because Java converts the short value to int
// (with sign extension) before shifting. Char vectors are fine, since chars
// are unsigned values.
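// For example, with short s = (short)-1: Java evaluates s >>> 3 as
// 0xFFFFFFFF >>> 3 = 0x1FFFFFFF, whose low 16 bits are 0xFFFF (-1), whereas
// a 16-bit psrlw would produce 0xFFFF >>> 3 = 0x1FFF. Chars are
// zero-extended, so both agree for char data.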
// Shorts/Chars vector shift
21700 instruct vshiftS(vec dst, vec src, vec shift) %{
21701   predicate(!n->as_ShiftV()->is_var_shift());
21702   match(Set dst ( LShiftVS src shift));
21703   match(Set dst ( RShiftVS src shift));
21704   match(Set dst (URShiftVS src shift));
21705   effect(TEMP dst, USE src, USE shift);
21706   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21707   ins_encode %{
21708     int opcode = this->ideal_Opcode();
21709     if (UseAVX > 0) {
21710       int vlen_enc = vector_length_encoding(this);
21711       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21712     } else {
21713       int vlen = Matcher::vector_length(this);
21714       if (vlen == 2) {
21715         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21716         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21717       } else if (vlen == 4) {
21718         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21719         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21720       } else {
        assert(vlen == 8, "sanity");
21722         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21723         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21724       }
21725     }
21726   %}
21727   ins_pipe( pipe_slow );
21728 %}
21729 
// Integers vector shift
21731 instruct vshiftI(vec dst, vec src, vec shift) %{
21732   predicate(!n->as_ShiftV()->is_var_shift());
21733   match(Set dst ( LShiftVI src shift));
21734   match(Set dst ( RShiftVI src shift));
21735   match(Set dst (URShiftVI src shift));
21736   effect(TEMP dst, USE src, USE shift);
21737   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21738   ins_encode %{
21739     int opcode = this->ideal_Opcode();
21740     if (UseAVX > 0) {
21741       int vlen_enc = vector_length_encoding(this);
21742       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21743     } else {
21744       int vlen = Matcher::vector_length(this);
21745       if (vlen == 2) {
21746         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21747         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21748       } else {
21749         assert(vlen == 4, "sanity");
21750         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21751         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21752       }
21753     }
21754   %}
21755   ins_pipe( pipe_slow );
21756 %}
21757 
// Integers vector constant shift
21759 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21760   match(Set dst (LShiftVI src (LShiftCntV shift)));
21761   match(Set dst (RShiftVI src (RShiftCntV shift)));
21762   match(Set dst (URShiftVI src (RShiftCntV shift)));
21763   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21764   ins_encode %{
21765     int opcode = this->ideal_Opcode();
21766     if (UseAVX > 0) {
21767       int vector_len = vector_length_encoding(this);
21768       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21769     } else {
21770       int vlen = Matcher::vector_length(this);
21771       if (vlen == 2) {
21772         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21773         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21774       } else {
21775         assert(vlen == 4, "sanity");
21776         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21777         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21778       }
21779     }
21780   %}
21781   ins_pipe( pipe_slow );
21782 %}
21783 
21784 // Longs vector shift
21785 instruct vshiftL(vec dst, vec src, vec shift) %{
21786   predicate(!n->as_ShiftV()->is_var_shift());
21787   match(Set dst ( LShiftVL src shift));
21788   match(Set dst (URShiftVL src shift));
21789   effect(TEMP dst, USE src, USE shift);
21790   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21791   ins_encode %{
21792     int opcode = this->ideal_Opcode();
21793     if (UseAVX > 0) {
21794       int vlen_enc = vector_length_encoding(this);
21795       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21796     } else {
21797       assert(Matcher::vector_length(this) == 2, "");
21798       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21799       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21800     }
21801   %}
21802   ins_pipe( pipe_slow );
21803 %}
21804 
21805 // Longs vector constant shift
21806 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21807   match(Set dst (LShiftVL src (LShiftCntV shift)));
21808   match(Set dst (URShiftVL src (RShiftCntV shift)));
21809   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21810   ins_encode %{
21811     int opcode = this->ideal_Opcode();
21812     if (UseAVX > 0) {
21813       int vector_len = vector_length_encoding(this);
21814       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21815     } else {
21816       assert(Matcher::vector_length(this) == 2, "");
21817       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21818       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21819     }
21820   %}
21821   ins_pipe( pipe_slow );
21822 %}
21823 
// ------------------- ArithmeticRightShift ----------------------------------
21825 // Long vector arithmetic right shift
21826 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21827   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21828   match(Set dst (RShiftVL src shift));
21829   effect(TEMP dst, TEMP tmp);
21830   format %{ "vshiftq $dst,$src,$shift" %}
21831   ins_encode %{
21832     uint vlen = Matcher::vector_length(this);
21833     if (vlen == 2) {
21834       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21835       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21836       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21837       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21838       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21839       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21840     } else {
21841       assert(vlen == 4, "sanity");
21842       assert(UseAVX > 1, "required");
21843       int vlen_enc = Assembler::AVX_256bit;
21844       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21845       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21846       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21847       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21848       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21849     }
21850   %}
21851   ins_pipe( pipe_slow );
21852 %}
21853 
21854 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21855   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21856   match(Set dst (RShiftVL src shift));
21857   format %{ "vshiftq $dst,$src,$shift" %}
21858   ins_encode %{
21859     int vlen_enc = vector_length_encoding(this);
21860     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21861   %}
21862   ins_pipe( pipe_slow );
21863 %}
21864 
21865 // ------------------- Variable Shift -----------------------------
21866 // Byte variable shift
21867 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21868   predicate(Matcher::vector_length(n) <= 8 &&
21869             n->as_ShiftV()->is_var_shift() &&
21870             !VM_Version::supports_avx512bw());
21871   match(Set dst ( LShiftVB src shift));
21872   match(Set dst ( RShiftVB src shift));
21873   match(Set dst (URShiftVB src shift));
21874   effect(TEMP dst, TEMP vtmp);
21875   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21876   ins_encode %{
21877     assert(UseAVX >= 2, "required");
21878 
21879     int opcode = this->ideal_Opcode();
21880     int vlen_enc = Assembler::AVX_128bit;
21881     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21882     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21883   %}
21884   ins_pipe( pipe_slow );
21885 %}
21886 
21887 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21888   predicate(Matcher::vector_length(n) == 16 &&
21889             n->as_ShiftV()->is_var_shift() &&
21890             !VM_Version::supports_avx512bw());
21891   match(Set dst ( LShiftVB src shift));
21892   match(Set dst ( RShiftVB src shift));
21893   match(Set dst (URShiftVB src shift));
21894   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21895   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21896   ins_encode %{
21897     assert(UseAVX >= 2, "required");
21898 
21899     int opcode = this->ideal_Opcode();
21900     int vlen_enc = Assembler::AVX_128bit;
21901     // Shift lower half and get word result in dst
21902     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21903 
21904     // Shift upper half and get word result in vtmp1
21905     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21906     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21907     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21908 
21909     // Merge and down convert the two word results to byte in dst
21910     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21911   %}
21912   ins_pipe( pipe_slow );
21913 %}
21914 
21915 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21916   predicate(Matcher::vector_length(n) == 32 &&
21917             n->as_ShiftV()->is_var_shift() &&
21918             !VM_Version::supports_avx512bw());
21919   match(Set dst ( LShiftVB src shift));
21920   match(Set dst ( RShiftVB src shift));
21921   match(Set dst (URShiftVB src shift));
21922   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21923   format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21924   ins_encode %{
21925     assert(UseAVX >= 2, "required");
21926 
21927     int opcode = this->ideal_Opcode();
21928     int vlen_enc = Assembler::AVX_128bit;
21929     // Process lower 128 bits and get result in dst
21930     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21931     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21932     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21933     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21934     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21935 
21936     // Process higher 128 bits and get result in vtmp3
21937     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21938     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21939     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21940     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21941     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21942     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21943     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21944 
21945     // Merge the two results in dst
21946     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21947   %}
21948   ins_pipe( pipe_slow );
21949 %}
21950 
21951 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21952   predicate(Matcher::vector_length(n) <= 32 &&
21953             n->as_ShiftV()->is_var_shift() &&
21954             VM_Version::supports_avx512bw());
21955   match(Set dst ( LShiftVB src shift));
21956   match(Set dst ( RShiftVB src shift));
21957   match(Set dst (URShiftVB src shift));
21958   effect(TEMP dst, TEMP vtmp);
21959   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21960   ins_encode %{
21961     assert(UseAVX > 2, "required");
21962 
21963     int opcode = this->ideal_Opcode();
21964     int vlen_enc = vector_length_encoding(this);
21965     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21966   %}
21967   ins_pipe( pipe_slow );
21968 %}
21969 
21970 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21971   predicate(Matcher::vector_length(n) == 64 &&
21972             n->as_ShiftV()->is_var_shift() &&
21973             VM_Version::supports_avx512bw());
21974   match(Set dst ( LShiftVB src shift));
21975   match(Set dst ( RShiftVB src shift));
21976   match(Set dst (URShiftVB src shift));
21977   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21978   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21979   ins_encode %{
21980     assert(UseAVX > 2, "required");
21981 
21982     int opcode = this->ideal_Opcode();
21983     int vlen_enc = Assembler::AVX_256bit;
21984     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21985     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21986     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21987     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21988     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21989   %}
21990   ins_pipe( pipe_slow );
21991 %}
21992 
21993 // Short variable shift
21994 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21995   predicate(Matcher::vector_length(n) <= 8 &&
21996             n->as_ShiftV()->is_var_shift() &&
21997             !VM_Version::supports_avx512bw());
21998   match(Set dst ( LShiftVS src shift));
21999   match(Set dst ( RShiftVS src shift));
22000   match(Set dst (URShiftVS src shift));
22001   effect(TEMP dst, TEMP vtmp);
22002   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
22003   ins_encode %{
22004     assert(UseAVX >= 2, "required");
22005 
22006     int opcode = this->ideal_Opcode();
22007     bool sign = (opcode != Op_URShiftVS);
22008     int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22011     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22012     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22013     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
22014     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22015   %}
22016   ins_pipe( pipe_slow );
22017 %}
22018 
22019 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
22020   predicate(Matcher::vector_length(n) == 16 &&
22021             n->as_ShiftV()->is_var_shift() &&
22022             !VM_Version::supports_avx512bw());
22023   match(Set dst ( LShiftVS src shift));
22024   match(Set dst ( RShiftVS src shift));
22025   match(Set dst (URShiftVS src shift));
22026   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
22027   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
22028   ins_encode %{
22029     assert(UseAVX >= 2, "required");
22030 
22031     int opcode = this->ideal_Opcode();
22032     bool sign = (opcode != Op_URShiftVS);
22033     int vlen_enc = Assembler::AVX_256bit;
22034     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
22035     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
22036     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22037     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22038     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22039 
22040     // Shift upper half, with result in dst using vtmp1 as TEMP
22041     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
22042     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
22043     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22044     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22045     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22046     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22047 
22048     // Merge lower and upper half result into dst
22049     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22050     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
22051   %}
22052   ins_pipe( pipe_slow );
22053 %}
22054 
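// With AVX-512BW the hardware has native variable 16-bit shifts
// (vpsllvw/vpsravw/vpsrlvw), so no widening is needed.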
22055 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
22056   predicate(n->as_ShiftV()->is_var_shift() &&
22057             VM_Version::supports_avx512bw());
22058   match(Set dst ( LShiftVS src shift));
22059   match(Set dst ( RShiftVS src shift));
22060   match(Set dst (URShiftVS src shift));
22061   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
22062   ins_encode %{
22063     assert(UseAVX > 2, "required");
22064 
22065     int opcode = this->ideal_Opcode();
22066     int vlen_enc = vector_length_encoding(this);
22067     if (!VM_Version::supports_avx512vl()) {
22068       vlen_enc = Assembler::AVX_512bit;
22069     }
22070     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22071   %}
22072   ins_pipe( pipe_slow );
22073 %}
22074 
// Integer variable shift
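// AVX2 provides variable int shifts directly (vpsllvd/vpsravd/vpsrlvd).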
22076 instruct vshiftI_var(vec dst, vec src, vec shift) %{
22077   predicate(n->as_ShiftV()->is_var_shift());
22078   match(Set dst ( LShiftVI src shift));
22079   match(Set dst ( RShiftVI src shift));
22080   match(Set dst (URShiftVI src shift));
22081   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
22082   ins_encode %{
22083     assert(UseAVX >= 2, "required");
22084 
22085     int opcode = this->ideal_Opcode();
22086     int vlen_enc = vector_length_encoding(this);
22087     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22088   %}
22089   ins_pipe( pipe_slow );
22090 %}
22091 
// Long variable shift
22093 instruct vshiftL_var(vec dst, vec src, vec shift) %{
22094   predicate(n->as_ShiftV()->is_var_shift());
22095   match(Set dst ( LShiftVL src shift));
22096   match(Set dst (URShiftVL src shift));
22097   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
22098   ins_encode %{
22099     assert(UseAVX >= 2, "required");
22100 
22101     int opcode = this->ideal_Opcode();
22102     int vlen_enc = vector_length_encoding(this);
22103     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22104   %}
22105   ins_pipe( pipe_slow );
22106 %}
22107 
// Long variable arithmetic right shift
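// AVX2 has no variable arithmetic right shift for longs (vpsraq is AVX-512 only),
// so the macro assembler emulates it, which needs a temporary vector.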
22109 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
22110   predicate(Matcher::vector_length(n) <= 4 &&
22111             n->as_ShiftV()->is_var_shift() &&
22112             UseAVX == 2);
22113   match(Set dst (RShiftVL src shift));
22114   effect(TEMP dst, TEMP vtmp);
22115   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
22116   ins_encode %{
22117     int opcode = this->ideal_Opcode();
22118     int vlen_enc = vector_length_encoding(this);
22119     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
22120                  $vtmp$$XMMRegister);
22121   %}
22122   ins_pipe( pipe_slow );
22123 %}
22124 
22125 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
22126   predicate(n->as_ShiftV()->is_var_shift() &&
22127             UseAVX > 2);
22128   match(Set dst (RShiftVL src shift));
22129   format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
22130   ins_encode %{
22131     int opcode = this->ideal_Opcode();
22132     int vlen_enc = vector_length_encoding(this);
22133     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22134   %}
22135   ins_pipe( pipe_slow );
22136 %}
22137 
22138 // --------------------------------- AND --------------------------------------
22139 
22140 instruct vand(vec dst, vec src) %{
22141   predicate(UseAVX == 0);
22142   match(Set dst (AndV dst src));
22143   format %{ "pand    $dst,$src\t! and vectors" %}
22144   ins_encode %{
22145     __ pand($dst$$XMMRegister, $src$$XMMRegister);
22146   %}
22147   ins_pipe( pipe_slow );
22148 %}
22149 
22150 instruct vand_reg(vec dst, vec src1, vec src2) %{
22151   predicate(UseAVX > 0);
22152   match(Set dst (AndV src1 src2));
22153   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
22154   ins_encode %{
22155     int vlen_enc = vector_length_encoding(this);
22156     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22157   %}
22158   ins_pipe( pipe_slow );
22159 %}
22160 
22161 instruct vand_mem(vec dst, vec src, memory mem) %{
22162   predicate((UseAVX > 0) &&
22163             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22164   match(Set dst (AndV src (LoadVector mem)));
22165   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
22166   ins_encode %{
22167     int vlen_enc = vector_length_encoding(this);
22168     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22169   %}
22170   ins_pipe( pipe_slow );
22171 %}
22172 
22173 // --------------------------------- OR ---------------------------------------
22174 
22175 instruct vor(vec dst, vec src) %{
22176   predicate(UseAVX == 0);
22177   match(Set dst (OrV dst src));
22178   format %{ "por     $dst,$src\t! or vectors" %}
22179   ins_encode %{
22180     __ por($dst$$XMMRegister, $src$$XMMRegister);
22181   %}
22182   ins_pipe( pipe_slow );
22183 %}
22184 
22185 instruct vor_reg(vec dst, vec src1, vec src2) %{
22186   predicate(UseAVX > 0);
22187   match(Set dst (OrV src1 src2));
22188   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
22189   ins_encode %{
22190     int vlen_enc = vector_length_encoding(this);
22191     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22192   %}
22193   ins_pipe( pipe_slow );
22194 %}
22195 
22196 instruct vor_mem(vec dst, vec src, memory mem) %{
22197   predicate((UseAVX > 0) &&
22198             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22199   match(Set dst (OrV src (LoadVector mem)));
22200   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
22201   ins_encode %{
22202     int vlen_enc = vector_length_encoding(this);
22203     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22204   %}
22205   ins_pipe( pipe_slow );
22206 %}
22207 
22208 // --------------------------------- XOR --------------------------------------
22209 
22210 instruct vxor(vec dst, vec src) %{
22211   predicate(UseAVX == 0);
22212   match(Set dst (XorV dst src));
22213   format %{ "pxor    $dst,$src\t! xor vectors" %}
22214   ins_encode %{
22215     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
22216   %}
22217   ins_pipe( pipe_slow );
22218 %}
22219 
22220 instruct vxor_reg(vec dst, vec src1, vec src2) %{
22221   predicate(UseAVX > 0);
22222   match(Set dst (XorV src1 src2));
22223   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
22224   ins_encode %{
22225     int vlen_enc = vector_length_encoding(this);
22226     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22227   %}
22228   ins_pipe( pipe_slow );
22229 %}
22230 
22231 instruct vxor_mem(vec dst, vec src, memory mem) %{
22232   predicate((UseAVX > 0) &&
22233             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22234   match(Set dst (XorV src (LoadVector mem)));
22235   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
22236   ins_encode %{
22237     int vlen_enc = vector_length_encoding(this);
22238     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22239   %}
22240   ins_pipe( pipe_slow );
22241 %}
22242 
22243 // --------------------------------- VectorCast --------------------------------------
22244 
22245 instruct vcastBtoX(vec dst, vec src) %{
22246   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
22247   match(Set dst (VectorCastB2X src));
22248   format %{ "vector_cast_b2x $dst,$src\t!" %}
22249   ins_encode %{
22250     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22251     int vlen_enc = vector_length_encoding(this);
22252     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22253   %}
22254   ins_pipe( pipe_slow );
22255 %}
22256 
22257 instruct vcastBtoD(legVec dst, legVec src) %{
22258   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
22259   match(Set dst (VectorCastB2X src));
22260   format %{ "vector_cast_b2x $dst,$src\t!" %}
22261   ins_encode %{
22262     int vlen_enc = vector_length_encoding(this);
22263     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22264   %}
22265   ins_pipe( pipe_slow );
22266 %}
22267 
22268 instruct castStoX(vec dst, vec src) %{
22269   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22270             Matcher::vector_length(n->in(1)) <= 8 && // src
22271             Matcher::vector_element_basic_type(n) == T_BYTE);
22272   match(Set dst (VectorCastS2X src));
22273   format %{ "vector_cast_s2x $dst,$src" %}
22274   ins_encode %{
22275     assert(UseAVX > 0, "required");
22276 
22277     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
22278     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
22279   %}
22280   ins_pipe( pipe_slow );
22281 %}
22282 
22283 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
22284   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22285             Matcher::vector_length(n->in(1)) == 16 && // src
22286             Matcher::vector_element_basic_type(n) == T_BYTE);
22287   effect(TEMP dst, TEMP vtmp);
22288   match(Set dst (VectorCastS2X src));
22289   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22290   ins_encode %{
22291     assert(UseAVX > 0, "required");
22292 
22293     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22294     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22295     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22296     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22297   %}
22298   ins_pipe( pipe_slow );
22299 %}
22300 
22301 instruct vcastStoX_evex(vec dst, vec src) %{
22302   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22303             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22304   match(Set dst (VectorCastS2X src));
22305   format %{ "vector_cast_s2x $dst,$src\t!" %}
22306   ins_encode %{
22307     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22308     int src_vlen_enc = vector_length_encoding(this, $src);
22309     int vlen_enc = vector_length_encoding(this);
22310     switch (to_elem_bt) {
22311       case T_BYTE:
22312         if (!VM_Version::supports_avx512vl()) {
22313           vlen_enc = Assembler::AVX_512bit;
22314         }
22315         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22316         break;
22317       case T_INT:
22318         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22319         break;
22320       case T_FLOAT:
22321         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22322         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22323         break;
22324       case T_LONG:
22325         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22326         break;
22327       case T_DOUBLE: {
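        // short -> double quadruples the element size: sign-extend to int at half
        // the destination width, then widen int -> double to the full width.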
22328         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22329         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22330         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22331         break;
22332       }
22333       default:
22334         ShouldNotReachHere();
22335     }
22336   %}
22337   ins_pipe( pipe_slow );
22338 %}
22339 
22340 instruct castItoX(vec dst, vec src) %{
22341   predicate(UseAVX <= 2 &&
22342             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22343             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22344   match(Set dst (VectorCastI2X src));
22345   format %{ "vector_cast_i2x $dst,$src" %}
22346   ins_encode %{
22347     assert(UseAVX > 0, "required");
22348 
22349     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22350     int vlen_enc = vector_length_encoding(this, $src);
22351 
22352     if (to_elem_bt == T_BYTE) {
22353       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22354       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22355       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22356     } else {
22357       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22358       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22359       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22360     }
22361   %}
22362   ins_pipe( pipe_slow );
22363 %}
22364 
22365 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22366   predicate(UseAVX <= 2 &&
22367             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22368             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22369   match(Set dst (VectorCastI2X src));
22370   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22371   effect(TEMP dst, TEMP vtmp);
22372   ins_encode %{
22373     assert(UseAVX > 0, "required");
22374 
22375     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22376     int vlen_enc = vector_length_encoding(this, $src);
22377 
22378     if (to_elem_bt == T_BYTE) {
22379       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22380       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22381       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22382       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22383     } else {
22384       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22385       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22386       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22387       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22388     }
22389   %}
22390   ins_pipe( pipe_slow );
22391 %}
22392 
22393 instruct vcastItoX_evex(vec dst, vec src) %{
22394   predicate(UseAVX > 2 ||
22395             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22396   match(Set dst (VectorCastI2X src));
22397   format %{ "vector_cast_i2x $dst,$src\t!" %}
22398   ins_encode %{
22399     assert(UseAVX > 0, "required");
22400 
22401     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22402     int src_vlen_enc = vector_length_encoding(this, $src);
22403     int dst_vlen_enc = vector_length_encoding(this);
22404     switch (dst_elem_bt) {
22405       case T_BYTE:
22406         if (!VM_Version::supports_avx512vl()) {
22407           src_vlen_enc = Assembler::AVX_512bit;
22408         }
22409         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22410         break;
22411       case T_SHORT:
22412         if (!VM_Version::supports_avx512vl()) {
22413           src_vlen_enc = Assembler::AVX_512bit;
22414         }
22415         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22416         break;
22417       case T_FLOAT:
22418         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22419         break;
22420       case T_LONG:
22421         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22422         break;
22423       case T_DOUBLE:
22424         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22425         break;
22426       default:
22427         ShouldNotReachHere();
22428     }
22429   %}
22430   ins_pipe( pipe_slow );
22431 %}
22432 
22433 instruct vcastLtoBS(vec dst, vec src) %{
22434   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22435             UseAVX <= 2);
22436   match(Set dst (VectorCastL2X src));
22437   format %{ "vector_cast_l2x  $dst,$src" %}
22438   ins_encode %{
22439     assert(UseAVX > 0, "required");
22440 
22441     int vlen = Matcher::vector_length_in_bytes(this, $src);
22442     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
22443     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22444                                                       : ExternalAddress(vector_int_to_short_mask());
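    // Narrow each long to its low 32 bits by shuffling the even dwords together
    // (shuffle control 8 selects dwords {0, 2}), then mask and pack down to
    // short or byte.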
22445     if (vlen <= 16) {
22446       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22447       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22448       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22449     } else {
22450       assert(vlen <= 32, "required");
22451       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22452       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22453       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22454       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22455     }
22456     if (to_elem_bt == T_BYTE) {
22457       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22458     }
22459   %}
22460   ins_pipe( pipe_slow );
22461 %}
22462 
22463 instruct vcastLtoX_evex(vec dst, vec src) %{
22464   predicate(UseAVX > 2 ||
22465             (Matcher::vector_element_basic_type(n) == T_INT ||
22466              Matcher::vector_element_basic_type(n) == T_FLOAT ||
22467              Matcher::vector_element_basic_type(n) == T_DOUBLE));
22468   match(Set dst (VectorCastL2X src));
22469   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22470   ins_encode %{
22471     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22472     int vlen = Matcher::vector_length_in_bytes(this, $src);
22473     int vlen_enc = vector_length_encoding(this, $src);
22474     switch (to_elem_bt) {
22475       case T_BYTE:
22476         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22477           vlen_enc = Assembler::AVX_512bit;
22478         }
22479         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22480         break;
22481       case T_SHORT:
22482         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22483           vlen_enc = Assembler::AVX_512bit;
22484         }
22485         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22486         break;
22487       case T_INT:
22488         if (vlen == 8) {
22489           if ($dst$$XMMRegister != $src$$XMMRegister) {
22490             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22491           }
22492         } else if (vlen == 16) {
22493           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22494         } else if (vlen == 32) {
22495           if (UseAVX > 2) {
22496             if (!VM_Version::supports_avx512vl()) {
22497               vlen_enc = Assembler::AVX_512bit;
22498             }
22499             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22500           } else {
22501             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22502             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22503           }
22504         } else { // vlen == 64
22505           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22506         }
22507         break;
22508       case T_FLOAT:
22509         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22510         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22511         break;
22512       case T_DOUBLE:
22513         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22514         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      default: assert(false, "%s", type2name(to_elem_bt));
22518     }
22519   %}
22520   ins_pipe( pipe_slow );
22521 %}
22522 
22523 instruct vcastFtoD_reg(vec dst, vec src) %{
22524   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22525   match(Set dst (VectorCastF2X src));
22526   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22527   ins_encode %{
22528     int vlen_enc = vector_length_encoding(this);
22529     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22530   %}
22531   ins_pipe( pipe_slow );
22532 %}
22535 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22536   predicate(!VM_Version::supports_avx10_2() &&
22537             !VM_Version::supports_avx512vl() &&
22538             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22539             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22540             is_integral_type(Matcher::vector_element_basic_type(n)));
22541   match(Set dst (VectorCastF2X src));
22542   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22543   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22544   ins_encode %{
22545     int vlen_enc = vector_length_encoding(this, $src);
22546     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register when loading
    // addresses wider than 32 bits for register-indirect addressing, since stub
    // constants live in the code cache and ReservedCodeCacheSize is currently
    // capped at 2G. Targets are free to raise that limit, but a code cache larger
    // than 2G looks unreasonable in practice; on the flip side, the cap saves a
    // temporary register allocation, which in the limiting case can prevent
    // spilling in blocks with high register pressure.
22554     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22555                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22556                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22557   %}
22558   ins_pipe( pipe_slow );
22559 %}
22560 
22561 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22562   predicate(!VM_Version::supports_avx10_2() &&
22563             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22564             is_integral_type(Matcher::vector_element_basic_type(n)));
22565   match(Set dst (VectorCastF2X src));
22566   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22567   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22568   ins_encode %{
22569     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
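    // F -> L widens each lane, so the cast is sized by the destination vector;
    // the narrower integral targets are sized by the source.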
22570     if (to_elem_bt == T_LONG) {
22571       int vlen_enc = vector_length_encoding(this);
22572       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22573                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22574                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22575     } else {
22576       int vlen_enc = vector_length_encoding(this, $src);
22577       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22578                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22579                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22580     }
22581   %}
22582   ins_pipe( pipe_slow );
22583 %}
22584 
22585 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22586   predicate(VM_Version::supports_avx10_2() &&
22587             is_integral_type(Matcher::vector_element_basic_type(n)));
22588   match(Set dst (VectorCastF2X src));
22589   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22590   ins_encode %{
22591     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22592     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22593     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22594   %}
22595   ins_pipe( pipe_slow );
22596 %}
22597 
22598 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22599   predicate(VM_Version::supports_avx10_2() &&
22600             is_integral_type(Matcher::vector_element_basic_type(n)));
22601   match(Set dst (VectorCastF2X (LoadVector src)));
22602   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22603   ins_encode %{
22604     int vlen = Matcher::vector_length(this);
22605     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22606     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22607     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22608   %}
22609   ins_pipe( pipe_slow );
22610 %}
22611 
22612 instruct vcastDtoF_reg(vec dst, vec src) %{
22613   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22614   match(Set dst (VectorCastD2X src));
22615   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22616   ins_encode %{
22617     int vlen_enc = vector_length_encoding(this, $src);
22618     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22619   %}
22620   ins_pipe( pipe_slow );
22621 %}
22622 
22623 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22624   predicate(!VM_Version::supports_avx10_2() &&
22625             !VM_Version::supports_avx512vl() &&
22626             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22627             is_integral_type(Matcher::vector_element_basic_type(n)));
22628   match(Set dst (VectorCastD2X src));
22629   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22630   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22631   ins_encode %{
22632     int vlen_enc = vector_length_encoding(this, $src);
22633     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22634     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22635                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22636                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22637   %}
22638   ins_pipe( pipe_slow );
22639 %}
22640 
22641 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22642   predicate(!VM_Version::supports_avx10_2() &&
22643             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22644             is_integral_type(Matcher::vector_element_basic_type(n)));
22645   match(Set dst (VectorCastD2X src));
22646   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22647   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22648   ins_encode %{
22649     int vlen_enc = vector_length_encoding(this, $src);
22650     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22651     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22652                               ExternalAddress(vector_float_signflip());
22653     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22654                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22655   %}
22656   ins_pipe( pipe_slow );
22657 %}
22658 
22659 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22660   predicate(VM_Version::supports_avx10_2() &&
22661             is_integral_type(Matcher::vector_element_basic_type(n)));
22662   match(Set dst (VectorCastD2X src));
22663   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22664   ins_encode %{
22665     int vlen_enc = vector_length_encoding(this, $src);
22666     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22667     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22668   %}
22669   ins_pipe( pipe_slow );
22670 %}
22671 
22672 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22673   predicate(VM_Version::supports_avx10_2() &&
22674             is_integral_type(Matcher::vector_element_basic_type(n)));
22675   match(Set dst (VectorCastD2X (LoadVector src)));
22676   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22677   ins_encode %{
22678     int vlen = Matcher::vector_length(this);
22679     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22680     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22681     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22682   %}
22683   ins_pipe( pipe_slow );
22684 %}
22685 
22686 instruct vucast(vec dst, vec src) %{
22687   match(Set dst (VectorUCastB2X src));
22688   match(Set dst (VectorUCastS2X src));
22689   match(Set dst (VectorUCastI2X src));
22690   format %{ "vector_ucast $dst,$src\t!" %}
22691   ins_encode %{
22692     assert(UseAVX > 0, "required");
22693 
22694     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22695     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22696     int vlen_enc = vector_length_encoding(this);
22697     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22698   %}
22699   ins_pipe( pipe_slow );
22700 %}
22701 
22702 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22703   predicate(!VM_Version::supports_avx512vl() &&
22704             Matcher::vector_length_in_bytes(n) < 64 &&
22705             Matcher::vector_element_basic_type(n) == T_INT);
22706   match(Set dst (RoundVF src));
22707   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22708   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22709   ins_encode %{
22710     int vlen_enc = vector_length_encoding(this);
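    // Scratch MXCSR value: 0x3F80 masks all exceptions and selects round-toward-
    // negative-infinity, so Math.round can be computed as floor(x + 0.5); the
    // E-core variant (0x3FBF = 0x3F80 | 0x3F) additionally pre-sets the sticky
    // exception flag bits.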
22711     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22712     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22713                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22714                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22715   %}
22716   ins_pipe( pipe_slow );
22717 %}
22718 
22719 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22720   predicate((VM_Version::supports_avx512vl() ||
22721              Matcher::vector_length_in_bytes(n) == 64) &&
22722              Matcher::vector_element_basic_type(n) == T_INT);
22723   match(Set dst (RoundVF src));
22724   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22725   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22726   ins_encode %{
22727     int vlen_enc = vector_length_encoding(this);
22728     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22729     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22730                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22731                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22732   %}
22733   ins_pipe( pipe_slow );
22734 %}
22735 
22736 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22737   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22738   match(Set dst (RoundVD src));
22739   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2,  KILL cr);
22740   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22741   ins_encode %{
22742     int vlen_enc = vector_length_encoding(this);
22743     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22744     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22745                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22746                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22747   %}
22748   ins_pipe( pipe_slow );
22749 %}
22750 
22751 // --------------------------------- VectorMaskCmp --------------------------------------
22752 
22753 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22754   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22755             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22756             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22757             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22758   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22759   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22760   ins_encode %{
22761     int vlen_enc = vector_length_encoding(this, $src1);
22762     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22763     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22764       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22765     } else {
22766       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22767     }
22768   %}
22769   ins_pipe( pipe_slow );
22770 %}
22771 
22772 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22773   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22774             n->bottom_type()->isa_vectmask() == nullptr &&
22775             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22776   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22777   effect(TEMP ktmp);
22778   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22779   ins_encode %{
22780     int vlen_enc = Assembler::AVX_512bit;
22781     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22782     KRegister mask = k0; // The comparison itself is not being masked.
22783     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22784       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22785       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22786     } else {
22787       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22788       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22789     }
22790   %}
22791   ins_pipe( pipe_slow );
22792 %}
22793 
22794 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22795   predicate(n->bottom_type()->isa_vectmask() &&
22796             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22797   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22798   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22799   ins_encode %{
22800     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22801     int vlen_enc = vector_length_encoding(this, $src1);
22802     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22803     KRegister mask = k0; // The comparison itself is not being masked.
22804     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22805       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22806     } else {
22807       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22808     }
22809   %}
22810   ins_pipe( pipe_slow );
22811 %}
22812 
22813 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22814   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22815             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22816             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22817             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22818             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22819             (n->in(2)->get_int() == BoolTest::eq ||
22820              n->in(2)->get_int() == BoolTest::lt ||
22821              n->in(2)->get_int() == BoolTest::gt)); // cond
22822   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22823   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22824   ins_encode %{
22825     int vlen_enc = vector_length_encoding(this, $src1);
22826     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22827     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22828     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22829   %}
22830   ins_pipe( pipe_slow );
22831 %}
22832 
22833 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22834   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22835             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22836             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22837             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22838             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22839             (n->in(2)->get_int() == BoolTest::ne ||
22840              n->in(2)->get_int() == BoolTest::le ||
22841              n->in(2)->get_int() == BoolTest::ge)); // cond
22842   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22843   effect(TEMP dst, TEMP xtmp);
22844   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22845   ins_encode %{
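    // The SSE/AVX integer compares only come in eq/gt forms; ne/le/ge are formed
    // by running the complementary compare and negating the result, with $xtmp
    // used while building the negation.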
22846     int vlen_enc = vector_length_encoding(this, $src1);
22847     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22848     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22849     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22850   %}
22851   ins_pipe( pipe_slow );
22852 %}
22853 
22854 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22855   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22856             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22857             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22858             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22859             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22860   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22861   effect(TEMP dst, TEMP xtmp);
22862   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22863   ins_encode %{
22864     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22865     int vlen_enc = vector_length_encoding(this, $src1);
22866     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22867     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22868 
22869     if (vlen_enc == Assembler::AVX_128bit) {
22870       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22871     } else {
22872       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22873     }
22874     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22875     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22876     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22877   %}
22878   ins_pipe( pipe_slow );
22879 %}
22880 
22881 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22882   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22883              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22884              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22885   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22886   effect(TEMP ktmp);
22887   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22888   ins_encode %{
22889     assert(UseAVX > 2, "required");
22890 
22891     int vlen_enc = vector_length_encoding(this, $src1);
22892     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22893     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22894     KRegister mask = k0; // The comparison itself is not being masked.
22895     bool merge = false;
22896     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22897 
22898     switch (src1_elem_bt) {
22899       case T_INT: {
22900         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22901         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22902         break;
22903       }
22904       case T_LONG: {
22905         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22906         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22907         break;
22908       }
22909       default: assert(false, "%s", type2name(src1_elem_bt));
22910     }
22911   %}
22912   ins_pipe( pipe_slow );
22913 %}
22916 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22917   predicate(n->bottom_type()->isa_vectmask() &&
22918             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22919   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22920   format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22921   ins_encode %{
22922     assert(UseAVX > 2, "required");
22923     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22924 
22925     int vlen_enc = vector_length_encoding(this, $src1);
22926     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22927     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22928     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22929 
    // Dispatch on the element type of src1.
22931     switch (src1_elem_bt) {
22932       case T_BYTE: {
22933         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22934         break;
22935       }
22936       case T_SHORT: {
22937         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22938         break;
22939       }
22940       case T_INT: {
22941         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22942         break;
22943       }
22944       case T_LONG: {
22945         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22946         break;
22947       }
22948       default: assert(false, "%s", type2name(src1_elem_bt));
22949     }
22950   %}
22951   ins_pipe( pipe_slow );
22952 %}
22953 
22954 // Extract
22955 
22956 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22957   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22958   match(Set dst (ExtractI src idx));
22959   match(Set dst (ExtractS src idx));
22960   match(Set dst (ExtractB src idx));
22961   format %{ "extractI $dst,$src,$idx\t!" %}
22962   ins_encode %{
22963     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22964 
22965     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22966     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22967   %}
22968   ins_pipe( pipe_slow );
22969 %}
22970 
22971 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22972   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22973             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22974   match(Set dst (ExtractI src idx));
22975   match(Set dst (ExtractS src idx));
22976   match(Set dst (ExtractB src idx));
22977   effect(TEMP vtmp);
22978   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22979   ins_encode %{
22980     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22981 
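    // Isolate the 128-bit lane holding element $idx first, then extract the
    // element from within that lane.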
22982     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22983     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22984     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22985   %}
22986   ins_pipe( pipe_slow );
22987 %}
22988 
22989 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22990   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22991   match(Set dst (ExtractL src idx));
22992   format %{ "extractL $dst,$src,$idx\t!" %}
22993   ins_encode %{
22994     assert(UseSSE >= 4, "required");
22995     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22996 
22997     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22998   %}
22999   ins_pipe( pipe_slow );
23000 %}
23001 
23002 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
23003   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
23004             Matcher::vector_length(n->in(1)) == 8);  // src
23005   match(Set dst (ExtractL src idx));
23006   effect(TEMP vtmp);
23007   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
23008   ins_encode %{
23009     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23010 
23011     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23012     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
23013   %}
23014   ins_pipe( pipe_slow );
23015 %}
23016 
23017 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
23018   predicate(Matcher::vector_length(n->in(1)) <= 4);
23019   match(Set dst (ExtractF src idx));
23020   effect(TEMP dst, TEMP vtmp);
23021   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
23022   ins_encode %{
23023     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23024 
23025     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
23026   %}
23027   ins_pipe( pipe_slow );
23028 %}
23029 
23030 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 8 ||  // src
            Matcher::vector_length(n->in(1)) == 16);  // src
23033   match(Set dst (ExtractF src idx));
23034   effect(TEMP vtmp);
23035   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
23036   ins_encode %{
23037     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23038 
23039     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23040     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
23041   %}
23042   ins_pipe( pipe_slow );
23043 %}
23044 
23045 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
23046   predicate(Matcher::vector_length(n->in(1)) == 2); // src
23047   match(Set dst (ExtractD src idx));
23048   format %{ "extractD $dst,$src,$idx\t!" %}
23049   ins_encode %{
23050     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23051 
23052     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23053   %}
23054   ins_pipe( pipe_slow );
23055 %}
23056 
23057 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
23058   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
23059             Matcher::vector_length(n->in(1)) == 8);  // src
23060   match(Set dst (ExtractD src idx));
23061   effect(TEMP vtmp);
23062   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
23063   ins_encode %{
23064     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23065 
23066     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23067     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
23068   %}
23069   ins_pipe( pipe_slow );
23070 %}
23071 
23072 // --------------------------------- Vector Blend --------------------------------------
23073 
23074 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
23075   predicate(UseAVX == 0);
23076   match(Set dst (VectorBlend (Binary dst src) mask));
23077   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
23078   effect(TEMP tmp);
23079   ins_encode %{
23080     assert(UseSSE >= 4, "required");
23081 
23082     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
23083       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
23084     }
23085     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
23086   %}
23087   ins_pipe( pipe_slow );
23088 %}
23089 
23090 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
23091   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
23092             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23093             Matcher::vector_length_in_bytes(n) <= 32 &&
23094             is_integral_type(Matcher::vector_element_basic_type(n)));
23095   match(Set dst (VectorBlend (Binary src1 src2) mask));
23096   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
23097   ins_encode %{
23098     int vlen_enc = vector_length_encoding(this);
23099     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23100   %}
23101   ins_pipe( pipe_slow );
23102 %}
23103 
23104 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
23105   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
23106             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23107             Matcher::vector_length_in_bytes(n) <= 32 &&
23108             !is_integral_type(Matcher::vector_element_basic_type(n)));
23109   match(Set dst (VectorBlend (Binary src1 src2) mask));
23110   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
23111   ins_encode %{
23112     int vlen_enc = vector_length_encoding(this);
23113     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23114   %}
23115   ins_pipe( pipe_slow );
23116 %}
23117 
23118 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
23119   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
23120             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23121             Matcher::vector_length_in_bytes(n) <= 32);
23122   match(Set dst (VectorBlend (Binary src1 src2) mask));
23123   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
23124   effect(TEMP vtmp, TEMP dst);
23125   ins_encode %{
23126     int vlen_enc = vector_length_encoding(this);
23127     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
23128     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23129     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
23130   %}
23131   ins_pipe( pipe_slow );
23132 %}
23133 
23134 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
23135   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
23136             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
23137   match(Set dst (VectorBlend (Binary src1 src2) mask));
23138   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
23139   effect(TEMP ktmp);
23140   ins_encode %{
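    // Convert the all-ones-lane vector mask into a k-register predicate by
    // comparing it against all-bits-set, then perform a merging masked blend.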
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
23143     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
23144     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23145   %}
23146   ins_pipe( pipe_slow );
23147 %}
23150 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
23151   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
23152             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
23153              VM_Version::supports_avx512bw()));
23154   match(Set dst (VectorBlend (Binary src1 src2) mask));
23155   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
23156   ins_encode %{
23157     int vlen_enc = vector_length_encoding(this);
23158     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23159     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23160   %}
23161   ins_pipe( pipe_slow );
23162 %}
23163 
23164 // --------------------------------- ABS --------------------------------------
23165 // a = |a|
23166 instruct vabsB_reg(vec dst, vec src) %{
23167   match(Set dst (AbsVB  src));
23168   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
23169   ins_encode %{
23170     uint vlen = Matcher::vector_length(this);
23171     if (vlen <= 16) {
23172       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23173     } else {
23174       int vlen_enc = vector_length_encoding(this);
23175       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23176     }
23177   %}
23178   ins_pipe( pipe_slow );
23179 %}
23180 
23181 instruct vabsS_reg(vec dst, vec src) %{
23182   match(Set dst (AbsVS  src));
23183   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
23184   ins_encode %{
23185     uint vlen = Matcher::vector_length(this);
23186     if (vlen <= 8) {
23187       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23188     } else {
23189       int vlen_enc = vector_length_encoding(this);
23190       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23191     }
23192   %}
23193   ins_pipe( pipe_slow );
23194 %}
23195 
23196 instruct vabsI_reg(vec dst, vec src) %{
23197   match(Set dst (AbsVI  src));
23198   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
23199   ins_encode %{
23200     uint vlen = Matcher::vector_length(this);
23201     if (vlen <= 4) {
23202       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23203     } else {
23204       int vlen_enc = vector_length_encoding(this);
23205       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23206     }
23207   %}
23208   ins_pipe( pipe_slow );
23209 %}
23210 
23211 instruct vabsL_reg(vec dst, vec src) %{
23212   match(Set dst (AbsVL  src));
23213   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
23214   ins_encode %{
23215     assert(UseAVX > 2, "required");
23216     int vlen_enc = vector_length_encoding(this);
23217     if (!VM_Version::supports_avx512vl()) {
23218       vlen_enc = Assembler::AVX_512bit;
23219     }
23220     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23221   %}
23222   ins_pipe( pipe_slow );
23223 %}
23224 
23225 // --------------------------------- ABSNEG --------------------------------------
23226 
23227 instruct vabsnegF(vec dst, vec src) %{
23228   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
23229   match(Set dst (AbsVF src));
23230   match(Set dst (NegVF src));
23231   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
23232   ins_cost(150);
23233   ins_encode %{
23234     int opcode = this->ideal_Opcode();
23235     int vlen = Matcher::vector_length(this);
23236     if (vlen == 2) {
23237       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23238     } else {
23239       assert(vlen == 8 || vlen == 16, "required");
23240       int vlen_enc = vector_length_encoding(this);
23241       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23242     }
23243   %}
23244   ins_pipe( pipe_slow );
23245 %}
23246 
23247 instruct vabsneg4F(vec dst) %{
23248   predicate(Matcher::vector_length(n) == 4);
23249   match(Set dst (AbsVF dst));
23250   match(Set dst (NegVF dst));
23251   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
23252   ins_cost(150);
23253   ins_encode %{
23254     int opcode = this->ideal_Opcode();
23255     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
23256   %}
23257   ins_pipe( pipe_slow );
23258 %}
23259 
23260 instruct vabsnegD(vec dst, vec src) %{
23261   match(Set dst (AbsVD  src));
23262   match(Set dst (NegVD  src));
23263   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
23264   ins_encode %{
23265     int opcode = this->ideal_Opcode();
23266     uint vlen = Matcher::vector_length(this);
23267     if (vlen == 2) {
23268       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23269     } else {
23270       int vlen_enc = vector_length_encoding(this);
23271       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23272     }
23273   %}
23274   ins_pipe( pipe_slow );
23275 %}
23276 
23277 //------------------------------------- VectorTest --------------------------------------------
23278 
23279 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
23280   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
23281   match(Set cr (VectorTest src1 src2));
23282   effect(TEMP vtmp);
23283   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
23284   ins_encode %{
23285     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23286     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23287     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23288   %}
23289   ins_pipe( pipe_slow );
23290 %}
23291 
23292 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23293   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23294   match(Set cr (VectorTest src1 src2));
23295   format %{ "vptest_ge16  $src1, $src2\n\t" %}
23296   ins_encode %{
23297     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23298     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23299     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23300   %}
23301   ins_pipe( pipe_slow );
23302 %}
23303 
23304 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23305   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23306              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23307             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23308   match(Set cr (VectorTest src1 src2));
23309   effect(TEMP tmp);
23310   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23311   ins_encode %{
23312     uint masklen = Matcher::vector_length(this, $src1);
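    // Move the mask into a GPR, keep only the low masklen bits, and compare
    // against all-ones: the equal flag is set iff every lane is true.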
23313     __ kmovwl($tmp$$Register, $src1$$KRegister);
23314     __ andl($tmp$$Register, (1 << masklen) - 1);
23315     __ cmpl($tmp$$Register, (1 << masklen) - 1);
23316   %}
23317   ins_pipe( pipe_slow );
23318 %}
23319 
23320 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23321   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23322              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23323             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23324   match(Set cr (VectorTest src1 src2));
23325   effect(TEMP tmp);
23326   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23327   ins_encode %{
23328     uint masklen = Matcher::vector_length(this, $src1);
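    // ANDing with the lane mask sets ZF iff no lane is true.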
23329     __ kmovwl($tmp$$Register, $src1$$KRegister);
23330     __ andl($tmp$$Register, (1 << masklen) - 1);
23331   %}
23332   ins_pipe( pipe_slow );
23333 %}
23334 
23335 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23336   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23337             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23338   match(Set cr (VectorTest src1 src2));
23339   format %{ "ktest_ge8  $src1, $src2\n\t" %}
23340   ins_encode %{
23341     uint masklen = Matcher::vector_length(this, $src1);
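    // kortest of the mask with itself: ZF is set iff no lane is true,
    // CF is set iff all masklen lanes are true.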
23342     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23343   %}
23344   ins_pipe( pipe_slow );
23345 %}
23346 
23347 //------------------------------------- LoadMask --------------------------------------------
23348 
23349 instruct loadMask(legVec dst, legVec src) %{
23350   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23351   match(Set dst (VectorLoadMask src));
23352   effect(TEMP dst);
23353   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23354   ins_encode %{
23355     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23356     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23357     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23358   %}
23359   ins_pipe( pipe_slow );
23360 %}
23361 
23362 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23363   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23364   match(Set dst (VectorLoadMask src));
23365   effect(TEMP xtmp);
23366   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23367   ins_encode %{
23368     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23369                         true, Assembler::AVX_512bit);
23370   %}
23371   ins_pipe( pipe_slow );
23372 %}
23373 
23374 instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
23375   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23376   match(Set dst (VectorLoadMask src));
23377   effect(TEMP xtmp);
23378   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23379   ins_encode %{
23380     int vlen_enc = vector_length_encoding(in(1));
23381     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23382                         false, vlen_enc);
23383   %}
23384   ins_pipe( pipe_slow );
23385 %}
23386 
23387 //------------------------------------- StoreMask --------------------------------------------
23388 
23389 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23390   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23391   match(Set dst (VectorStoreMask src size));
23392   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23393   ins_encode %{
23394     int vlen = Matcher::vector_length(this);
23395     if (vlen <= 16 && UseAVX <= 2) {
23396       assert(UseSSE >= 3, "required");
23397       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23398     } else {
23399       assert(UseAVX > 0, "required");
23400       int src_vlen_enc = vector_length_encoding(this, $src);
23401       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23402     }
23403   %}
23404   ins_pipe( pipe_slow );
23405 %}
23406 
23407 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23408   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23409   match(Set dst (VectorStoreMask src size));
23410   effect(TEMP_DEF dst, TEMP xtmp);
23411   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23412   ins_encode %{
23413     int vlen_enc = Assembler::AVX_128bit;
23414     int vlen = Matcher::vector_length(this);
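    // Pack the 16-bit 0/-1 mask lanes down to byte-sized 0/1 values.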
23415     if (vlen <= 8) {
23416       assert(UseSSE >= 3, "required");
23417       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23418       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23419       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23420     } else {
23421       assert(UseAVX > 0, "required");
23422       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23423       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23424       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23425     }
23426   %}
23427   ins_pipe( pipe_slow );
23428 %}
23429 
23430 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23431   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23432   match(Set dst (VectorStoreMask src size));
23433   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23434   effect(TEMP_DEF dst, TEMP xtmp);
23435   ins_encode %{
23436     int vlen_enc = Assembler::AVX_128bit;
23437     int vlen = Matcher::vector_length(this);
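    // Pack the 32-bit 0/-1 mask lanes down to byte-sized 0/1 values in two steps.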
23438     if (vlen <= 4) {
23439       assert(UseSSE >= 3, "required");
23440       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23441       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23442       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23443       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23444     } else {
23445       assert(UseAVX > 0, "required");
23446       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23447       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23448       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23449       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23450       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23451     }
23452   %}
23453   ins_pipe( pipe_slow );
23454 %}
23455 
23456 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23457   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23458   match(Set dst (VectorStoreMask src size));
23459   effect(TEMP_DEF dst, TEMP xtmp);
23460   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23461   ins_encode %{
23462     assert(UseSSE >= 3, "required");
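    // pshufd with 0x8 gathers the low dword of each qword lane; the 0/-1
    // dwords are then narrowed to 0/1 bytes.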
23463     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23464     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23465     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23466     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23467     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23468   %}
23469   ins_pipe( pipe_slow );
23470 %}
23471 
23472 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23473   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23474   match(Set dst (VectorStoreMask src size));
23475   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23476   effect(TEMP_DEF dst, TEMP vtmp);
23477   ins_encode %{
23478     int vlen_enc = Assembler::AVX_128bit;
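    // Gather the low dword of each qword from both 128-bit lanes into the
    // low half of $dst, then narrow the 0/-1 dwords to 0/1 bytes.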
23479     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23480     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23481     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23482     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23483     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23484     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23485     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23486   %}
23487   ins_pipe( pipe_slow );
23488 %}
23489 
23490 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23491   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23492   match(Set dst (VectorStoreMask src size));
23493   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23494   ins_encode %{
23495     int src_vlen_enc = vector_length_encoding(this, $src);
23496     int dst_vlen_enc = vector_length_encoding(this);
23497     if (!VM_Version::supports_avx512vl()) {
23498       src_vlen_enc = Assembler::AVX_512bit;
23499     }
23500     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23501     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23502   %}
23503   ins_pipe( pipe_slow );
23504 %}
23505 
23506 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23507   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23508   match(Set dst (VectorStoreMask src size));
23509   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23510   ins_encode %{
23511     int src_vlen_enc = vector_length_encoding(this, $src);
23512     int dst_vlen_enc = vector_length_encoding(this);
23513     if (!VM_Version::supports_avx512vl()) {
23514       src_vlen_enc = Assembler::AVX_512bit;
23515     }
23516     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23517     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23518   %}
23519   ins_pipe( pipe_slow );
23520 %}
23521 
23522 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23523   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23524   match(Set dst (VectorStoreMask mask size));
23525   effect(TEMP_DEF dst);
23526   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23527   ins_encode %{
23528     assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23529     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23530                  false, Assembler::AVX_512bit, noreg);
23531     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23532   %}
23533   ins_pipe( pipe_slow );
23534 %}
23535 
23536 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23537   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23538   match(Set dst (VectorStoreMask mask size));
23539   effect(TEMP_DEF dst);
23540   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23541   ins_encode %{
23542     int dst_vlen_enc = vector_length_encoding(this);
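    // evpmovm2b expands the k-mask into 0/-1 bytes; the abs turns -1 into 1.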
23543     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23544     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23545   %}
23546   ins_pipe( pipe_slow );
23547 %}
23548 
23549 instruct vmaskcast_evex(kReg dst) %{
23550   match(Set dst (VectorMaskCast dst));
23551   ins_cost(0);
23552   format %{ "vector_mask_cast $dst" %}
23553   ins_encode %{
23554     // empty
23555   %}
23556   ins_pipe(empty);
23557 %}
23558 
23559 instruct vmaskcast(vec dst) %{
23560   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23561   match(Set dst (VectorMaskCast dst));
23562   ins_cost(0);
23563   format %{ "vector_mask_cast $dst" %}
23564   ins_encode %{
23565     // empty
23566   %}
23567   ins_pipe(empty);
23568 %}
23569 
23570 instruct vmaskcast_avx(vec dst, vec src) %{
23571   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23572   match(Set dst (VectorMaskCast src));
23573   format %{ "vector_mask_cast $dst, $src" %}
23574   ins_encode %{
23575     int vlen = Matcher::vector_length(this);
23576     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23577     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23578     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23579   %}
23580   ins_pipe(pipe_slow);
23581 %}
23582 
23583 //-------------------------------- Load Iota Indices ----------------------------------
23584 
23585 instruct loadIotaIndices(vec dst, immI_0 src) %{
23586   match(Set dst (VectorLoadConst src));
23587   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23588   ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23592   %}
23593   ins_pipe( pipe_slow );
23594 %}
23595 
23596 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23597   match(Set dst (PopulateIndex src1 src2));
23598   effect(TEMP dst, TEMP vtmp);
23599   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23600   ins_encode %{
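    // dst[i] = $src1 + i: broadcast the base value, load the iota constant,
    // then add the two vectors.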
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23608   %}
23609   ins_pipe( pipe_slow );
23610 %}
23611 
23612 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23613   match(Set dst (PopulateIndex src1 src2));
23614   effect(TEMP dst, TEMP vtmp);
23615   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23616   ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23624   %}
23625   ins_pipe( pipe_slow );
23626 %}
23627 
23628 //-------------------------------- Rearrange ----------------------------------
23629 
23630 // LoadShuffle/Rearrange for Byte
23631 instruct rearrangeB(vec dst, vec shuffle) %{
23632   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23633             Matcher::vector_length(n) < 32);
23634   match(Set dst (VectorRearrange dst shuffle));
23635   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23636   ins_encode %{
23637     assert(UseSSE >= 4, "required");
23638     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23639   %}
23640   ins_pipe( pipe_slow );
23641 %}
23642 
23643 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23644   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23645             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23646   match(Set dst (VectorRearrange src shuffle));
23647   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23648   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23649   ins_encode %{
23650     assert(UseAVX >= 2, "required");
    // Swap the 128-bit lanes of src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to get entries from the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to get entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit of shuffle entries that come from the other lane
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23659     // Perform the blend
23660     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23661   %}
23662   ins_pipe( pipe_slow );
23663 %}
23664 
23665 
23666 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23667   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23668             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23669   match(Set dst (VectorRearrange src shuffle));
23670   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23671   format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23672   ins_encode %{
23673     int vlen_enc = vector_length_encoding(this);
23674     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23675                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23676                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23677   %}
23678   ins_pipe( pipe_slow );
23679 %}
23680 
23681 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23682   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23683             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23684   match(Set dst (VectorRearrange src shuffle));
23685   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23686   ins_encode %{
23687     int vlen_enc = vector_length_encoding(this);
23688     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23689   %}
23690   ins_pipe( pipe_slow );
23691 %}
23692 
23693 // LoadShuffle/Rearrange for Short
23694 
23695 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23696   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23697             !VM_Version::supports_avx512bw());
23698   match(Set dst (VectorLoadShuffle src));
23699   effect(TEMP dst, TEMP vtmp);
23700   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23701   ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
23704     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23705     if (UseAVX == 0) {
23706       assert(vlen_in_bytes <= 16, "required");
23707       // Multiply each shuffle by two to get byte index
23708       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23709       __ psllw($vtmp$$XMMRegister, 1);
23710 
23711       // Duplicate to create 2 copies of byte index
23712       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23713       __ psllw($dst$$XMMRegister, 8);
23714       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23715 
23716       // Add one to get alternate byte index
23717       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23718       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23719     } else {
23720       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23721       int vlen_enc = vector_length_encoding(this);
23722       // Multiply each shuffle by two to get byte index
23723       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23724 
23725       // Duplicate to create 2 copies of byte index
23726       __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister,  8, vlen_enc);
23727       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23728 
23729       // Add one to get alternate byte index
23730       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23731     }
23732   %}
23733   ins_pipe( pipe_slow );
23734 %}
23735 
23736 instruct rearrangeS(vec dst, vec shuffle) %{
23737   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23738             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23739   match(Set dst (VectorRearrange dst shuffle));
23740   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23741   ins_encode %{
23742     assert(UseSSE >= 4, "required");
23743     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23744   %}
23745   ins_pipe( pipe_slow );
23746 %}
23747 
23748 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23749   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23750             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23751   match(Set dst (VectorRearrange src shuffle));
23752   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23753   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23754   ins_encode %{
23755     assert(UseAVX >= 2, "required");
    // Swap the 128-bit lanes of src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to get entries from the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to get entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit of shuffle entries that come from the other lane
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23764     // Perform the blend
23765     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23766   %}
23767   ins_pipe( pipe_slow );
23768 %}
23769 
23770 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23771   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23772             VM_Version::supports_avx512bw());
23773   match(Set dst (VectorRearrange src shuffle));
23774   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23775   ins_encode %{
23776     int vlen_enc = vector_length_encoding(this);
23777     if (!VM_Version::supports_avx512vl()) {
23778       vlen_enc = Assembler::AVX_512bit;
23779     }
23780     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23781   %}
23782   ins_pipe( pipe_slow );
23783 %}
23784 
23785 // LoadShuffle/Rearrange for Integer and Float
23786 
23787 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23788   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23789             Matcher::vector_length(n) == 4 && UseAVX == 0);
23790   match(Set dst (VectorLoadShuffle src));
23791   effect(TEMP dst, TEMP vtmp);
23792   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23793   ins_encode %{
23794     assert(UseSSE >= 4, "required");
23795 
    // Create a byte shuffle mask from the int shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
23798 
23799     // Duplicate and multiply each shuffle by 4
23800     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23801     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23802     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23803     __ psllw($vtmp$$XMMRegister, 2);
23804 
23805     // Duplicate again to create 4 copies of byte index
23806     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23807     __ psllw($dst$$XMMRegister, 8);
23808     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23809 
23810     // Add 3,2,1,0 to get alternate byte index
23811     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23812     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23813   %}
23814   ins_pipe( pipe_slow );
23815 %}
23816 
23817 instruct rearrangeI(vec dst, vec shuffle) %{
23818   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23819             UseAVX == 0);
23820   match(Set dst (VectorRearrange dst shuffle));
23821   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23822   ins_encode %{
23823     assert(UseSSE >= 4, "required");
23824     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23825   %}
23826   ins_pipe( pipe_slow );
23827 %}
23828 
23829 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23830   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23831             UseAVX > 0);
23832   match(Set dst (VectorRearrange src shuffle));
23833   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23834   ins_encode %{
23835     int vlen_enc = vector_length_encoding(this);
23836     BasicType bt = Matcher::vector_element_basic_type(this);
23837     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23838   %}
23839   ins_pipe( pipe_slow );
23840 %}
23841 
23842 // LoadShuffle/Rearrange for Long and Double
23843 
23844 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23845   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23846             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23847   match(Set dst (VectorLoadShuffle src));
23848   effect(TEMP dst, TEMP vtmp);
23849   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23850   ins_encode %{
23851     assert(UseAVX >= 2, "required");
23852 
23853     int vlen_enc = vector_length_encoding(this);
    // Create a double-word shuffle mask from the long shuffle mask;
    // only a double-word shuffle instruction is available on these platforms.
23856 
23857     // Multiply each shuffle by two to get double word index
23858     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23859 
23860     // Duplicate each double word shuffle
23861     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23862     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23863 
23864     // Add one to get alternate double word index
23865     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23866   %}
23867   ins_pipe( pipe_slow );
23868 %}
23869 
23870 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23871   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23872             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23873   match(Set dst (VectorRearrange src shuffle));
23874   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23875   ins_encode %{
23876     assert(UseAVX >= 2, "required");
23877 
23878     int vlen_enc = vector_length_encoding(this);
23879     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23880   %}
23881   ins_pipe( pipe_slow );
23882 %}
23883 
23884 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23885   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23886             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23887   match(Set dst (VectorRearrange src shuffle));
23888   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23889   ins_encode %{
23890     assert(UseAVX > 2, "required");
23891 
23892     int vlen_enc = vector_length_encoding(this);
23893     if (vlen_enc == Assembler::AVX_128bit) {
23894       vlen_enc = Assembler::AVX_256bit;
23895     }
23896     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23897   %}
23898   ins_pipe( pipe_slow );
23899 %}
23900 
23901 // --------------------------------- FMA --------------------------------------
23902 // a * b + c
23903 
23904 instruct vfmaF_reg(vec a, vec b, vec c) %{
23905   match(Set c (FmaVF  c (Binary a b)));
23906   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23907   ins_cost(150);
23908   ins_encode %{
23909     assert(UseFMA, "not enabled");
23910     int vlen_enc = vector_length_encoding(this);
23911     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23912   %}
23913   ins_pipe( pipe_slow );
23914 %}
23915 
23916 instruct vfmaF_mem(vec a, memory b, vec c) %{
23917   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23918   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23919   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23920   ins_cost(150);
23921   ins_encode %{
23922     assert(UseFMA, "not enabled");
23923     int vlen_enc = vector_length_encoding(this);
23924     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23925   %}
23926   ins_pipe( pipe_slow );
23927 %}
23928 
23929 instruct vfmaD_reg(vec a, vec b, vec c) %{
23930   match(Set c (FmaVD  c (Binary a b)));
23931   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23932   ins_cost(150);
23933   ins_encode %{
23934     assert(UseFMA, "not enabled");
23935     int vlen_enc = vector_length_encoding(this);
23936     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23937   %}
23938   ins_pipe( pipe_slow );
23939 %}
23940 
23941 instruct vfmaD_mem(vec a, memory b, vec c) %{
23942   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23943   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23944   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23945   ins_cost(150);
23946   ins_encode %{
23947     assert(UseFMA, "not enabled");
23948     int vlen_enc = vector_length_encoding(this);
23949     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23950   %}
23951   ins_pipe( pipe_slow );
23952 %}
23953 
23954 // --------------------------------- Vector Multiply Add --------------------------------------
23955 
23956 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23957   predicate(UseAVX == 0);
23958   match(Set dst (MulAddVS2VI dst src1));
23959   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23960   ins_encode %{
23961     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23962   %}
23963   ins_pipe( pipe_slow );
23964 %}
23965 
23966 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23967   predicate(UseAVX > 0);
23968   match(Set dst (MulAddVS2VI src1 src2));
23969   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23970   ins_encode %{
23971     int vlen_enc = vector_length_encoding(this);
23972     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23973   %}
23974   ins_pipe( pipe_slow );
23975 %}
23976 
23977 // --------------------------------- Vector Multiply Add Add ----------------------------------
23978 
23979 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23980   predicate(VM_Version::supports_avx512_vnni());
23981   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23982   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23983   ins_encode %{
23984     assert(UseAVX > 2, "required");
23985     int vlen_enc = vector_length_encoding(this);
23986     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23987   %}
23988   ins_pipe( pipe_slow );
23989   ins_cost(10);
23990 %}
23991 
23992 // --------------------------------- PopCount --------------------------------------
23993 
23994 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23995   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23996   match(Set dst (PopCountVI src));
23997   match(Set dst (PopCountVL src));
23998   format %{ "vector_popcount_integral $dst, $src" %}
23999   ins_encode %{
24001     int vlen_enc = vector_length_encoding(this, $src);
24002     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24003     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
24004   %}
24005   ins_pipe( pipe_slow );
24006 %}
24007 
24008 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
24009   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
24010   match(Set dst (PopCountVI src mask));
24011   match(Set dst (PopCountVL src mask));
24012   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
24013   ins_encode %{
24014     int vlen_enc = vector_length_encoding(this, $src);
24015     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24016     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24017     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
24018   %}
24019   ins_pipe( pipe_slow );
24020 %}
24021 
24022 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
24023   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
24024   match(Set dst (PopCountVI src));
24025   match(Set dst (PopCountVL src));
24026   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24027   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
24028   ins_encode %{
24030     int vlen_enc = vector_length_encoding(this, $src);
24031     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24032     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24033                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
24034   %}
24035   ins_pipe( pipe_slow );
24036 %}
24037 
24038 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
24039 
24040 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
24041   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24042                                               Matcher::vector_length_in_bytes(n->in(1))));
24043   match(Set dst (CountTrailingZerosV src));
24044   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
24045   ins_cost(400);
24046   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
24047   ins_encode %{
24048     int vlen_enc = vector_length_encoding(this, $src);
24049     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24050     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24051                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
24052   %}
24053   ins_pipe( pipe_slow );
24054 %}
24055 
24056 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24057   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24058             VM_Version::supports_avx512cd() &&
24059             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24060   match(Set dst (CountTrailingZerosV src));
24061   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24062   ins_cost(400);
24063   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
24064   ins_encode %{
24065     int vlen_enc = vector_length_encoding(this, $src);
24066     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24067     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24068                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
24069   %}
24070   ins_pipe( pipe_slow );
24071 %}
24072 
24073 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
24074   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24075   match(Set dst (CountTrailingZerosV src));
24076   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
24077   ins_cost(400);
24078   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
24079   ins_encode %{
24080     int vlen_enc = vector_length_encoding(this, $src);
24081     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24082     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24083                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
24084                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
24085   %}
24086   ins_pipe( pipe_slow );
24087 %}
24088 
24089 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24090   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24091   match(Set dst (CountTrailingZerosV src));
24092   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24093   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24094   ins_encode %{
24095     int vlen_enc = vector_length_encoding(this, $src);
24096     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24097     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24098                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24099   %}
24100   ins_pipe( pipe_slow );
24101 %}
24102 
24103 
24104 // --------------------------------- Bitwise Ternary Logic ----------------------------------
24105 
24106 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
24107   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
24108   effect(TEMP dst);
24109   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
24110   ins_encode %{
24111     int vector_len = vector_length_encoding(this);
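    // $func is an 8-bit truth table evaluated bitwise over (dst, src2, src3).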
24112     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
24113   %}
24114   ins_pipe( pipe_slow );
24115 %}
24116 
24117 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
24118   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
24119   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
24120   effect(TEMP dst);
24121   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
24122   ins_encode %{
24123     int vector_len = vector_length_encoding(this);
24124     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
24125   %}
24126   ins_pipe( pipe_slow );
24127 %}
24128 
24129 // --------------------------------- Rotation Operations ----------------------------------
24130 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
24131   match(Set dst (RotateLeftV src shift));
24132   match(Set dst (RotateRightV src shift));
24133   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
24134   ins_encode %{
24135     int opcode      = this->ideal_Opcode();
24136     int vector_len  = vector_length_encoding(this);
24137     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
24138     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
24139   %}
24140   ins_pipe( pipe_slow );
24141 %}
24142 
24143 instruct vprorate(vec dst, vec src, vec shift) %{
24144   match(Set dst (RotateLeftV src shift));
24145   match(Set dst (RotateRightV src shift));
24146   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
24147   ins_encode %{
24148     int opcode      = this->ideal_Opcode();
24149     int vector_len  = vector_length_encoding(this);
24150     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
24151     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
24152   %}
24153   ins_pipe( pipe_slow );
24154 %}
24155 
24156 // ---------------------------------- Masked Operations ------------------------------------
24157 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
24158   predicate(!n->in(3)->bottom_type()->isa_vectmask());
24159   match(Set dst (LoadVectorMasked mem mask));
24160   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24161   ins_encode %{
24162     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
24163     int vlen_enc = vector_length_encoding(this);
24164     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
24165   %}
24166   ins_pipe( pipe_slow );
24167 %}
24168 
24169 
24170 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
24171   predicate(n->in(3)->bottom_type()->isa_vectmask());
24172   match(Set dst (LoadVectorMasked mem mask));
24173   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24174   ins_encode %{
24175     BasicType elmType =  this->bottom_type()->is_vect()->element_basic_type();
24176     int vector_len = vector_length_encoding(this);
24177     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
24178   %}
24179   ins_pipe( pipe_slow );
24180 %}
24181 
24182 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
24183   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
24184   match(Set mem (StoreVectorMasked mem (Binary src mask)));
24185   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24186   ins_encode %{
24187     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24188     int vlen_enc = vector_length_encoding(src_node);
24189     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
24190     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
24191   %}
24192   ins_pipe( pipe_slow );
24193 %}
24194 
24195 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
24196   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
24197   match(Set mem (StoreVectorMasked mem (Binary src mask)));
24198   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24199   ins_encode %{
24200     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24201     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
24202     int vlen_enc = vector_length_encoding(src_node);
24203     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
24204   %}
24205   ins_pipe( pipe_slow );
24206 %}
24207 
24208 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
24209   match(Set addr (VerifyVectorAlignment addr mask));
24210   effect(KILL cr);
24211   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
24212   ins_encode %{
24213     Label Lskip;
24214     // check if masked bits of addr are zero
24215     __ testq($addr$$Register, $mask$$constant);
24216     __ jccb(Assembler::equal, Lskip);
24217     __ stop("verify_vector_alignment found a misaligned vector memory access");
24218     __ bind(Lskip);
24219   %}
24220   ins_pipe(pipe_slow);
24221 %}
24222 
24223 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
24224   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
24225   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
24226   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
24227   ins_encode %{
24228     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
24229     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
24230 
24231     Label DONE;
24232     int vlen_enc = vector_length_encoding(this, $src1);
24233     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
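    // ktmp1 = masked lanes that compare equal, ktmp2 = lanes outside the mask.
    // If their union covers all lanes, every masked lane matched and dst stays
    // -1; otherwise dst gets the index of the first mismatching lane.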
24234 
24235     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
24236     __ mov64($dst$$Register, -1L);
24237     __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
24238     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
24239     __ jccb(Assembler::carrySet, DONE);
24240     __ kmovql($dst$$Register, $ktmp1$$KRegister);
24241     __ notq($dst$$Register);
24242     __ tzcntq($dst$$Register, $dst$$Register);
24243     __ bind(DONE);
24244   %}
24245   ins_pipe( pipe_slow );
24246 %}
24247 
24248 
24249 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
24250   match(Set dst (VectorMaskGen len));
24251   effect(TEMP temp, KILL cr);
24252   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
24253   ins_encode %{
24254     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
24255   %}
24256   ins_pipe( pipe_slow );
24257 %}
24258 
24259 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
24260   match(Set dst (VectorMaskGen len));
24261   format %{ "vector_mask_gen $len \t! vector mask generator" %}
24262   effect(TEMP temp);
24263   ins_encode %{
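    // Build a constant with the low $len bits set by right-shifting all-ones.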
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
24265     __ kmovql($dst$$KRegister, $temp$$Register);
24266   %}
24267   ins_pipe( pipe_slow );
24268 %}
24269 
24270 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
24271   predicate(n->in(1)->bottom_type()->isa_vectmask());
24272   match(Set dst (VectorMaskToLong mask));
24273   effect(TEMP dst, KILL cr);
24274   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
24275   ins_encode %{
24276     int opcode = this->ideal_Opcode();
24277     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24278     int mask_len = Matcher::vector_length(this, $mask);
24279     int mask_size = mask_len * type2aelembytes(mbt);
24280     int vlen_enc = vector_length_encoding(this, $mask);
24281     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24282                              $dst$$Register, mask_len, mask_size, vlen_enc);
24283   %}
24284   ins_pipe( pipe_slow );
24285 %}
24286 
24287 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24288   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24289   match(Set dst (VectorMaskToLong mask));
24290   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24291   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24292   ins_encode %{
24293     int opcode = this->ideal_Opcode();
24294     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24295     int mask_len = Matcher::vector_length(this, $mask);
24296     int vlen_enc = vector_length_encoding(this, $mask);
24297     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24298                              $dst$$Register, mask_len, mbt, vlen_enc);
24299   %}
24300   ins_pipe( pipe_slow );
24301 %}
24302 
24303 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24304   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24305   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24306   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24307   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24308   ins_encode %{
24309     int opcode = this->ideal_Opcode();
24310     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24311     int mask_len = Matcher::vector_length(this, $mask);
24312     int vlen_enc = vector_length_encoding(this, $mask);
24313     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24314                              $dst$$Register, mask_len, mbt, vlen_enc);
24315   %}
24316   ins_pipe( pipe_slow );
24317 %}
24318 
24319 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24320   predicate(n->in(1)->bottom_type()->isa_vectmask());
24321   match(Set dst (VectorMaskTrueCount mask));
24322   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24323   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24324   ins_encode %{
24325     int opcode = this->ideal_Opcode();
24326     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24327     int mask_len = Matcher::vector_length(this, $mask);
24328     int mask_size = mask_len * type2aelembytes(mbt);
24329     int vlen_enc = vector_length_encoding(this, $mask);
24330     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24331                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24332   %}
24333   ins_pipe( pipe_slow );
24334 %}
24335 
24336 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24337   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24338   match(Set dst (VectorMaskTrueCount mask));
24339   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24340   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24341   ins_encode %{
24342     int opcode = this->ideal_Opcode();
24343     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24344     int mask_len = Matcher::vector_length(this, $mask);
24345     int vlen_enc = vector_length_encoding(this, $mask);
24346     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24347                              $tmp$$Register, mask_len, mbt, vlen_enc);
24348   %}
24349   ins_pipe( pipe_slow );
24350 %}
24351 
24352 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24353   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24354   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24355   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24356   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24357   ins_encode %{
24358     int opcode = this->ideal_Opcode();
24359     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24360     int mask_len = Matcher::vector_length(this, $mask);
24361     int vlen_enc = vector_length_encoding(this, $mask);
24362     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24363                              $tmp$$Register, mask_len, mbt, vlen_enc);
24364   %}
24365   ins_pipe( pipe_slow );
24366 %}
24367 
24368 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24369   predicate(n->in(1)->bottom_type()->isa_vectmask());
24370   match(Set dst (VectorMaskFirstTrue mask));
24371   match(Set dst (VectorMaskLastTrue mask));
24372   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24373   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24374   ins_encode %{
24375     int opcode = this->ideal_Opcode();
24376     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24377     int mask_len = Matcher::vector_length(this, $mask);
24378     int mask_size = mask_len * type2aelembytes(mbt);
24379     int vlen_enc = vector_length_encoding(this, $mask);
24380     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24381                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24382   %}
24383   ins_pipe( pipe_slow );
24384 %}
24385 
24386 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24387   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24388   match(Set dst (VectorMaskFirstTrue mask));
24389   match(Set dst (VectorMaskLastTrue mask));
24390   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24391   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24392   ins_encode %{
24393     int opcode = this->ideal_Opcode();
24394     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24395     int mask_len = Matcher::vector_length(this, $mask);
24396     int vlen_enc = vector_length_encoding(this, $mask);
24397     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24398                              $tmp$$Register, mask_len, mbt, vlen_enc);
24399   %}
24400   ins_pipe( pipe_slow );
24401 %}
24402 
24403 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24404   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24405   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24406   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24407   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24408   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24409   ins_encode %{
24410     int opcode = this->ideal_Opcode();
24411     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24412     int mask_len = Matcher::vector_length(this, $mask);
24413     int vlen_enc = vector_length_encoding(this, $mask);
24414     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24415                              $tmp$$Register, mask_len, mbt, vlen_enc);
24416   %}
24417   ins_pipe( pipe_slow );
24418 %}
24419 
24420 // --------------------------------- Compress/Expand Operations ---------------------------
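// CompressV packs the lanes selected by $mask into the low-order lanes of
// $dst (remaining lanes are zeroed); ExpandV is the inverse scatter. E.g.
// compressing [a, b, c, d] under mask 0b0101 gives [a, c, 0, 0], while
// expanding [a, c, 0, 0] under the same mask gives [a, 0, c, 0].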
24421 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24422   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24423   match(Set dst (CompressV src mask));
24424   match(Set dst (ExpandV src mask));
24425   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
24426   format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24427   ins_encode %{
24428     int opcode = this->ideal_Opcode();
24429     int vlen_enc = vector_length_encoding(this);
24430     BasicType bt  = Matcher::vector_element_basic_type(this);
24431     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24432                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24433   %}
24434   ins_pipe( pipe_slow );
24435 %}
24436 
24437 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24438   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24439   match(Set dst (CompressV src mask));
24440   match(Set dst (ExpandV src mask));
24441   format %{ "vector_compress_expand $dst, $src, $mask" %}
24442   ins_encode %{
24443     int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vlen_enc);
24447   %}
24448   ins_pipe( pipe_slow );
24449 %}
24450 
24451 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24452   match(Set dst (CompressM mask));
24453   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24454   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24455   ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "expected a vector mask");
24457     int mask_len = Matcher::vector_length(this);
24458     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24459   %}
24460   ins_pipe( pipe_slow );
24461 %}
24462 
24463 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
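// ReverseV reverses the bit order inside each lane (e.g. byte 0x01 becomes
// 0x80), while ReverseBytesV swaps the byte order of each lane (e.g. int
// 0x11223344 becomes 0x44332211).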
24464 
24465 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24466   predicate(!VM_Version::supports_gfni());
24467   match(Set dst (ReverseV src));
24468   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24469   format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24470   ins_encode %{
24471     int vec_enc = vector_length_encoding(this);
24472     BasicType bt = Matcher::vector_element_basic_type(this);
24473     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24474                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24475   %}
24476   ins_pipe( pipe_slow );
24477 %}
24478 
24479 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24480   predicate(VM_Version::supports_gfni());
24481   match(Set dst (ReverseV src));
24482   effect(TEMP dst, TEMP xtmp);
24483   format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
24484   ins_encode %{
24485     int vec_enc = vector_length_encoding(this);
24486     BasicType bt  = Matcher::vector_element_basic_type(this);
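    // 0x8040201008040201 is the GF2P8AFFINEQB matrix that reverses the bit
    // order within each byte; the helper presumably fixes up wider lanes
    // with an additional byte reversal.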
24487     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24488     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24489                                $xtmp$$XMMRegister);
24490   %}
24491   ins_pipe( pipe_slow );
24492 %}
24493 
24494 instruct vreverse_byte_reg(vec dst, vec src) %{
24495   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24496   match(Set dst (ReverseBytesV src));
24497   effect(TEMP dst);
24498   format %{ "vector_reverse_byte $dst, $src" %}
24499   ins_encode %{
24500     int vec_enc = vector_length_encoding(this);
24501     BasicType bt = Matcher::vector_element_basic_type(this);
24502     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24503   %}
24504   ins_pipe( pipe_slow );
24505 %}
24506 
24507 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24508   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24509   match(Set dst (ReverseBytesV src));
24510   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24511   format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24512   ins_encode %{
24513     int vec_enc = vector_length_encoding(this);
24514     BasicType bt = Matcher::vector_element_basic_type(this);
24515     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24516                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24517   %}
24518   ins_pipe( pipe_slow );
24519 %}
24520 
24521 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
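// On AVX512CD targets the int/long cases can lower directly to
// vplzcnt{d,q}; subword and pre-AVX512 shapes fall back to longer
// emulation sequences inside the macro-assembler helpers.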
24522 
24523 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24524   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24525                                               Matcher::vector_length_in_bytes(n->in(1))));
24526   match(Set dst (CountLeadingZerosV src));
24527   format %{ "vector_count_leading_zeros $dst, $src" %}
24528   ins_encode %{
24529      int vlen_enc = vector_length_encoding(this, $src);
24530      BasicType bt = Matcher::vector_element_basic_type(this, $src);
24531      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24532                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24533   %}
24534   ins_pipe( pipe_slow );
24535 %}
24536 
24537 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24538   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24539                                               Matcher::vector_length_in_bytes(n->in(1))));
24540   match(Set dst (CountLeadingZerosV src mask));
24541   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24542   ins_encode %{
24543     int vlen_enc = vector_length_encoding(this, $src);
24544     BasicType bt = Matcher::vector_element_basic_type(this, $src);
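    // Pre-load dst with src so that merge-masking leaves the original
    // source elements in the lanes whose mask bit is clear.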
24545     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24546     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24547                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24548   %}
24549   ins_pipe( pipe_slow );
24550 %}
24551 
24552 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24553   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24554             VM_Version::supports_avx512cd() &&
24555             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24556   match(Set dst (CountLeadingZerosV src));
24557   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24558   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
24559   ins_encode %{
24560     int vlen_enc = vector_length_encoding(this, $src);
24561     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24562     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24563                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24564   %}
24565   ins_pipe( pipe_slow );
24566 %}
24567 
24568 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24569   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24570   match(Set dst (CountLeadingZerosV src));
24571   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24572   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24573   ins_encode %{
24574     int vlen_enc = vector_length_encoding(this, $src);
24575     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24576     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24577                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24578                                        $rtmp$$Register, true, vlen_enc);
24579   %}
24580   ins_pipe( pipe_slow );
24581 %}
24582 
24583 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24584   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24585             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24586   match(Set dst (CountLeadingZerosV src));
24587   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24588   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24589   ins_encode %{
24590     int vlen_enc = vector_length_encoding(this, $src);
24591     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24592     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24593                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24594   %}
24595   ins_pipe( pipe_slow );
24596 %}
24597 
24598 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24599   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24600             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24601   match(Set dst (CountLeadingZerosV src));
24602   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24603   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24604   ins_encode %{
24605     int vlen_enc = vector_length_encoding(this, $src);
24606     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24607     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24608                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24609   %}
24610   ins_pipe( pipe_slow );
24611 %}
24612 
24613 // ---------------------------------- Vector Masked Operations ------------------------------------
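// These rules rely on EVEX merge-masking: evmasked_op is invoked with
// merge=true, so lanes whose mask bit is clear keep the previous contents
// of $dst while only the selected lanes receive the new result.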
24614 
24615 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24616   match(Set dst (AddVB (Binary dst src2) mask));
24617   match(Set dst (AddVS (Binary dst src2) mask));
24618   match(Set dst (AddVI (Binary dst src2) mask));
24619   match(Set dst (AddVL (Binary dst src2) mask));
24620   match(Set dst (AddVF (Binary dst src2) mask));
24621   match(Set dst (AddVD (Binary dst src2) mask));
24622   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24623   ins_encode %{
24624     int vlen_enc = vector_length_encoding(this);
24625     BasicType bt = Matcher::vector_element_basic_type(this);
24626     int opc = this->ideal_Opcode();
24627     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24628                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24629   %}
24630   ins_pipe( pipe_slow );
24631 %}
24632 
24633 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24634   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24635   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24636   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24637   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24638   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24639   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24640   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24641   ins_encode %{
24642     int vlen_enc = vector_length_encoding(this);
24643     BasicType bt = Matcher::vector_element_basic_type(this);
24644     int opc = this->ideal_Opcode();
24645     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24646                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24647   %}
24648   ins_pipe( pipe_slow );
24649 %}
24650 
24651 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24652   match(Set dst (XorV (Binary dst src2) mask));
24653   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24654   ins_encode %{
24655     int vlen_enc = vector_length_encoding(this);
24656     BasicType bt = Matcher::vector_element_basic_type(this);
24657     int opc = this->ideal_Opcode();
24658     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24659                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24660   %}
24661   ins_pipe( pipe_slow );
24662 %}
24663 
24664 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24665   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24666   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24667   ins_encode %{
24668     int vlen_enc = vector_length_encoding(this);
24669     BasicType bt = Matcher::vector_element_basic_type(this);
24670     int opc = this->ideal_Opcode();
24671     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24672                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24673   %}
24674   ins_pipe( pipe_slow );
24675 %}
24676 
24677 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24678   match(Set dst (OrV (Binary dst src2) mask));
24679   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24680   ins_encode %{
24681     int vlen_enc = vector_length_encoding(this);
24682     BasicType bt = Matcher::vector_element_basic_type(this);
24683     int opc = this->ideal_Opcode();
24684     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24685                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24686   %}
24687   ins_pipe( pipe_slow );
24688 %}
24689 
24690 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24691   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24692   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24693   ins_encode %{
24694     int vlen_enc = vector_length_encoding(this);
24695     BasicType bt = Matcher::vector_element_basic_type(this);
24696     int opc = this->ideal_Opcode();
24697     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24698                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24699   %}
24700   ins_pipe( pipe_slow );
24701 %}
24702 
24703 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24704   match(Set dst (AndV (Binary dst src2) mask));
24705   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24706   ins_encode %{
24707     int vlen_enc = vector_length_encoding(this);
24708     BasicType bt = Matcher::vector_element_basic_type(this);
24709     int opc = this->ideal_Opcode();
24710     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24711                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24712   %}
24713   ins_pipe( pipe_slow );
24714 %}
24715 
24716 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24717   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24718   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24719   ins_encode %{
24720     int vlen_enc = vector_length_encoding(this);
24721     BasicType bt = Matcher::vector_element_basic_type(this);
24722     int opc = this->ideal_Opcode();
24723     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24724                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24725   %}
24726   ins_pipe( pipe_slow );
24727 %}
24728 
24729 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24730   match(Set dst (SubVB (Binary dst src2) mask));
24731   match(Set dst (SubVS (Binary dst src2) mask));
24732   match(Set dst (SubVI (Binary dst src2) mask));
24733   match(Set dst (SubVL (Binary dst src2) mask));
24734   match(Set dst (SubVF (Binary dst src2) mask));
24735   match(Set dst (SubVD (Binary dst src2) mask));
24736   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24737   ins_encode %{
24738     int vlen_enc = vector_length_encoding(this);
24739     BasicType bt = Matcher::vector_element_basic_type(this);
24740     int opc = this->ideal_Opcode();
24741     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24742                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24743   %}
24744   ins_pipe( pipe_slow );
24745 %}
24746 
24747 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24748   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24749   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24750   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24751   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24752   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24753   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24754   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24755   ins_encode %{
24756     int vlen_enc = vector_length_encoding(this);
24757     BasicType bt = Matcher::vector_element_basic_type(this);
24758     int opc = this->ideal_Opcode();
24759     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24760                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24761   %}
24762   ins_pipe( pipe_slow );
24763 %}
24764 
24765 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24766   match(Set dst (MulVS (Binary dst src2) mask));
24767   match(Set dst (MulVI (Binary dst src2) mask));
24768   match(Set dst (MulVL (Binary dst src2) mask));
24769   match(Set dst (MulVF (Binary dst src2) mask));
24770   match(Set dst (MulVD (Binary dst src2) mask));
24771   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24772   ins_encode %{
24773     int vlen_enc = vector_length_encoding(this);
24774     BasicType bt = Matcher::vector_element_basic_type(this);
24775     int opc = this->ideal_Opcode();
24776     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24777                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24778   %}
24779   ins_pipe( pipe_slow );
24780 %}
24781 
24782 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24783   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24784   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24785   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24786   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24787   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24788   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24789   ins_encode %{
24790     int vlen_enc = vector_length_encoding(this);
24791     BasicType bt = Matcher::vector_element_basic_type(this);
24792     int opc = this->ideal_Opcode();
24793     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24794                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24795   %}
24796   ins_pipe( pipe_slow );
24797 %}
24798 
24799 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24800   match(Set dst (SqrtVF dst mask));
24801   match(Set dst (SqrtVD dst mask));
24802   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24803   ins_encode %{
24804     int vlen_enc = vector_length_encoding(this);
24805     BasicType bt = Matcher::vector_element_basic_type(this);
24806     int opc = this->ideal_Opcode();
24807     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24808                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24809   %}
24810   ins_pipe( pipe_slow );
24811 %}
24812 
24813 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24814   match(Set dst (DivVF (Binary dst src2) mask));
24815   match(Set dst (DivVD (Binary dst src2) mask));
24816   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24817   ins_encode %{
24818     int vlen_enc = vector_length_encoding(this);
24819     BasicType bt = Matcher::vector_element_basic_type(this);
24820     int opc = this->ideal_Opcode();
24821     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24822                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24823   %}
24824   ins_pipe( pipe_slow );
24825 %}
24826 
24827 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24828   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24829   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24830   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24831   ins_encode %{
24832     int vlen_enc = vector_length_encoding(this);
24833     BasicType bt = Matcher::vector_element_basic_type(this);
24834     int opc = this->ideal_Opcode();
24835     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24836                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24837   %}
24838   ins_pipe( pipe_slow );
24839 %}
24840 
24841 
24842 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24843   match(Set dst (RotateLeftV (Binary dst shift) mask));
24844   match(Set dst (RotateRightV (Binary dst shift) mask));
24845   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24846   ins_encode %{
24847     int vlen_enc = vector_length_encoding(this);
24848     BasicType bt = Matcher::vector_element_basic_type(this);
24849     int opc = this->ideal_Opcode();
24850     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24851                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24852   %}
24853   ins_pipe( pipe_slow );
24854 %}
24855 
24856 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24857   match(Set dst (RotateLeftV (Binary dst src2) mask));
24858   match(Set dst (RotateRightV (Binary dst src2) mask));
24859   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24860   ins_encode %{
24861     int vlen_enc = vector_length_encoding(this);
24862     BasicType bt = Matcher::vector_element_basic_type(this);
24863     int opc = this->ideal_Opcode();
24864     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24865                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24866   %}
24867   ins_pipe( pipe_slow );
24868 %}
24869 
24870 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24871   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24872   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24873   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24874   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24875   ins_encode %{
24876     int vlen_enc = vector_length_encoding(this);
24877     BasicType bt = Matcher::vector_element_basic_type(this);
24878     int opc = this->ideal_Opcode();
24879     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24880                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24881   %}
24882   ins_pipe( pipe_slow );
24883 %}
24884 
24885 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24886   predicate(!n->as_ShiftV()->is_var_shift());
24887   match(Set dst (LShiftVS (Binary dst src2) mask));
24888   match(Set dst (LShiftVI (Binary dst src2) mask));
24889   match(Set dst (LShiftVL (Binary dst src2) mask));
24890   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24891   ins_encode %{
24892     int vlen_enc = vector_length_encoding(this);
24893     BasicType bt = Matcher::vector_element_basic_type(this);
24894     int opc = this->ideal_Opcode();
24895     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24896                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24897   %}
24898   ins_pipe( pipe_slow );
24899 %}
24900 
24901 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24902   predicate(n->as_ShiftV()->is_var_shift());
24903   match(Set dst (LShiftVS (Binary dst src2) mask));
24904   match(Set dst (LShiftVI (Binary dst src2) mask));
24905   match(Set dst (LShiftVL (Binary dst src2) mask));
24906   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24907   ins_encode %{
24908     int vlen_enc = vector_length_encoding(this);
24909     BasicType bt = Matcher::vector_element_basic_type(this);
24910     int opc = this->ideal_Opcode();
24911     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24912                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24913   %}
24914   ins_pipe( pipe_slow );
24915 %}
24916 
24917 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24918   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24919   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24920   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24921   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24922   ins_encode %{
24923     int vlen_enc = vector_length_encoding(this);
24924     BasicType bt = Matcher::vector_element_basic_type(this);
24925     int opc = this->ideal_Opcode();
24926     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24927                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24928   %}
24929   ins_pipe( pipe_slow );
24930 %}
24931 
24932 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24933   predicate(!n->as_ShiftV()->is_var_shift());
24934   match(Set dst (RShiftVS (Binary dst src2) mask));
24935   match(Set dst (RShiftVI (Binary dst src2) mask));
24936   match(Set dst (RShiftVL (Binary dst src2) mask));
24937   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24938   ins_encode %{
24939     int vlen_enc = vector_length_encoding(this);
24940     BasicType bt = Matcher::vector_element_basic_type(this);
24941     int opc = this->ideal_Opcode();
24942     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24943                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24944   %}
24945   ins_pipe( pipe_slow );
24946 %}
24947 
24948 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24949   predicate(n->as_ShiftV()->is_var_shift());
24950   match(Set dst (RShiftVS (Binary dst src2) mask));
24951   match(Set dst (RShiftVI (Binary dst src2) mask));
24952   match(Set dst (RShiftVL (Binary dst src2) mask));
24953   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24954   ins_encode %{
24955     int vlen_enc = vector_length_encoding(this);
24956     BasicType bt = Matcher::vector_element_basic_type(this);
24957     int opc = this->ideal_Opcode();
24958     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24959                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24960   %}
24961   ins_pipe( pipe_slow );
24962 %}
24963 
24964 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24965   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24966   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24967   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24968   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24969   ins_encode %{
24970     int vlen_enc = vector_length_encoding(this);
24971     BasicType bt = Matcher::vector_element_basic_type(this);
24972     int opc = this->ideal_Opcode();
24973     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24974                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24975   %}
24976   ins_pipe( pipe_slow );
24977 %}
24978 
24979 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24980   predicate(!n->as_ShiftV()->is_var_shift());
24981   match(Set dst (URShiftVS (Binary dst src2) mask));
24982   match(Set dst (URShiftVI (Binary dst src2) mask));
24983   match(Set dst (URShiftVL (Binary dst src2) mask));
24984   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24985   ins_encode %{
24986     int vlen_enc = vector_length_encoding(this);
24987     BasicType bt = Matcher::vector_element_basic_type(this);
24988     int opc = this->ideal_Opcode();
24989     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24990                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24991   %}
24992   ins_pipe( pipe_slow );
24993 %}
24994 
24995 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24996   predicate(n->as_ShiftV()->is_var_shift());
24997   match(Set dst (URShiftVS (Binary dst src2) mask));
24998   match(Set dst (URShiftVI (Binary dst src2) mask));
24999   match(Set dst (URShiftVL (Binary dst src2) mask));
25000   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
25001   ins_encode %{
25002     int vlen_enc = vector_length_encoding(this);
25003     BasicType bt = Matcher::vector_element_basic_type(this);
25004     int opc = this->ideal_Opcode();
25005     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25006                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
25007   %}
25008   ins_pipe( pipe_slow );
25009 %}
25010 
25011 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
25012   match(Set dst (MaxV (Binary dst src2) mask));
25013   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
25014   ins_encode %{
25015     int vlen_enc = vector_length_encoding(this);
25016     BasicType bt = Matcher::vector_element_basic_type(this);
25017     int opc = this->ideal_Opcode();
25018     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25019                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25020   %}
25021   ins_pipe( pipe_slow );
25022 %}
25023 
25024 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
25025   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
25026   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
25027   ins_encode %{
25028     int vlen_enc = vector_length_encoding(this);
25029     BasicType bt = Matcher::vector_element_basic_type(this);
25030     int opc = this->ideal_Opcode();
25031     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25032                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
25033   %}
25034   ins_pipe( pipe_slow );
25035 %}
25036 
25037 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
25038   match(Set dst (MinV (Binary dst src2) mask));
25039   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
25040   ins_encode %{
25041     int vlen_enc = vector_length_encoding(this);
25042     BasicType bt = Matcher::vector_element_basic_type(this);
25043     int opc = this->ideal_Opcode();
25044     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25045                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25046   %}
25047   ins_pipe( pipe_slow );
25048 %}
25049 
25050 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
25051   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
25052   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
25053   ins_encode %{
25054     int vlen_enc = vector_length_encoding(this);
25055     BasicType bt = Matcher::vector_element_basic_type(this);
25056     int opc = this->ideal_Opcode();
25057     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25058                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
25059   %}
25060   ins_pipe( pipe_slow );
25061 %}
25062 
25063 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
25064   match(Set dst (VectorRearrange (Binary dst src2) mask));
25065   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
25066   ins_encode %{
25067     int vlen_enc = vector_length_encoding(this);
25068     BasicType bt = Matcher::vector_element_basic_type(this);
25069     int opc = this->ideal_Opcode();
25070     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25071                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25072   %}
25073   ins_pipe( pipe_slow );
25074 %}
25075 
25076 instruct vabs_masked(vec dst, kReg mask) %{
25077   match(Set dst (AbsVB dst mask));
25078   match(Set dst (AbsVS dst mask));
25079   match(Set dst (AbsVI dst mask));
25080   match(Set dst (AbsVL dst mask));
25081   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
25082   ins_encode %{
25083     int vlen_enc = vector_length_encoding(this);
25084     BasicType bt = Matcher::vector_element_basic_type(this);
25085     int opc = this->ideal_Opcode();
25086     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25087                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
25088   %}
25089   ins_pipe( pipe_slow );
25090 %}
25091 
25092 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
25093   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
25094   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
25095   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
25096   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
25098     int vlen_enc = vector_length_encoding(this);
25099     BasicType bt = Matcher::vector_element_basic_type(this);
25100     int opc = this->ideal_Opcode();
25101     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25102                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
25103   %}
25104   ins_pipe( pipe_slow );
25105 %}
25106 
25107 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
25108   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
25109   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
25110   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
25111   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
25113     int vlen_enc = vector_length_encoding(this);
25114     BasicType bt = Matcher::vector_element_basic_type(this);
25115     int opc = this->ideal_Opcode();
25116     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25117                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
25118   %}
25119   ins_pipe( pipe_slow );
25120 %}
25121 
25122 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
25123   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
25124   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
25125   ins_encode %{
25126     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
25127     int vlen_enc = vector_length_encoding(this, $src1);
25128     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
25129 
    // Compare the operands lane-wise, dispatching on the source element type.
25131     switch (src1_elem_bt) {
25132       case T_BYTE: {
25133         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25134         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25135         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25136         break;
25137       }
25138       case T_SHORT: {
25139         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25140         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25141         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25142         break;
25143       }
25144       case T_INT: {
25145         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25146         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25147         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25148         break;
25149       }
25150       case T_LONG: {
25151         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25152         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25153         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25154         break;
25155       }
25156       case T_FLOAT: {
25157         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25158         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25159         break;
25160       }
25161       case T_DOUBLE: {
25162         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25163         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25164         break;
25165       }
25166       default: assert(false, "%s", type2name(src1_elem_bt)); break;
25167     }
25168   %}
25169   ins_pipe( pipe_slow );
25170 %}
25171 
25172 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
25173   predicate(Matcher::vector_length(n) <= 32);
25174   match(Set dst (MaskAll src));
25175   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
25176   ins_encode %{
25177     int mask_len = Matcher::vector_length(this);
25178     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
25179   %}
25180   ins_pipe( pipe_slow );
25181 %}
25182 
25183 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
25184   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
25185   match(Set dst (XorVMask src (MaskAll cnt)));
25186   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
25187   format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
25188   ins_encode %{
25189     uint masklen = Matcher::vector_length(this);
25190     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
25191   %}
25192   ins_pipe( pipe_slow );
25193 %}
25194 
25195 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
25196   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
25197             (Matcher::vector_length(n) == 16) ||
25198             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
25199   match(Set dst (XorVMask src (MaskAll cnt)));
25200   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
25201   ins_encode %{
25202     uint masklen = Matcher::vector_length(this);
25203     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
25204   %}
25205   ins_pipe( pipe_slow );
25206 %}
25207 
25208 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
25209   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
25210   match(Set dst (VectorLongToMask src));
25211   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
25212   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
25213   ins_encode %{
25214     int mask_len = Matcher::vector_length(this);
25215     int vec_enc  = vector_length_encoding(mask_len);
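    // mask_len <= 8 here, so the helper gets xnoreg for its vector temp and
    // $xtmp stays unused (it is still reserved, presumably to keep the
    // shape uniform with the GT8 variant below).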
25216     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25217                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
25218   %}
25219   ins_pipe( pipe_slow );
25220 %}
25221 
25222 
25223 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
25224   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
25225   match(Set dst (VectorLongToMask src));
25226   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
25227   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
25228   ins_encode %{
25229     int mask_len = Matcher::vector_length(this);
25230     assert(mask_len <= 32, "invalid mask length");
25231     int vec_enc  = vector_length_encoding(mask_len);
25232     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25233                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
25234   %}
25235   ins_pipe( pipe_slow );
25236 %}
25237 
25238 instruct long_to_mask_evex(kReg dst, rRegL src) %{
25239   predicate(n->bottom_type()->isa_vectmask());
25240   match(Set dst (VectorLongToMask src));
25241   format %{ "long_to_mask_evex $dst, $src\t!" %}
25242   ins_encode %{
25243     __ kmov($dst$$KRegister, $src$$Register);
25244   %}
25245   ins_pipe( pipe_slow );
25246 %}
25247 
25248 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
25249   match(Set dst (AndVMask src1 src2));
25250   match(Set dst (OrVMask src1 src2));
25251   match(Set dst (XorVMask src1 src2));
25252   effect(TEMP kscratch);
25253   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
25254   ins_encode %{
25255     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
25256     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
25257     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
25258     uint masklen = Matcher::vector_length(this);
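    // k-register ops have no byte-granularity forms without AVX512DQ, so
    // treat sub-16-bit masks as word-sized.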
25259     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
25260     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
25261   %}
25262   ins_pipe( pipe_slow );
25263 %}
25264 
25265 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
25266   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25267   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25268   ins_encode %{
25269     int vlen_enc = vector_length_encoding(this);
25270     BasicType bt = Matcher::vector_element_basic_type(this);
25271     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25272                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
25273   %}
25274   ins_pipe( pipe_slow );
25275 %}
25276 
25277 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
25278   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25279   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25280   ins_encode %{
25281     int vlen_enc = vector_length_encoding(this);
25282     BasicType bt = Matcher::vector_element_basic_type(this);
25283     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25284                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25285   %}
25286   ins_pipe( pipe_slow );
25287 %}
25288 
25289 instruct castMM(kReg dst)
25290 %{
25291   match(Set dst (CastVV dst));
25292 
25293   size(0);
25294   format %{ "# castVV of $dst" %}
25295   ins_encode(/* empty encoding */);
25296   ins_cost(0);
25297   ins_pipe(empty);
25298 %}
25299 
25300 instruct castVV(vec dst)
25301 %{
25302   match(Set dst (CastVV dst));
25303 
25304   size(0);
25305   format %{ "# castVV of $dst" %}
25306   ins_encode(/* empty encoding */);
25307   ins_cost(0);
25308   ins_pipe(empty);
25309 %}
25310 
25311 instruct castVVLeg(legVec dst)
25312 %{
25313   match(Set dst (CastVV dst));
25314 
25315   size(0);
25316   format %{ "# castVV of $dst" %}
25317   ins_encode(/* empty encoding */);
25318   ins_cost(0);
25319   ins_pipe(empty);
25320 %}
25321 
25322 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25323 %{
25324   match(Set dst (IsInfiniteF src));
25325   effect(TEMP ktmp, KILL cr);
25326   format %{ "float_class_check $dst, $src" %}
25327   ins_encode %{
25328     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25329     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25330   %}
25331   ins_pipe(pipe_slow);
25332 %}
25333 
25334 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25335 %{
25336   match(Set dst (IsInfiniteD src));
25337   effect(TEMP ktmp, KILL cr);
25338   format %{ "double_class_check $dst, $src" %}
25339   ins_encode %{
25340     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25341     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25342   %}
25343   ins_pipe(pipe_slow);
25344 %}
25345 
25346 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25347 %{
25348   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25349             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25350   match(Set dst (SaturatingAddV src1 src2));
25351   match(Set dst (SaturatingSubV src1 src2));
25352   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25353   ins_encode %{
25354     int vlen_enc = vector_length_encoding(this);
25355     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25356     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25357                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25358   %}
25359   ins_pipe(pipe_slow);
25360 %}
25361 
25362 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25363 %{
25364   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25365             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25366   match(Set dst (SaturatingAddV src1 src2));
25367   match(Set dst (SaturatingSubV src1 src2));
25368   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25369   ins_encode %{
25370     int vlen_enc = vector_length_encoding(this);
25371     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25372     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25373                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25374   %}
25375   ins_pipe(pipe_slow);
25376 %}
25377 
25378 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25379 %{
25380   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25381             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25382             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25383   match(Set dst (SaturatingAddV src1 src2));
25384   match(Set dst (SaturatingSubV src1 src2));
25385   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25386   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25387   ins_encode %{
25388     int vlen_enc = vector_length_encoding(this);
25389     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25390     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25391                                         $src1$$XMMRegister, $src2$$XMMRegister,
25392                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25393                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25394   %}
25395   ins_pipe(pipe_slow);
25396 %}
25397 
25398 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25399 %{
25400   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25401             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25402             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25403   match(Set dst (SaturatingAddV src1 src2));
25404   match(Set dst (SaturatingSubV src1 src2));
25405   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25406   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25407   ins_encode %{
25408     int vlen_enc = vector_length_encoding(this);
25409     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25410     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25411                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25412                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25413   %}
25414   ins_pipe(pipe_slow);
25415 %}
25416 
25417 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25418 %{
25419   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25420             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25421             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25422   match(Set dst (SaturatingAddV src1 src2));
25423   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25424   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25425   ins_encode %{
25426     int vlen_enc = vector_length_encoding(this);
25427     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25428     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25429                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25430   %}
25431   ins_pipe(pipe_slow);
25432 %}
25433 
25434 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25435 %{
25436   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25437             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25438             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25439   match(Set dst (SaturatingAddV src1 src2));
25440   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25441   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25442   ins_encode %{
25443     int vlen_enc = vector_length_encoding(this);
25444     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25445     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25446                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25447   %}
25448   ins_pipe(pipe_slow);
25449 %}
25450 
25451 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25452 %{
25453   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25454             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25455             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25456   match(Set dst (SaturatingSubV src1 src2));
25457   effect(TEMP ktmp);
25458   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25459   ins_encode %{
25460     int vlen_enc = vector_length_encoding(this);
25461     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25462     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25463                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25464   %}
25465   ins_pipe(pipe_slow);
25466 %}
25467 
25468 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25469 %{
25470   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25471             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25472             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25473   match(Set dst (SaturatingSubV src1 src2));
25474   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25475   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25476   ins_encode %{
25477     int vlen_enc = vector_length_encoding(this);
25478     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25479     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25480                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25481   %}
25482   ins_pipe(pipe_slow);
25483 %}
25484 
25485 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25486 %{
25487   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25488             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25489   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25490   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25491   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25492   ins_encode %{
25493     int vlen_enc = vector_length_encoding(this);
25494     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25495     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25496                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25497   %}
25498   ins_pipe(pipe_slow);
25499 %}
25500 
25501 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25502 %{
25503   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25504             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25505   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25506   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25507   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25508   ins_encode %{
25509     int vlen_enc = vector_length_encoding(this);
25510     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25511     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25512                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25513   %}
25514   ins_pipe(pipe_slow);
25515 %}
25516 
25517 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25518   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25519             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25520   match(Set dst (SaturatingAddV (Binary dst src) mask));
25521   match(Set dst (SaturatingSubV (Binary dst src) mask));
25522   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25523   ins_encode %{
25524     int vlen_enc = vector_length_encoding(this);
25525     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25526     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25527                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25528   %}
25529   ins_pipe( pipe_slow );
25530 %}
25531 
25532 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25533   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25534             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25535   match(Set dst (SaturatingAddV (Binary dst src) mask));
25536   match(Set dst (SaturatingSubV (Binary dst src) mask));
25537   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25538   ins_encode %{
25539     int vlen_enc = vector_length_encoding(this);
25540     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25541     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25542                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25543   %}
25544   ins_pipe( pipe_slow );
25545 %}
25546 
25547 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25548   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25549             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25550   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25551   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25552   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25553   ins_encode %{
25554     int vlen_enc = vector_length_encoding(this);
25555     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25556     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25557                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25558   %}
25559   ins_pipe( pipe_slow );
25560 %}
25561 
25562 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25563   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25564             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25565   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25566   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25567   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25568   ins_encode %{
25569     int vlen_enc = vector_length_encoding(this);
25570     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25571     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25572                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25573   %}
25574   ins_pipe( pipe_slow );
25575 %}
25576 
25577 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25578 %{
25579   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25580   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25581   ins_encode %{
25582     int vlen_enc = vector_length_encoding(this);
25583     BasicType bt = Matcher::vector_element_basic_type(this);
25584     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25585   %}
25586   ins_pipe(pipe_slow);
25587 %}
25588 
25589 instruct reinterpretS2HF(regF dst, rRegI src)
25590 %{
25591   match(Set dst (ReinterpretS2HF src));
25592   format %{ "vmovw $dst, $src" %}
25593   ins_encode %{
25594     __ vmovw($dst$$XMMRegister, $src$$Register);
25595   %}
25596   ins_pipe(pipe_slow);
25597 %}
25598 
25599 instruct reinterpretHF2S(rRegI dst, regF src)
25600 %{
25601   match(Set dst (ReinterpretHF2S src));
25602   format %{ "vmovw $dst, $src" %}
25603   ins_encode %{
25604     __ vmovw($dst$$Register, $src$$XMMRegister);
25605   %}
25606   ins_pipe(pipe_slow);
25607 %}
25608 
25609 instruct convF2HFAndS2HF(regF dst, regF src)
25610 %{
25611   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25612   format %{ "convF2HFAndS2HF $dst, $src" %}
25613   ins_encode %{
25614     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25615   %}
25616   ins_pipe(pipe_slow);
25617 %}
25618 
25619 instruct convHF2SAndHF2F(regF dst, regF src)
25620 %{
25621   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25622   format %{ "convHF2SAndHF2F $dst, $src" %}
25623   ins_encode %{
25624     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25625   %}
25626   ins_pipe(pipe_slow);
25627 %}
25628 
25629 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25630 %{
25631   match(Set dst (SqrtHF src));
25632   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25633   ins_encode %{
25634     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25635   %}
25636   ins_pipe(pipe_slow);
25637 %}
25638 
25639 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25640 %{
25641   match(Set dst (AddHF src1 src2));
25642   match(Set dst (DivHF src1 src2));
25643   match(Set dst (MulHF src1 src2));
25644   match(Set dst (SubHF src1 src2));
25645   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25646   ins_encode %{
25647     int opcode = this->ideal_Opcode();
25648     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25649   %}
25650   ins_pipe(pipe_slow);
25651 %}
25652 
25653 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25654 %{
25655   predicate(VM_Version::supports_avx10_2());
25656   match(Set dst (MaxHF src1 src2));
25657   match(Set dst (MinHF src1 src2));
25658   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25659   ins_encode %{
25660     int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25661     __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25662   %}
25663   ins_pipe( pipe_slow );
25664 %}
25665 
25666 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25667 %{
25668   predicate(!VM_Version::supports_avx10_2());
25669   match(Set dst (MaxHF src1 src2));
25670   match(Set dst (MinHF src1 src2));
25671   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25672   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25673   ins_encode %{
25674     int opcode = this->ideal_Opcode();
25675     __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25676                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25677   %}
25678   ins_pipe( pipe_slow );
25679 %}
25680 
25681 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25682 %{
25683   match(Set dst (FmaHF  src2 (Binary dst src1)));
25684   effect(DEF dst);
25685   format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25686   ins_encode %{
25687     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25688   %}
25689   ins_pipe( pipe_slow );
25690 %}
25691 
25692 
25693 instruct vector_sqrt_HF_reg(vec dst, vec src)
25694 %{
25695   match(Set dst (SqrtVHF src));
25696   format %{ "vector_sqrt_fp16 $dst, $src" %}
25697   ins_encode %{
25698     int vlen_enc = vector_length_encoding(this);
25699     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25700   %}
25701   ins_pipe(pipe_slow);
25702 %}
25703 
25704 instruct vector_sqrt_HF_mem(vec dst, memory src)
25705 %{
25706   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25707   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25708   ins_encode %{
25709     int vlen_enc = vector_length_encoding(this);
25710     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25711   %}
25712   ins_pipe(pipe_slow);
25713 %}
25714 
25715 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25716 %{
25717   match(Set dst (AddVHF src1 src2));
25718   match(Set dst (DivVHF src1 src2));
25719   match(Set dst (MulVHF src1 src2));
25720   match(Set dst (SubVHF src1 src2));
25721   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25722   ins_encode %{
25723     int vlen_enc = vector_length_encoding(this);
25724     int opcode = this->ideal_Opcode();
25725     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25726   %}
25727   ins_pipe(pipe_slow);
25728 %}
25729 
25730 
25731 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25732 %{
25733   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25734   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25735   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25736   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25737   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25738   ins_encode %{
25739     int vlen_enc = vector_length_encoding(this);
25740     int opcode = this->ideal_Opcode();
25741     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25742   %}
25743   ins_pipe(pipe_slow);
25744 %}
25745 
25746 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25747 %{
25748   match(Set dst (FmaVHF src2 (Binary dst src1)));
25749   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25750   ins_encode %{
25751     int vlen_enc = vector_length_encoding(this);
25752     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25753   %}
25754   ins_pipe( pipe_slow );
25755 %}
25756 
25757 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25758 %{
25759   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25760   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25761   ins_encode %{
25762     int vlen_enc = vector_length_encoding(this);
25763     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25764   %}
25765   ins_pipe( pipe_slow );
25766 %}
25767 
25768 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25769 %{
25770   predicate(VM_Version::supports_avx10_2());
25771   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25772   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25773   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25774   ins_encode %{
25775     int vlen_enc = vector_length_encoding(this);
25776     int function =  this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25777     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25778   %}
25779   ins_pipe( pipe_slow );
25780 %}
25781 
25782 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25783 %{
25784   predicate(VM_Version::supports_avx10_2());
25785   match(Set dst (MinVHF src1 src2));
25786   match(Set dst (MaxVHF src1 src2));
25787   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25788   ins_encode %{
25789     int vlen_enc = vector_length_encoding(this);
25790     int function =  this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25791     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25792   %}
25793   ins_pipe( pipe_slow );
25794 %}
25795 
25796 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25797 %{
25798   predicate(!VM_Version::supports_avx10_2());
25799   match(Set dst (MinVHF src1 src2));
25800   match(Set dst (MaxVHF src1 src2));
25801   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25802   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25803   ins_encode %{
25804     int vlen_enc = vector_length_encoding(this);
25805     int opcode = this->ideal_Opcode();
25806     __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25807                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25808   %}
25809   ins_pipe( pipe_slow );
25810 %}
25811 
25812 //----------PEEPHOLE RULES-----------------------------------------------------
25813 // These must follow all instruction definitions as they use the names
25814 // defined in the instructions definitions.
25815 //
25816 // peeppredicate ( rule_predicate );
// // the rule is applied only when this predicate holds; otherwise it is ignored
25818 //
25819 // peepmatch ( root_instr_name [preceding_instruction]* );
25820 //
25821 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file and has the signature
// // bool (Block*, int, PhaseRegAlloc*, MachNode* (*)(), int...),
// // with the arguments being the basic block, the current node index inside the
// // block, the register allocator, functions that, when invoked, return a new
// // node as defined in peepreplace, and the rule numbers of the nodes appearing
// // in the corresponding peepmatch. The procedure returns true if the
// // transformation succeeded, else false.
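// //
// // e.g. (a hypothetical declaration following the signature above):
// //   bool my_peep_procedure(Block* block, int node_index, PhaseRegAlloc* ra,
// //                          MachNode* (*gen_replacement)(), int rule0, int rule1);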
25830 //
25831 // peepconstraint %{
25832 // (instruction_number.operand_name relational_op instruction_number.operand_name
25833 //  [, ...] );
25834 // // instruction numbers are zero-based using left to right order in peepmatch
25835 //
25836 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25837 // // provide an instruction_number.operand_name for each operand that appears
25838 // // in the replacement instruction's match rule
25839 //
25840 // ---------VM FLAGS---------------------------------------------------------
25841 //
25842 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25843 //
25844 // Each peephole rule is given an identifying number starting with zero and
25845 // increasing by one in the order seen by the parser.  An individual peephole
25846 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25847 // on the command-line.
25848 //
25849 // ---------CURRENT LIMITATIONS----------------------------------------------
25850 //
25851 // Only transformations inside a basic block (do we need more for peephole)
25852 //
25853 // ---------EXAMPLE----------------------------------------------------------
25854 //
25855 // // pertinent parts of existing instructions in architecture description
25856 // instruct movI(rRegI dst, rRegI src)
25857 // %{
25858 //   match(Set dst (CopyI src));
25859 // %}
25860 //
25861 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25862 // %{
25863 //   match(Set dst (AddI dst src));
25864 //   effect(KILL cr);
25865 // %}
25866 //
25867 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25868 // %{
25869 //   match(Set dst (AddI dst src));
25870 // %}
25871 //
25872 // 1. Simple replacement
25873 // - Only match adjacent instructions in same basic block
25874 // - Only equality constraints
25875 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25876 // - Only one replacement instruction
25877 //
25878 // // Change (inc mov) to lea
25879 // peephole %{
25880 //   // lea should only be emitted when beneficial
25881 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25882 //   // increment preceded by register-register move
25883 //   peepmatch ( incI_rReg movI );
25884 //   // require that the destination register of the increment
25885 //   // match the destination register of the move
25886 //   peepconstraint ( 0.dst == 1.dst );
25887 //   // construct a replacement instruction that sets
25888 //   // the destination to ( move's source register + one )
25889 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25890 // %}
25891 //
25892 // 2. Procedural replacement
// - More flexibility in finding relevant nodes
25894 // - More flexible constraints
25895 // - More flexible transformations
25896 // - May utilise architecture-dependent API more effectively
25897 // - Currently only one replacement instruction due to adlc parsing capabilities
25898 //
25899 // // Change (inc mov) to lea
25900 // peephole %{
25901 //   // lea should only be emitted when beneficial
25902 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of the matched nodes are passed into the procedure below
//   peepmatch ( incI_rReg movI );
//   // the procedure that performs the transformation
//   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that, when invoked, creates
//   // this node is passed into the procedure above
25909 //   peepreplace ( leaI_rReg_immI() );
25910 // %}
25911 
// These instructions are not matched by the matcher but are used by the peephole rules below.
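// In ModRM/SIB encoding, using rbp or r13 as the base with mod == 00 (or
// omitting the base entirely) has the special meaning "disp32 follows", so
// the assembler must emit extra displacement bytes for such forms. The rules
// below therefore prefer the short base+index*1 form and swap base and index
// when the base would be rbp/r13; the asserts record the expectation that
// both operands are never simultaneously rbp/r13 (presumably ensured by the
// peephole procedures).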
25913 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25914 %{
25915   predicate(false);
25916   match(Set dst (AddI src1 src2));
25917   format %{ "leal    $dst, [$src1 + $src2]" %}
25918   ins_encode %{
25919     Register dst = $dst$$Register;
25920     Register src1 = $src1$$Register;
25921     Register src2 = $src2$$Register;
25922     if (src1 != rbp && src1 != r13) {
25923       __ leal(dst, Address(src1, src2, Address::times_1));
25924     } else {
25925       assert(src2 != rbp && src2 != r13, "");
25926       __ leal(dst, Address(src2, src1, Address::times_1));
25927     }
25928   %}
25929   ins_pipe(ialu_reg_reg);
25930 %}
25931 
25932 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25933 %{
25934   predicate(false);
25935   match(Set dst (AddI src1 src2));
25936   format %{ "leal    $dst, [$src1 + $src2]" %}
25937   ins_encode %{
25938     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25939   %}
25940   ins_pipe(ialu_reg_reg);
25941 %}
25942 
25943 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25944 %{
25945   predicate(false);
25946   match(Set dst (LShiftI src shift));
25947   format %{ "leal    $dst, [$src << $shift]" %}
25948   ins_encode %{
25949     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25950     Register src = $src$$Register;
25951     if (scale == Address::times_2 && src != rbp && src != r13) {
25952       __ leal($dst$$Register, Address(src, src, Address::times_1));
25953     } else {
25954       __ leal($dst$$Register, Address(noreg, src, scale));
25955     }
25956   %}
25957   ins_pipe(ialu_reg_reg);
25958 %}
25959 
25960 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25961 %{
25962   predicate(false);
25963   match(Set dst (AddL src1 src2));
25964   format %{ "leaq    $dst, [$src1 + $src2]" %}
25965   ins_encode %{
25966     Register dst = $dst$$Register;
25967     Register src1 = $src1$$Register;
25968     Register src2 = $src2$$Register;
25969     if (src1 != rbp && src1 != r13) {
25970       __ leaq(dst, Address(src1, src2, Address::times_1));
25971     } else {
25972       assert(src2 != rbp && src2 != r13, "");
25973       __ leaq(dst, Address(src2, src1, Address::times_1));
25974     }
25975   %}
25976   ins_pipe(ialu_reg_reg);
25977 %}
25978 
25979 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25980 %{
25981   predicate(false);
25982   match(Set dst (AddL src1 src2));
25983   format %{ "leaq    $dst, [$src1 + $src2]" %}
25984   ins_encode %{
25985     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25986   %}
25987   ins_pipe(ialu_reg_reg);
25988 %}
25989 
25990 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25991 %{
25992   predicate(false);
25993   match(Set dst (LShiftL src shift));
25994   format %{ "leaq    $dst, [$src << $shift]" %}
25995   ins_encode %{
25996     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25997     Register src = $src$$Register;
25998     if (scale == Address::times_2 && src != rbp && src != r13) {
25999       __ leaq($dst$$Register, Address(src, src, Address::times_1));
26000     } else {
26001       __ leaq($dst$$Register, Address(noreg, src, scale));
26002     }
26003   %}
26004   ins_pipe(ialu_reg_reg);
26005 %}
26006 
// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (supports_fast_2op_lea()), whereas the {inc, dec} rules are generally
// beneficial only on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
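//
// For example (illustrative), the first rule below rewrites
//   movl  edx, eax
//   addl  edx, ecx
// into the single instruction
//   leal  edx, [eax + ecx]
// eliminating the intermediate register-register move.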
26013 
26014 peephole
26015 %{
26016   peeppredicate(VM_Version::supports_fast_2op_lea());
26017   peepmatch (addI_rReg);
26018   peepprocedure (lea_coalesce_reg);
26019   peepreplace (leaI_rReg_rReg_peep());
26020 %}
26021 
26022 peephole
26023 %{
26024   peeppredicate(VM_Version::supports_fast_2op_lea());
26025   peepmatch (addI_rReg_imm);
26026   peepprocedure (lea_coalesce_imm);
26027   peepreplace (leaI_rReg_immI_peep());
26028 %}
26029 
26030 peephole
26031 %{
26032   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26033                 VM_Version::is_intel_cascade_lake());
26034   peepmatch (incI_rReg);
26035   peepprocedure (lea_coalesce_imm);
26036   peepreplace (leaI_rReg_immI_peep());
26037 %}
26038 
26039 peephole
26040 %{
26041   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26042                 VM_Version::is_intel_cascade_lake());
26043   peepmatch (decI_rReg);
26044   peepprocedure (lea_coalesce_imm);
26045   peepreplace (leaI_rReg_immI_peep());
26046 %}
26047 
26048 peephole
26049 %{
26050   peeppredicate(VM_Version::supports_fast_2op_lea());
26051   peepmatch (salI_rReg_immI2);
26052   peepprocedure (lea_coalesce_imm);
26053   peepreplace (leaI_rReg_immI2_peep());
26054 %}
26055 
26056 peephole
26057 %{
26058   peeppredicate(VM_Version::supports_fast_2op_lea());
26059   peepmatch (addL_rReg);
26060   peepprocedure (lea_coalesce_reg);
26061   peepreplace (leaL_rReg_rReg_peep());
26062 %}
26063 
26064 peephole
26065 %{
26066   peeppredicate(VM_Version::supports_fast_2op_lea());
26067   peepmatch (addL_rReg_imm);
26068   peepprocedure (lea_coalesce_imm);
26069   peepreplace (leaL_rReg_immL32_peep());
26070 %}
26071 
26072 peephole
26073 %{
26074   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26075                 VM_Version::is_intel_cascade_lake());
26076   peepmatch (incL_rReg);
26077   peepprocedure (lea_coalesce_imm);
26078   peepreplace (leaL_rReg_immL32_peep());
26079 %}
26080 
26081 peephole
26082 %{
26083   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26084                 VM_Version::is_intel_cascade_lake());
26085   peepmatch (decL_rReg);
26086   peepprocedure (lea_coalesce_imm);
26087   peepreplace (leaL_rReg_immL32_peep());
26088 %}
26089 
26090 peephole
26091 %{
26092   peeppredicate(VM_Version::supports_fast_2op_lea());
26093   peepmatch (salL_rReg_immI2);
26094   peepprocedure (lea_coalesce_imm);
26095   peepreplace (leaL_rReg_immI2_peep());
26096 %}
26097 
26098 peephole
26099 %{
26100   peepmatch (leaPCompressedOopOffset);
26101   peepprocedure (lea_remove_redundant);
26102 %}
26103 
26104 peephole
26105 %{
26106   peepmatch (leaP8Narrow);
26107   peepprocedure (lea_remove_redundant);
26108 %}
26109 
26110 peephole
26111 %{
26112   peepmatch (leaP32Narrow);
26113   peepprocedure (lea_remove_redundant);
26114 %}
26115 
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant when the downstream instructions (like JCC or CMOV) only
// use flags that are already set by the preceding instruction.
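// For example (illustrative): in
//   andl  eax, ebx
//   testl eax, eax
//   je    done
// the andl already sets ZF, so the testl can be removed when the branch only
// consumes flags that the andl produces.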
26118 
// int variant
26120 peephole
26121 %{
26122   peepmatch (testI_reg);
26123   peepprocedure (test_may_remove);
26124 %}
26125 
// long variant
26127 peephole
26128 %{
26129   peepmatch (testL_reg);
26130   peepprocedure (test_may_remove);
26131 %}
26132 
26133 
26134 //----------SMARTSPILL RULES---------------------------------------------------
26135 // These must follow all instruction definitions as they use the names
26136 // defined in the instructions definitions.