//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

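// For example, in the definitions below RAX is save-on-call under both
// conventions, spills as an int (Op_RegI), and has hardware encoding 0.
// RAX_H names the upper half of the same 64-bit register, so long and
// pointer values occupy the (RAX, RAX_H) pair.
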
// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers)
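// R16-R31 are the APX extended GPRs; their register numbers do not fit in
// a plain REX prefix and need the REX2 (or extended EVEX) encoding forms.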

// RBX, RSI, and RDI were previously set as save-on-entry for Java code,
// but SOE was turned off in Java code due to the frequent use of
// uncommon traps.  Now that the allocator is better, RSI and RDI are
// save-on-entry registers again.

reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

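// Following that heuristic, chunk0 lists freely usable save-on-call
// scratch registers (R10, R11, R8, R9) first, and RSP, which has a fixed
// role as the stack pointer, comes last.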
alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   R16,         R16_H,
                   R17,         R17_H,
                   R18,         R18_H,
                   R19,         R19_H,
                   R20,         R20_H,
                   R21,         R21_H,
                   R22,         R22_H,
                   R23,         R23_H,
                   R24,         R24_H,
                   R25,         R25_H,
                   R26,         R26_H,
                   R27,         R27_H,
                   R28,         R28_H,
                   R29,         R29_H,
                   R30,         R30_H,
                   R31,         R31_H,
                   RSP,         RSP_H);

// XMM registers.  512-bit registers, i.e. 16 32-bit words each, labeled (a)-(p).
// Word (a) in each register holds a Float; words (a)-(b) hold a Double.
// Whole registers are used by the SSE4.2 intrinsics, the array copy
// stubs, and superword operations (see the UseSSE42Intrinsics,
// UseXMMForArrayCopy, and UseSuperword flags).
// For pre-EVEX architectures:
//      XMM8-XMM15 must be encoded with REX (VEX when UseAVX is enabled).
// For EVEX-enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX when UseAVX is enabled).
//
// Linux ABI:   No register is preserved across function calls;
//              XMM0-XMM7 might hold parameters.
// Windows ABI: XMM6-XMM15 are preserved across function calls;
//              XMM0-XMM3 might hold parameters.

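// For example, a Float allocated to XMM0 occupies word (a) only, a Double
// occupies words (a)-(b) (XMM0, XMM0b), and a full 512-bit vector uses all
// sixteen words (a)-(p).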
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());

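// Note: k0 is not defined above; in EVEX encodings a mask-field value of
// zero means "no masking", so k0 cannot be used as a general write-mask
// and is not made available to the allocator.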

//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

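// The register classes below are defined with %{ ... %} bodies instead of
// static register lists; their masks are computed at runtime, which allows
// the contents to depend on CPU features (for instance, on whether the APX
// extended GPRs enumerated above are actually available).
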
// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
 1055 // flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
// Class for pre-EVEX (legacy) float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
// Class for EVEX float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
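
// reg_class_dynamic picks between two statically defined classes at runtime:
// the first argument is used when the trailing predicate evaluates to true,
// the second otherwise. float_reg above, for example, resolves to
// float_reg_evex (XMM0-XMM31) on EVEX-capable CPUs and to float_reg_legacy
// (XMM0-XMM15) everywhere else.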
 1115 
// Class for pre-EVEX (legacy) double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
// Class for EVEX double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
// Class for pre-EVEX (legacy) 32-bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
// Class for EVEX 32-bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre-EVEX (legacy) 64-bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for EVEX 64-bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre-EVEX (legacy) 128-bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for EVEX 128-bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre-EVEX (legacy) 256-bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for EVEX 256-bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
// Class for EVEX 512-bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
// Class for restricted (pre-EVEX legacy) 512-bit vector registers, XMM0-XMM15 only
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
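
// The matcher uses castLL_is_imm32() to decide whether the bounds of a
// CastLL's type can be checked with 32-bit immediate compares. A bound equal
// to min_jlong/max_jlong is unbounded and needs no check; any other bound
// must fit in a sign-extended 32-bit immediate. For example, a type of
// long:[0..1000] qualifies, while long:[0..0x100000000] does not.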
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
// R12 holds the heap base for compressed oops decoding, so it must be kept
// out of the allocatable masks whenever compressed oops are in use.
static bool need_r12_heapbase() {
  return UseCompressedOops;
}
 1528 
 1529 void reg_mask_init() {
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
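  // Note that a 64-bit register spans two adjacent OptoReg slots (low and
  // high halves), which is why every mask edit below is done in pairs: once
  // for reg->as_VMReg() and once for its ->next() slot.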
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
static bool generate_vzeroupper(Compile* C) {
  // Generate vzeroupper when the compiled code uses wide vectors or must clear the upper AVX state.
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());
}
 1629 
static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0;  // size of a vzeroupper instruction
}
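
// For reference, vzeroupper encodes as VEX.128.0F.WIG 77 (the three bytes
// 0xC5 0xF8 0x77), which is where the constant 3 above comes from.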
 1633 
// !!!!! Special hack: every type of call must report the byte offset from
//       the start of the call sequence to the point where the return
//       address will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
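
// The 5 bytes above correspond to the direct call encoding: opcode 0xE8
// followed by a 32-bit relative displacement.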
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
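
// The 15 bytes above cover the inline-cache call sequence: a 10-byte move
// of the IC value into its register (REX.W prefix, opcode, 64-bit immediate)
// followed by the 5-byte direct call (0xE8 plus rel32).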
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
 1662 
 1663 //
 1664 // Compute padding required for nodes which need alignment
 1665 //
 1666 
 1667 // The address of the call instruction needs to be 4-byte aligned to
 1668 // ensure that it does not span a cache line so that it can be patched.
 1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1670 {
 1671   current_offset += clear_avx_size(); // skip vzeroupper
 1672   current_offset += 1; // skip call opcode byte
 1673   return align_up(current_offset, alignment_required()) - current_offset;
 1674 }
 1675 
 1676 // The address of the call instruction needs to be 4-byte aligned to
 1677 // ensure that it does not span a cache line so that it can be patched.
 1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1679 {
 1680   current_offset += clear_avx_size(); // skip vzeroupper
 1681   current_offset += 11; // skip movq instruction + call opcode byte
 1682   return align_up(current_offset, alignment_required()) - current_offset;
 1683 }
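
// Worked example for the dynamic case, assuming alignment_required() is 4:
// if the node would start at offset 14 with no vzeroupper, the call's
// displacement begins at 14 + 11 = 25, so align_up(25, 4) - 25 = 3 padding
// bytes are emitted in front of the node to keep the patched field from
// straddling the alignment boundary.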
 1684 
 1685 // This could be in MacroAssembler but it's fairly C2 specific
 1686 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1687   Label exit;
 1688   __ jccb(Assembler::noParity, exit);
 1689   __ pushf();
 1690   //
 1691   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1692   // zero OF,AF,SF for NaN values.
 1693   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1694   // values returns 'less than' result (CF is set).
 1695   // Leave the rest of flags unchanged.
 1696   //
 1697   //    7 6 5 4 3 2 1 0
 1698   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1699   //    0 0 1 0 1 0 1 1   (0x2B)
 1700   //
 1701   __ andq(Address(rsp, 0), 0xffffff2b);
 1702   __ popf();
 1703   __ bind(exit);
 1704 }
 1705 
 1706 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
 1707   Label done;
 1708   __ movl(dst, -1);
 1709   __ jcc(Assembler::parity, done);
 1710   __ jcc(Assembler::below, done);
 1711   __ setcc(Assembler::notEqual, dst);
 1712   __ bind(done);
 1713 }
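
// emit_cmpfp3 materializes the three-way floating-point compare result in
// dst: -1 if the operands are unordered (NaN) or a < b, 0 if a == b, and
// +1 if a > b; setcc(notEqual, dst) writes 1 exactly when ZF == 0.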
 1714 
 1715 // Math.min()    # Math.max()
 1716 // --------------------------
 1717 // ucomis[s/d]   #
 1718 // ja   -> b     # a
 1719 // jp   -> NaN   # NaN
 1720 // jb   -> a     # b
 1721 // je            #
 1722 // |-jz -> a | b # a & b
 1723 // |    -> a     #
 1724 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1725                             XMMRegister a, XMMRegister b,
 1726                             XMMRegister xmmt, Register rt,
 1727                             bool min, bool single) {
 1728 
 1729   Label nan, zero, below, above, done;
 1730 
 1731   if (single)
 1732     __ ucomiss(a, b);
 1733   else
 1734     __ ucomisd(a, b);
 1735 
 1736   if (dst->encoding() != (min ? b : a)->encoding())
 1737     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1738   else
 1739     __ jccb(Assembler::above, done);
 1740 
 1741   __ jccb(Assembler::parity, nan);  // PF=1
 1742   __ jccb(Assembler::below, below); // CF=1
 1743 
 1744   // equal
 1745   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1746   if (single) {
 1747     __ ucomiss(a, xmmt);
 1748     __ jccb(Assembler::equal, zero);
 1749 
 1750     __ movflt(dst, a);
 1751     __ jmp(done);
 1752   }
 1753   else {
 1754     __ ucomisd(a, xmmt);
 1755     __ jccb(Assembler::equal, zero);
 1756 
 1757     __ movdbl(dst, a);
 1758     __ jmp(done);
 1759   }
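  // a and b compared equal, but +0.0 and -0.0 also compare equal and their
  // sign matters here: OR-ing the bit patterns below yields -0.0 if either
  // input is -0.0 (what min needs), while AND-ing yields +0.0 unless both
  // are -0.0 (what max needs).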
 1760 
 1761   __ bind(zero);
 1762   if (min)
 1763     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1764   else
 1765     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1766 
 1767   __ jmp(done);
 1768 
 1769   __ bind(above);
 1770   if (single)
 1771     __ movflt(dst, min ? b : a);
 1772   else
 1773     __ movdbl(dst, min ? b : a);
 1774 
 1775   __ jmp(done);
 1776 
 1777   __ bind(nan);
 1778   if (single) {
 1779     __ movl(rt, 0x7fc00000); // Float.NaN
 1780     __ movdl(dst, rt);
 1781   }
 1782   else {
 1783     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1784     __ movdq(dst, rt);
 1785   }
 1786   __ jmp(done);
 1787 
 1788   __ bind(below);
 1789   if (single)
 1790     __ movflt(dst, min ? a : b);
 1791   else
 1792     __ movdbl(dst, min ? a : b);
 1793 
 1794   __ bind(done);
 1795 }
 1796 
 1797 //=============================================================================
 1798 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1799 
 1800 int ConstantTable::calculate_table_base_offset() const {
 1801   return 0;  // absolute addressing, no offset
 1802 }
 1803 
 1804 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1805 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1806   ShouldNotReachHere();
 1807 }
 1808 
 1809 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1810   // Empty encoding
 1811 }
 1812 
 1813 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1814   return 0;
 1815 }
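
// On x86-64 the constant table is reached with RIP-relative/absolute
// addressing, so no base register ever needs to be materialized: the base
// node emits nothing and occupies zero bytes.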
 1816 
 1817 #ifndef PRODUCT
 1818 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1819   st->print("# MachConstantBaseNode (empty encoding)");
 1820 }
 1821 #endif
 1822 
 1823 
 1824 //=============================================================================
 1825 #ifndef PRODUCT
 1826 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1827   Compile* C = ra_->C;
 1828 
 1829   int framesize = C->output()->frame_size_in_bytes();
 1830   int bangsize = C->output()->bang_size_in_bytes();
 1831   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1832   // Remove wordSize for return addr which is already pushed.
 1833   framesize -= wordSize;
 1834 
 1835   if (C->output()->need_stack_bang(bangsize)) {
 1836     framesize -= wordSize;
 1837     st->print("# stack bang (%d bytes)", bangsize);
 1838     st->print("\n\t");
 1839     st->print("pushq   rbp\t# Save rbp");
 1840     if (PreserveFramePointer) {
 1841         st->print("\n\t");
 1842         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1843     }
 1844     if (framesize) {
 1845       st->print("\n\t");
 1846       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1847     }
 1848   } else {
 1849     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1850     st->print("\n\t");
 1851     framesize -= wordSize;
 1852     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1853     if (PreserveFramePointer) {
 1854       st->print("\n\t");
 1855       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1856       if (framesize > 0) {
 1857         st->print("\n\t");
 1858         st->print("addq    rbp, #%d", framesize);
 1859       }
 1860     }
 1861   }
 1862 
 1863   if (VerifyStackAtCalls) {
 1864     st->print("\n\t");
 1865     framesize -= wordSize;
 1866     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1867 #ifdef ASSERT
 1868     st->print("\n\t");
 1869     st->print("# stack alignment check");
 1870 #endif
 1871   }
 1872   if (C->stub_function() != nullptr) {
 1873     st->print("\n\t");
 1874     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1875     st->print("\n\t");
 1876     st->print("je      fast_entry\t");
 1877     st->print("\n\t");
 1878     st->print("call    #nmethod_entry_barrier_stub\t");
 1879     st->print("\n\tfast_entry:");
 1880   }
 1881   st->cr();
 1882 }
 1883 #endif
 1884 
 1885 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1886   Compile* C = ra_->C;
 1887 
 1888   __ verified_entry(C);
 1889 
 1890   if (ra_->C->stub_function() == nullptr) {
 1891     __ entry_barrier();
 1892   }
 1893 
 1894   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1895     __ bind(*_verified_entry);
 1896   }
 1897 
 1898   C->output()->set_frame_complete(__ offset());
 1899 
 1900   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users of the constant
    // table may be emitted before the MachConstantBaseNode itself.
 1903     ConstantTable& constant_table = C->output()->constant_table();
 1904     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1905   }
 1906 }
 1907 
 1908 
 1909 int MachPrologNode::reloc() const
 1910 {
 1911   return 0; // a large enough number
 1912 }
 1913 
 1914 //=============================================================================
 1915 #ifndef PRODUCT
 1916 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1917 {
 1918   Compile* C = ra_->C;
 1919   if (generate_vzeroupper(C)) {
 1920     st->print("vzeroupper");
 1921     st->cr(); st->print("\t");
 1922   }
 1923 
 1924   int framesize = C->output()->frame_size_in_bytes();
 1925   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1926   // Remove word for return adr already pushed
 1927   // and RBP
 1928   framesize -= 2*wordSize;
 1929 
 1930   if (framesize) {
 1931     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1932     st->print("\t");
 1933   }
 1934 
 1935   st->print_cr("popq    rbp");
 1936   if (do_polling() && C->is_method_compilation()) {
 1937     st->print("\t");
 1938     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1939                  "ja      #safepoint_stub\t"
 1940                  "# Safepoint: poll for GC");
 1941   }
 1942 }
 1943 #endif
 1944 
 1945 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1946 {
 1947   Compile* C = ra_->C;
 1948 
 1949   if (generate_vzeroupper(C)) {
 1950     // Clear upper bits of YMM registers when current compiled code uses
 1951     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1952     __ vzeroupper();
 1953   }
 1954 
 1955   // Subtract two words to account for return address and rbp
 1956   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
 1957   __ remove_frame(initial_framesize, C->needs_stack_repair());
 1958 
 1959   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1960     __ reserved_stack_check();
 1961   }
 1962 
 1963   if (do_polling() && C->is_method_compilation()) {
 1964     Label dummy_label;
 1965     Label* code_stub = &dummy_label;
 1966     if (!C->output()->in_scratch_emit_size()) {
 1967       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1968       C->output()->add_stub(stub);
 1969       code_stub = &stub->entry();
 1970     }
 1971     __ relocate(relocInfo::poll_return_type);
 1972     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1973   }
 1974 }
 1975 
 1976 int MachEpilogNode::reloc() const
 1977 {
 1978   return 2; // a large enough number
 1979 }
 1980 
 1981 const Pipeline* MachEpilogNode::pipeline() const
 1982 {
 1983   return MachNode::pipeline_class();
 1984 }
 1985 
 1986 //=============================================================================
 1987 
 1988 enum RC {
 1989   rc_bad,
 1990   rc_int,
 1991   rc_kreg,
 1992   rc_float,
 1993   rc_stack
 1994 };
 1995 
static enum RC rc_class(OptoReg::Name reg)
{
  if (!OptoReg::is_valid(reg)) return rc_bad;
 1999 
 2000   if (OptoReg::is_stack(reg)) return rc_stack;
 2001 
 2002   VMReg r = OptoReg::as_VMReg(reg);
 2003 
 2004   if (r->is_Register()) return rc_int;
 2005 
 2006   if (r->is_KRegister()) return rc_kreg;
 2007 
 2008   assert(r->is_XMMRegister(), "must be");
 2009   return rc_float;
 2010 }
 2011 
 2012 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2013 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2014                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2015 
 2016 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2017                      int stack_offset, int reg, uint ireg, outputStream* st);
 2018 
 2019 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2020                                       int dst_offset, uint ireg, outputStream* st) {
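  // Memory-to-memory vector moves have no free vector register to go
  // through, so a scratch register (rax for 32-bit copies, xmm0 for 256- and
  // 512-bit copies) is preserved in the unused area just below rsp for the
  // duration of the copy; 64- and 128-bit copies use pushq/popq instead.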
 2021   if (masm) {
 2022     switch (ireg) {
 2023     case Op_VecS:
 2024       __ movq(Address(rsp, -8), rax);
 2025       __ movl(rax, Address(rsp, src_offset));
 2026       __ movl(Address(rsp, dst_offset), rax);
 2027       __ movq(rax, Address(rsp, -8));
 2028       break;
 2029     case Op_VecD:
 2030       __ pushq(Address(rsp, src_offset));
 2031       __ popq (Address(rsp, dst_offset));
 2032       break;
 2033     case Op_VecX:
 2034       __ pushq(Address(rsp, src_offset));
 2035       __ popq (Address(rsp, dst_offset));
 2036       __ pushq(Address(rsp, src_offset+8));
 2037       __ popq (Address(rsp, dst_offset+8));
 2038       break;
 2039     case Op_VecY:
 2040       __ vmovdqu(Address(rsp, -32), xmm0);
 2041       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2042       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2043       __ vmovdqu(xmm0, Address(rsp, -32));
 2044       break;
 2045     case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2); // vector_len 2 == Assembler::AVX_512bit
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
 2050       break;
 2051     default:
 2052       ShouldNotReachHere();
 2053     }
 2054 #ifndef PRODUCT
 2055   } else {
 2056     switch (ireg) {
 2057     case Op_VecS:
 2058       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2059                 "movl    rax, [rsp + #%d]\n\t"
 2060                 "movl    [rsp + #%d], rax\n\t"
 2061                 "movq    rax, [rsp - #8]",
 2062                 src_offset, dst_offset);
 2063       break;
 2064     case Op_VecD:
 2065       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2066                 "popq    [rsp + #%d]",
 2067                 src_offset, dst_offset);
 2068       break;
    case Op_VecX:
 2070       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2071                 "popq    [rsp + #%d]\n\t"
 2072                 "pushq   [rsp + #%d]\n\t"
 2073                 "popq    [rsp + #%d]",
 2074                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2075       break;
 2076     case Op_VecY:
 2077       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2078                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2079                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2080                 "vmovdqu xmm0, [rsp - #32]",
 2081                 src_offset, dst_offset);
 2082       break;
 2083     case Op_VecZ:
 2084       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2085                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2086                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2087                 "vmovdqu xmm0, [rsp - #64]",
 2088                 src_offset, dst_offset);
 2089       break;
 2090     default:
 2091       ShouldNotReachHere();
 2092     }
 2093 #endif
 2094   }
 2095 }
 2096 
 2097 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2098                                        PhaseRegAlloc* ra_,
 2099                                        bool do_size,
 2100                                        outputStream* st) const {
  assert(masm != nullptr || st != nullptr, "sanity");
 2102   // Get registers to move
 2103   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2104   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2105   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2106   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2107 
 2108   enum RC src_second_rc = rc_class(src_second);
 2109   enum RC src_first_rc = rc_class(src_first);
 2110   enum RC dst_second_rc = rc_class(dst_second);
 2111   enum RC dst_first_rc = rc_class(dst_first);
 2112 
 2113   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2114          "must move at least 1 register" );
 2115 
 2116   if (src_first == dst_first && src_second == dst_second) {
 2117     // Self copy, no move
 2118     return 0;
 2119   }
 2120   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2121     uint ireg = ideal_reg();
 2122     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2123     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
 2125       // mem -> mem
 2126       int src_offset = ra_->reg2offset(src_first);
 2127       int dst_offset = ra_->reg2offset(dst_first);
 2128       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
 2130       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
 2132       int stack_offset = ra_->reg2offset(dst_first);
 2133       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
 2135       int stack_offset = ra_->reg2offset(src_first);
 2136       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2137     } else {
 2138       ShouldNotReachHere();
 2139     }
 2140     return 0;
 2141   }
 2142   if (src_first_rc == rc_stack) {
 2143     // mem ->
 2144     if (dst_first_rc == rc_stack) {
 2145       // mem -> mem
 2146       assert(src_second != dst_first, "overlap");
 2147       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2148           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2149         // 64-bit
 2150         int src_offset = ra_->reg2offset(src_first);
 2151         int dst_offset = ra_->reg2offset(dst_first);
 2152         if (masm) {
 2153           __ pushq(Address(rsp, src_offset));
 2154           __ popq (Address(rsp, dst_offset));
 2155 #ifndef PRODUCT
 2156         } else {
 2157           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2158                     "popq    [rsp + #%d]",
 2159                      src_offset, dst_offset);
 2160 #endif
 2161         }
 2162       } else {
 2163         // 32-bit
 2164         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2165         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2166         // No pushl/popl, so:
 2167         int src_offset = ra_->reg2offset(src_first);
 2168         int dst_offset = ra_->reg2offset(dst_first);
 2169         if (masm) {
 2170           __ movq(Address(rsp, -8), rax);
 2171           __ movl(rax, Address(rsp, src_offset));
 2172           __ movl(Address(rsp, dst_offset), rax);
 2173           __ movq(rax, Address(rsp, -8));
 2174 #ifndef PRODUCT
 2175         } else {
 2176           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2177                     "movl    rax, [rsp + #%d]\n\t"
 2178                     "movl    [rsp + #%d], rax\n\t"
 2179                     "movq    rax, [rsp - #8]",
 2180                      src_offset, dst_offset);
 2181 #endif
 2182         }
 2183       }
 2184       return 0;
 2185     } else if (dst_first_rc == rc_int) {
 2186       // mem -> gpr
 2187       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2188           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2189         // 64-bit
 2190         int offset = ra_->reg2offset(src_first);
 2191         if (masm) {
 2192           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2193 #ifndef PRODUCT
 2194         } else {
 2195           st->print("movq    %s, [rsp + #%d]\t# spill",
 2196                      Matcher::regName[dst_first],
 2197                      offset);
 2198 #endif
 2199         }
 2200       } else {
 2201         // 32-bit
 2202         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2203         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2204         int offset = ra_->reg2offset(src_first);
 2205         if (masm) {
 2206           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2207 #ifndef PRODUCT
 2208         } else {
 2209           st->print("movl    %s, [rsp + #%d]\t# spill",
 2210                      Matcher::regName[dst_first],
 2211                      offset);
 2212 #endif
 2213         }
 2214       }
 2215       return 0;
 2216     } else if (dst_first_rc == rc_float) {
      // mem -> xmm
 2218       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2219           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2220         // 64-bit
 2221         int offset = ra_->reg2offset(src_first);
 2222         if (masm) {
 2223           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2224 #ifndef PRODUCT
 2225         } else {
 2226           st->print("%s  %s, [rsp + #%d]\t# spill",
 2227                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2228                      Matcher::regName[dst_first],
 2229                      offset);
 2230 #endif
 2231         }
 2232       } else {
 2233         // 32-bit
 2234         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2235         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2236         int offset = ra_->reg2offset(src_first);
 2237         if (masm) {
 2238           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2239 #ifndef PRODUCT
 2240         } else {
 2241           st->print("movss   %s, [rsp + #%d]\t# spill",
 2242                      Matcher::regName[dst_first],
 2243                      offset);
 2244 #endif
 2245         }
 2246       }
 2247       return 0;
 2248     } else if (dst_first_rc == rc_kreg) {
 2249       // mem -> kreg
 2250       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2251           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2252         // 64-bit
 2253         int offset = ra_->reg2offset(src_first);
 2254         if (masm) {
 2255           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2256 #ifndef PRODUCT
 2257         } else {
 2258           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2259                      Matcher::regName[dst_first],
 2260                      offset);
 2261 #endif
 2262         }
 2263       }
 2264       return 0;
 2265     }
 2266   } else if (src_first_rc == rc_int) {
 2267     // gpr ->
 2268     if (dst_first_rc == rc_stack) {
 2269       // gpr -> mem
 2270       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2271           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2272         // 64-bit
 2273         int offset = ra_->reg2offset(dst_first);
 2274         if (masm) {
 2275           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2276 #ifndef PRODUCT
 2277         } else {
 2278           st->print("movq    [rsp + #%d], %s\t# spill",
 2279                      offset,
 2280                      Matcher::regName[src_first]);
 2281 #endif
 2282         }
 2283       } else {
 2284         // 32-bit
 2285         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2286         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2287         int offset = ra_->reg2offset(dst_first);
 2288         if (masm) {
 2289           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2290 #ifndef PRODUCT
 2291         } else {
 2292           st->print("movl    [rsp + #%d], %s\t# spill",
 2293                      offset,
 2294                      Matcher::regName[src_first]);
 2295 #endif
 2296         }
 2297       }
 2298       return 0;
 2299     } else if (dst_first_rc == rc_int) {
 2300       // gpr -> gpr
 2301       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2302           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2303         // 64-bit
 2304         if (masm) {
 2305           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2306                   as_Register(Matcher::_regEncode[src_first]));
 2307 #ifndef PRODUCT
 2308         } else {
 2309           st->print("movq    %s, %s\t# spill",
 2310                      Matcher::regName[dst_first],
 2311                      Matcher::regName[src_first]);
 2312 #endif
 2313         }
 2314         return 0;
 2315       } else {
 2316         // 32-bit
 2317         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2318         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2319         if (masm) {
 2320           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2321                   as_Register(Matcher::_regEncode[src_first]));
 2322 #ifndef PRODUCT
 2323         } else {
 2324           st->print("movl    %s, %s\t# spill",
 2325                      Matcher::regName[dst_first],
 2326                      Matcher::regName[src_first]);
 2327 #endif
 2328         }
 2329         return 0;
 2330       }
 2331     } else if (dst_first_rc == rc_float) {
 2332       // gpr -> xmm
 2333       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2334           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2335         // 64-bit
 2336         if (masm) {
 2337           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2338 #ifndef PRODUCT
 2339         } else {
 2340           st->print("movdq   %s, %s\t# spill",
 2341                      Matcher::regName[dst_first],
 2342                      Matcher::regName[src_first]);
 2343 #endif
 2344         }
 2345       } else {
 2346         // 32-bit
 2347         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2348         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2349         if (masm) {
 2350           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2351 #ifndef PRODUCT
 2352         } else {
 2353           st->print("movdl   %s, %s\t# spill",
 2354                      Matcher::regName[dst_first],
 2355                      Matcher::regName[src_first]);
 2356 #endif
 2357         }
 2358       }
 2359       return 0;
    } else if (dst_first_rc == rc_kreg) {
      // gpr -> kreg
 2361       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2362           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2363         // 64-bit
 2364         if (masm) {
 2365           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
 2373       }
 2374       Unimplemented();
 2375       return 0;
 2376     }
 2377   } else if (src_first_rc == rc_float) {
 2378     // xmm ->
 2379     if (dst_first_rc == rc_stack) {
 2380       // xmm -> mem
 2381       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2382           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2383         // 64-bit
 2384         int offset = ra_->reg2offset(dst_first);
 2385         if (masm) {
 2386           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2387 #ifndef PRODUCT
 2388         } else {
 2389           st->print("movsd   [rsp + #%d], %s\t# spill",
 2390                      offset,
 2391                      Matcher::regName[src_first]);
 2392 #endif
 2393         }
 2394       } else {
 2395         // 32-bit
 2396         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2397         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2398         int offset = ra_->reg2offset(dst_first);
 2399         if (masm) {
 2400           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2401 #ifndef PRODUCT
 2402         } else {
 2403           st->print("movss   [rsp + #%d], %s\t# spill",
 2404                      offset,
 2405                      Matcher::regName[src_first]);
 2406 #endif
 2407         }
 2408       }
 2409       return 0;
 2410     } else if (dst_first_rc == rc_int) {
 2411       // xmm -> gpr
 2412       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2413           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2414         // 64-bit
 2415         if (masm) {
 2416           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2417 #ifndef PRODUCT
 2418         } else {
 2419           st->print("movdq   %s, %s\t# spill",
 2420                      Matcher::regName[dst_first],
 2421                      Matcher::regName[src_first]);
 2422 #endif
 2423         }
 2424       } else {
 2425         // 32-bit
 2426         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2427         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2428         if (masm) {
 2429           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2430 #ifndef PRODUCT
 2431         } else {
 2432           st->print("movdl   %s, %s\t# spill",
 2433                      Matcher::regName[dst_first],
 2434                      Matcher::regName[src_first]);
 2435 #endif
 2436         }
 2437       }
 2438       return 0;
 2439     } else if (dst_first_rc == rc_float) {
 2440       // xmm -> xmm
 2441       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2442           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2443         // 64-bit
 2444         if (masm) {
 2445           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2446 #ifndef PRODUCT
 2447         } else {
 2448           st->print("%s  %s, %s\t# spill",
 2449                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2450                      Matcher::regName[dst_first],
 2451                      Matcher::regName[src_first]);
 2452 #endif
 2453         }
 2454       } else {
 2455         // 32-bit
 2456         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2457         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2458         if (masm) {
 2459           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2460 #ifndef PRODUCT
 2461         } else {
 2462           st->print("%s  %s, %s\t# spill",
 2463                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2464                      Matcher::regName[dst_first],
 2465                      Matcher::regName[src_first]);
 2466 #endif
 2467         }
 2468       }
 2469       return 0;
 2470     } else if (dst_first_rc == rc_kreg) {
 2471       assert(false, "Illegal spilling");
 2472       return 0;
 2473     }
 2474   } else if (src_first_rc == rc_kreg) {
 2475     if (dst_first_rc == rc_stack) {
      // kreg -> mem
 2477       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2478           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2479         // 64-bit
 2480         int offset = ra_->reg2offset(dst_first);
 2481         if (masm) {
 2482           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2483 #ifndef PRODUCT
 2484         } else {
 2485           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 2486                      offset,
 2487                      Matcher::regName[src_first]);
 2488 #endif
 2489         }
 2490       }
 2491       return 0;
    } else if (dst_first_rc == rc_int) {
      // kreg -> gpr
 2493       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2494           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2495         // 64-bit
 2496         if (masm) {
 2497           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2498 #ifndef PRODUCT
 2499         } else {
 2500          st->print("kmovq   %s, %s\t# spill",
 2501                      Matcher::regName[dst_first],
 2502                      Matcher::regName[src_first]);
 2503 #endif
 2504         }
 2505       }
 2506       Unimplemented();
 2507       return 0;
    } else if (dst_first_rc == rc_kreg) {
      // kreg -> kreg
 2509       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2510           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2511         // 64-bit
 2512         if (masm) {
 2513           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2514 #ifndef PRODUCT
 2515         } else {
 2516          st->print("kmovq   %s, %s\t# spill",
 2517                      Matcher::regName[dst_first],
 2518                      Matcher::regName[src_first]);
 2519 #endif
 2520         }
 2521       }
 2522       return 0;
 2523     } else if (dst_first_rc == rc_float) {
 2524       assert(false, "Illegal spill");
 2525       return 0;
 2526     }
 2527   }
 2528 
 2529   assert(0," foo ");
 2530   Unimplemented();
 2531   return 0;
 2532 }
 2533 
 2534 #ifndef PRODUCT
 2535 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2536   implementation(nullptr, ra_, false, st);
 2537 }
 2538 #endif
 2539 
 2540 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2541   implementation(masm, ra_, false, nullptr);
 2542 }
 2543 
 2544 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2545   return MachNode::size(ra_);
 2546 }
 2547 
 2548 //=============================================================================
 2549 #ifndef PRODUCT
 2550 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2551 {
 2552   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2553   int reg = ra_->get_reg_first(this);
 2554   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2555             Matcher::regName[reg], offset);
 2556 }
 2557 #endif
 2558 
 2559 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2560 {
 2561   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2562   int reg = ra_->get_encode(this);
 2563 
 2564   __ lea(as_Register(reg), Address(rsp, offset));
 2565 }
 2566 
 2567 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2568 {
 2569   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2570   if (ra_->get_encode(this) > 15) {
 2571     return (offset < 0x80) ? 6 : 9; // REX2
 2572   } else {
 2573     return (offset < 0x80) ? 5 : 8; // REX
 2574   }
 2575 }
 2576 
 2577 //=============================================================================
 2578 #ifndef PRODUCT
 2579 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2580 {
 2581   st->print_cr("MachVEPNode");
 2582 }
 2583 #endif
 2584 
 2585 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2586 {
 2587   CodeBuffer* cbuf = masm->code();
 2588   uint insts_size = cbuf->insts_size();
 2589   if (!_verified) {
 2590     __ ic_check(1);
 2591   } else {
 2592     // TODO 8284443 Avoid creation of temporary frame
 2593     if (ra_->C->stub_function() == nullptr) {
 2594       __ verified_entry(ra_->C, 0);
 2595       __ entry_barrier();
 2596       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2597       __ remove_frame(initial_framesize, false);
 2598     }
 2599     // Unpack inline type args passed as oop and then jump to
 2600     // the verified entry point (skipping the unverified entry).
 2601     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2602     // Emit code for verified entry and save increment for stack repair on return
 2603     __ verified_entry(ra_->C, sp_inc);
 2604     if (Compile::current()->output()->in_scratch_emit_size()) {
 2605       Label dummy_verified_entry;
 2606       __ jmp(dummy_verified_entry);
 2607     } else {
 2608       __ jmp(*_verified_entry);
 2609     }
 2610   }
  /* WARNING: these NOPs are critical so that the verified entry point is
     properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
 2613   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 2614   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 2615   if (nops_cnt > 0) {
 2616     __ nop(nops_cnt);
 2617   }
 2618 }
 2619 
 2620 //=============================================================================
 2621 #ifndef PRODUCT
 2622 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2623 {
 2624   if (UseCompressedClassPointers) {
 2625     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2626     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2627   } else {
 2628     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2629     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2630   }
 2631   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2632 }
 2633 #endif
 2634 
 2635 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2636 {
 2637   __ ic_check(InteriorEntryAlignment);
 2638 }
 2639 
 2640 
 2641 //=============================================================================
 2642 
 2643 bool Matcher::supports_vector_calling_convention(void) {
 2644   return EnableVectorSupport;
 2645 }
 2646 
 2647 static bool is_ndd_demotable(const MachNode* mdef) {
 2648   return ((mdef->flags() & Node::PD::Flag_ndd_demotable) != 0);
 2649 }
 2650 
 2651 static bool is_ndd_demotable_commutative(const MachNode* mdef) {
 2652   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_commutative) != 0);
 2653 }
 2654 
 2655 static bool is_demotion_candidate(const MachNode* mdef) {
 2656   return (is_ndd_demotable(mdef) || is_ndd_demotable_commutative(mdef));
 2657 }
 2658 
 2659 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2660                                             int oper_index) {
 2661   if (mdef == nullptr) {
 2662     return false;
 2663   }
 2664 
 2665   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2666       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2667     assert(oper_index != 1 || !is_demotion_candidate(mdef), "%s", mdef->Name());
 2668     assert(oper_index != 2 || !is_ndd_demotable_commutative(mdef), "%s", mdef->Name());
 2669     return false;
 2670   }
 2671 
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the definition towards any single address
  // component will not result in NDD demotion by the assembler.
 2675   if (mdef->operand_num_edges(oper_index) != 1) {
 2676     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2677     return false;
 2678   }
 2679 
 2680   // Demotion candidate must be register mask compatible with definition.
 2681   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2682   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2683     assert(!is_demotion_candidate(mdef), "%s", mdef->Name());
 2684     return false;
 2685   }
 2686 
 2687   switch (oper_index) {
  // The first operand of a MachNode corresponding to an Intel APX NDD
  // selection pattern can share its assigned register with the definition
  // operand if their live ranges do not overlap. In such a scenario we can
  // demote the instruction to a legacy map0/map1 encoding by replacing its
  // 4-byte extended EVEX prefix with a shorter REX/REX2 prefix. Demotion
  // candidates are decorated with a special flag by the instruction selector.
 2694   case 1:
 2695     return is_demotion_candidate(mdef);
 2696 
  // The definition operand of a commutative operation can also be biased
  // towards the second operand.
 2699   case 2:
 2700     return is_ndd_demotable_commutative(mdef);
 2701 
  // The current scheme selects at most two biasing candidates.
 2703   default:
 2704     assert(false, "unhandled operand index: %s", mdef->Name());
 2705     break;
 2706   }
 2707 
 2708   return false;
 2709 }
 2710 
 2711 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2712   assert(EnableVectorSupport, "sanity");
 2713   int lo = XMM0_num;
 2714   int hi = XMM0b_num;
 2715   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2716   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2717   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2718   return OptoRegPair(hi, lo);
 2719 }
 2720 
 2721 // Is this branch offset short enough that a short branch can be used?
 2722 //
 2723 // NOTE: If the platform does not provide any short branch variants, then
 2724 //       this method should return false for offset 0.
 2725 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 2729   offset -= br_size;
 2730 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
 2733   if (rule == jmpConUCF2_rule)
 2734     return (-126 <= offset && offset <= 125);
 2735   return (-128 <= offset && offset <= 127);
 2736 }
 2737 
 2738 // Return whether or not this register is ever used as an argument.
 2739 // This function is used on startup to build the trampoline stubs in
 2740 // generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
 2742 // available to the callee.
 2743 bool Matcher::can_be_java_arg(int reg)
 2744 {
 2745   return
 2746     reg ==  RDI_num || reg == RDI_H_num ||
 2747     reg ==  RSI_num || reg == RSI_H_num ||
 2748     reg ==  RDX_num || reg == RDX_H_num ||
 2749     reg ==  RCX_num || reg == RCX_H_num ||
 2750     reg ==   R8_num || reg ==  R8_H_num ||
 2751     reg ==   R9_num || reg ==  R9_H_num ||
 2752     reg ==  R12_num || reg == R12_H_num ||
 2753     reg == XMM0_num || reg == XMM0b_num ||
 2754     reg == XMM1_num || reg == XMM1b_num ||
 2755     reg == XMM2_num || reg == XMM2b_num ||
 2756     reg == XMM3_num || reg == XMM3b_num ||
 2757     reg == XMM4_num || reg == XMM4b_num ||
 2758     reg == XMM5_num || reg == XMM5b_num ||
 2759     reg == XMM6_num || reg == XMM6b_num ||
 2760     reg == XMM7_num || reg == XMM7b_num;
 2761 }
 2762 
 2763 bool Matcher::is_spillable_arg(int reg)
 2764 {
 2765   return can_be_java_arg(reg);
 2766 }
 2767 
 2768 uint Matcher::int_pressure_limit()
 2769 {
 2770   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2771 }
 2772 
 2773 uint Matcher::float_pressure_limit()
 2774 {
  // After experimenting with different values, the following default threshold
  // works best for LCM's register pressure scheduling on x64.
 2777   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2778   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2779   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2780 }
 2781 
 2782 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply when the
  // divisor is constant is faster than the hardware
  // DIV instruction (it uses MulHiL).
 2786   return false;
 2787 }
 2788 
 2789 // Register for DIVI projection of divmodI
 2790 const RegMask& Matcher::divI_proj_mask() {
 2791   return INT_RAX_REG_mask();
 2792 }
 2793 
 2794 // Register for MODI projection of divmodI
 2795 const RegMask& Matcher::modI_proj_mask() {
 2796   return INT_RDX_REG_mask();
 2797 }
 2798 
 2799 // Register for DIVL projection of divmodL
 2800 const RegMask& Matcher::divL_proj_mask() {
 2801   return LONG_RAX_REG_mask();
 2802 }
 2803 
 2804 // Register for MODL projection of divmodL
 2805 const RegMask& Matcher::modL_proj_mask() {
 2806   return LONG_RDX_REG_mask();
 2807 }
 2808 
 2809 %}
 2810 
 2811 source_hpp %{
 2812 // Header information of the source block.
 2813 // Method declarations/definitions which are used outside
 2814 // the ad-scope can conveniently be defined here.
 2815 //
 2816 // To keep related declarations/definitions/uses close together,
 2817 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 2818 
 2819 #include "runtime/vm_version.hpp"
 2820 
 2821 class NativeJump;
 2822 
 2823 class CallStubImpl {
 2824 
 2825   //--------------------------------------------------------------
 2826   //---<  Used for optimization in Compile::shorten_branches  >---
 2827   //--------------------------------------------------------------
 2828 
 2829  public:
 2830   // Size of call trampoline stub.
 2831   static uint size_call_trampoline() {
 2832     return 0; // no call trampolines on this platform
 2833   }
 2834 
 2835   // number of relocations needed by a call trampoline stub
 2836   static uint reloc_call_trampoline() {
 2837     return 0; // no call trampolines on this platform
 2838   }
 2839 };
 2840 
 2841 class HandlerImpl {
 2842 
 2843  public:
 2844 
 2845   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2846 
 2847   static uint size_deopt_handler() {
 2848     // one call and one jmp.
 2849     return 7;
 2850   }
 2851 };
 2852 
 2853 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
 2855     case  4: // fall-through
 2856     case  8: // fall-through
 2857     case 16: return Assembler::AVX_128bit;
 2858     case 32: return Assembler::AVX_256bit;
 2859     case 64: return Assembler::AVX_512bit;
 2860 
 2861     default: {
 2862       ShouldNotReachHere();
 2863       return Assembler::AVX_NoVec;
 2864     }
 2865   }
 2866 }
 2867 
 2868 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2869   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2870 }
 2871 
 2872 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2873   uint def_idx = use->operand_index(opnd);
 2874   Node* def = use->in(def_idx);
 2875   return vector_length_encoding(def);
 2876 }
 2877 
 2878 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2879   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2880          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2881 }
 2882 
 2883 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2884   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2885            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2886 }
 2887 
 2888 class Node::PD {
 2889 public:
 2890   enum NodeFlags : uint64_t {
 2891     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2892     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2893     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2894     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2895     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2896     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2897     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2898     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2899     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2900     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2901     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2902     Flag_ndd_demotable        = Node::_last_flag << 12,
 2903     Flag_ndd_demotable_commutative = Node::_last_flag << 13,
 2904     _last_flag                = Flag_ndd_demotable_commutative
 2905   };
 2906 };
 2907 
 2908 %} // end source_hpp
 2909 
 2910 source %{
 2911 
 2912 #include "opto/addnode.hpp"
 2913 #include "c2_intelJccErratum_x86.hpp"
 2914 
 2915 void PhaseOutput::pd_perform_mach_node_analysis() {
 2916   if (VM_Version::has_intel_jcc_erratum()) {
 2917     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2918     _buf_sizes._code += extra_padding;
 2919   }
 2920 }
 2921 
 2922 int MachNode::pd_alignment_required() const {
 2923   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2924     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2925     return IntelJccErratum::largest_jcc_size() + 1;
 2926   } else {
 2927     return 1;
 2928   }
 2929 }
 2930 
 2931 int MachNode::compute_padding(int current_offset) const {
 2932   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2933     Compile* C = Compile::current();
 2934     PhaseOutput* output = C->output();
 2935     Block* block = output->block();
 2936     int index = output->index();
 2937     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2938   } else {
 2939     return 0;
 2940   }
 2941 }
 2942 
 2943 // Emit deopt handler code.
 2944 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2945 
 2946   // Note that the code buffer's insts_mark is always relative to insts.
 2947   // That's why we must use the macroassembler to generate a handler.
 2948   address base = __ start_a_stub(size_deopt_handler());
 2949   if (base == nullptr) {
 2950     ciEnv::current()->record_failure("CodeCache is full");
 2951     return 0;  // CodeBuffer::expand failed
 2952   }
 2953   int offset = __ offset();
 2954 
 2955   Label start;
 2956   __ bind(start);
 2957 
 2958   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2959 
 2960   int entry_offset = __ offset();
 2961 
 2962   __ jmp(start);
 2963 
 2964   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2965   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2966          "out of bounds read in post-call NOP check");
 2967   __ end_a_stub();
 2968   return entry_offset;
 2969 }
 2970 
 2971 static Assembler::Width widthForType(BasicType bt) {
 2972   if (bt == T_BYTE) {
 2973     return Assembler::B;
 2974   } else if (bt == T_SHORT) {
 2975     return Assembler::W;
 2976   } else if (bt == T_INT) {
 2977     return Assembler::D;
 2978   } else {
 2979     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2980     return Assembler::Q;
 2981   }
 2982 }
 2983 
 2984 //=============================================================================
 2985 
 2986   // Float masks come from different places depending on platform.
 2987   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
 2988   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
 2989   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
 2990   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 2991   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
 2992   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
 2993   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 2994   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 2995   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
 2996   static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 2997   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 2998   static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 2999   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
 3000   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
 3001   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
 3002   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 3003   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
 3004   static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
 3005   static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
 3006 
 3007 //=============================================================================
 3008 bool Matcher::match_rule_supported(int opcode) {
 3009   if (!has_match_rule(opcode)) {
 3010     return false; // no match rule present
 3011   }
 3012   switch (opcode) {
 3013     case Op_AbsVL:
 3014     case Op_StoreVectorScatter:
 3015       if (UseAVX < 3) {
 3016         return false;
 3017       }
 3018       break;
 3019     case Op_PopCountI:
 3020     case Op_PopCountL:
 3021       if (!UsePopCountInstruction) {
 3022         return false;
 3023       }
 3024       break;
 3025     case Op_PopCountVI:
 3026       if (UseAVX < 2) {
 3027         return false;
 3028       }
 3029       break;
 3030     case Op_CompressV:
 3031     case Op_ExpandV:
 3032     case Op_PopCountVL:
 3033       if (UseAVX < 2) {
 3034         return false;
 3035       }
 3036       break;
 3037     case Op_MulVI:
 3038       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3039         return false;
 3040       }
 3041       break;
 3042     case Op_MulVL:
 3043       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3044         return false;
 3045       }
 3046       break;
 3047     case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
 3049         return false;
 3050       }
 3051       break;
 3052     case Op_AbsVB:
 3053     case Op_AbsVS:
 3054     case Op_AbsVI:
 3055     case Op_AddReductionVI:
 3056     case Op_AndReductionV:
 3057     case Op_OrReductionV:
 3058     case Op_XorReductionV:
 3059       if (UseSSE < 3) { // requires at least SSSE3
 3060         return false;
 3061       }
 3062       break;
 3063     case Op_MaxHF:
 3064     case Op_MinHF:
 3065       if (!VM_Version::supports_avx512vlbw()) {
 3066         return false;
 3067       }  // fallthrough
 3068     case Op_AddHF:
 3069     case Op_DivHF:
 3070     case Op_FmaHF:
 3071     case Op_MulHF:
 3072     case Op_ReinterpretS2HF:
 3073     case Op_ReinterpretHF2S:
 3074     case Op_SubHF:
 3075     case Op_SqrtHF:
 3076       if (!VM_Version::supports_avx512_fp16()) {
 3077         return false;
 3078       }
 3079       break;
 3080     case Op_VectorLoadShuffle:
 3081     case Op_VectorRearrange:
 3082     case Op_MulReductionVI:
 3083       if (UseSSE < 4) { // requires at least SSE4
 3084         return false;
 3085       }
 3086       break;
 3087     case Op_IsInfiniteF:
 3088     case Op_IsInfiniteD:
 3089       if (!VM_Version::supports_avx512dq()) {
 3090         return false;
 3091       }
 3092       break;
 3093     case Op_SqrtVD:
 3094     case Op_SqrtVF:
 3095     case Op_VectorMaskCmp:
 3096     case Op_VectorCastB2X:
 3097     case Op_VectorCastS2X:
 3098     case Op_VectorCastI2X:
 3099     case Op_VectorCastL2X:
 3100     case Op_VectorCastF2X:
 3101     case Op_VectorCastD2X:
 3102     case Op_VectorUCastB2X:
 3103     case Op_VectorUCastS2X:
 3104     case Op_VectorUCastI2X:
 3105     case Op_VectorMaskCast:
 3106       if (UseAVX < 1) { // enabled for AVX only
 3107         return false;
 3108       }
 3109       break;
 3110     case Op_PopulateIndex:
 3111       if (UseAVX < 2) {
 3112         return false;
 3113       }
 3114       break;
 3115     case Op_RoundVF:
 3116       if (UseAVX < 2) { // enabled for AVX2 only
 3117         return false;
 3118       }
 3119       break;
 3120     case Op_RoundVD:
 3121       if (UseAVX < 3) {
 3122         return false;  // enabled for AVX3 only
 3123       }
 3124       break;
 3125     case Op_CompareAndSwapL:
 3126     case Op_CompareAndSwapP:
 3127       break;
 3128     case Op_StrIndexOf:
 3129       if (!UseSSE42Intrinsics) {
 3130         return false;
 3131       }
 3132       break;
 3133     case Op_StrIndexOfChar:
 3134       if (!UseSSE42Intrinsics) {
 3135         return false;
 3136       }
 3137       break;
 3138     case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
 3140         return false;
 3141       }
 3142       break;
 3143     case Op_MulVB:
 3144     case Op_LShiftVB:
 3145     case Op_RShiftVB:
 3146     case Op_URShiftVB:
 3147     case Op_VectorInsert:
 3148     case Op_VectorLoadMask:
 3149     case Op_VectorStoreMask:
 3150     case Op_VectorBlend:
 3151       if (UseSSE < 4) {
 3152         return false;
 3153       }
 3154       break;
 3155     case Op_MaxD:
 3156     case Op_MaxF:
 3157     case Op_MinD:
 3158     case Op_MinF:
 3159       if (UseAVX < 1) { // enabled for AVX only
 3160         return false;
 3161       }
 3162       break;
 3163     case Op_CacheWB:
 3164     case Op_CacheWBPreSync:
 3165     case Op_CacheWBPostSync:
 3166       if (!VM_Version::supports_data_cache_line_flush()) {
 3167         return false;
 3168       }
 3169       break;
 3170     case Op_ExtractB:
 3171     case Op_ExtractL:
 3172     case Op_ExtractI:
 3173     case Op_RoundDoubleMode:
 3174       if (UseSSE < 4) {
 3175         return false;
 3176       }
 3177       break;
 3178     case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
 3180         return false; // 128bit vroundpd is not available
 3181       }
 3182       break;
 3183     case Op_LoadVectorGather:
 3184     case Op_LoadVectorGatherMasked:
 3185       if (UseAVX < 2) {
 3186         return false;
 3187       }
 3188       break;
 3189     case Op_FmaF:
 3190     case Op_FmaD:
 3191     case Op_FmaVD:
 3192     case Op_FmaVF:
 3193       if (!UseFMA) {
 3194         return false;
 3195       }
 3196       break;
 3197     case Op_MacroLogicV:
 3198       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3199         return false;
 3200       }
 3201       break;
 3202 
 3203     case Op_VectorCmpMasked:
 3204     case Op_VectorMaskGen:
 3205       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3206         return false;
 3207       }
 3208       break;
 3209     case Op_VectorMaskFirstTrue:
 3210     case Op_VectorMaskLastTrue:
 3211     case Op_VectorMaskTrueCount:
 3212     case Op_VectorMaskToLong:
 3213       if (UseAVX < 1) {
 3214          return false;
 3215       }
 3216       break;
 3217     case Op_RoundF:
 3218     case Op_RoundD:
 3219       break;
 3220     case Op_CopySignD:
 3221     case Op_CopySignF:
 3222       if (UseAVX < 3)  {
 3223         return false;
 3224       }
 3225       if (!VM_Version::supports_avx512vl()) {
 3226         return false;
 3227       }
 3228       break;
 3229     case Op_CompressBits:
 3230     case Op_ExpandBits:
 3231       if (!VM_Version::supports_bmi2()) {
 3232         return false;
 3233       }
 3234       break;
 3235     case Op_CompressM:
 3236       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3237         return false;
 3238       }
 3239       break;
 3240     case Op_ConvF2HF:
 3241     case Op_ConvHF2F:
 3242       if (!VM_Version::supports_float16()) {
 3243         return false;
 3244       }
 3245       break;
 3246     case Op_VectorCastF2HF:
 3247     case Op_VectorCastHF2F:
 3248       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3249         return false;
 3250       }
 3251       break;
 3252   }
 3253   return true;  // Match rules are supported by default.
 3254 }
 3255 
 3256 //------------------------------------------------------------------------
 3257 
 3258 static inline bool is_pop_count_instr_target(BasicType bt) {
 3259   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3260          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3261 }
 3262 
 3263 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3264   return match_rule_supported_vector(opcode, vlen, bt);
 3265 }
 3266 
// Identify extra cases in which we might want to provide match rules for vector
// nodes and other intrinsics, guarded by vector length (vlen) and element type (bt).
 3269 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3270   if (!match_rule_supported(opcode)) {
 3271     return false;
 3272   }
 3273   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3274   //   * SSE2 supports 128bit vectors for all types;
 3275   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3276   //   * AVX2 supports 256bit vectors for all types;
 3277   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3278   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3279   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3280   // And MaxVectorSize is taken into account as well.
 3281   if (!vector_size_supported(bt, vlen)) {
 3282     return false;
 3283   }
 3284   // Special cases which require vector length follow:
 3285   //   * implementation limitations
 3286   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3287   //   * 128bit vroundpd instruction is present only in AVX1
 3288   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
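  // e.g. vlen = 8 and bt = T_LONG give 8 * 8 * 8 = 512 bits (a ZMM vector).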
 3289   switch (opcode) {
 3290     case Op_MaxVHF:
 3291     case Op_MinVHF:
 3292       if (!VM_Version::supports_avx512bw()) {
 3293         return false;
 3294       }
 3295     case Op_AddVHF:
 3296     case Op_DivVHF:
 3297     case Op_FmaVHF:
 3298     case Op_MulVHF:
 3299     case Op_SubVHF:
 3300     case Op_SqrtVHF:
 3301       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3302         return false;
 3303       }
 3304       if (!VM_Version::supports_avx512_fp16()) {
 3305         return false;
 3306       }
 3307       break;
 3308     case Op_AbsVF:
 3309     case Op_NegVF:
      if (vlen == 16 && !VM_Version::supports_avx512dq()) {
 3311         return false; // 512bit vandps and vxorps are not available
 3312       }
 3313       break;
 3314     case Op_AbsVD:
 3315     case Op_NegVD:
      if (vlen == 8 && !VM_Version::supports_avx512dq()) {
 3317         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3318       }
 3319       break;
 3320     case Op_RotateRightV:
 3321     case Op_RotateLeftV:
 3322       if (bt != T_INT && bt != T_LONG) {
 3323         return false;
 3324       } // fallthrough
 3325     case Op_MacroLogicV:
 3326       if (!VM_Version::supports_evex() ||
 3327           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3328         return false;
 3329       }
 3330       break;
 3331     case Op_ClearArray:
 3332     case Op_VectorMaskGen:
 3333     case Op_VectorCmpMasked:
 3334       if (!VM_Version::supports_avx512bw()) {
 3335         return false;
 3336       }
 3337       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3338         return false;
 3339       }
 3340       break;
 3341     case Op_LoadVectorMasked:
 3342     case Op_StoreVectorMasked:
 3343       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3344         return false;
 3345       }
 3346       break;
 3347     case Op_UMinV:
 3348     case Op_UMaxV:
 3349       if (UseAVX == 0) {
 3350         return false;
 3351       }
 3352       break;
 3353     case Op_MaxV:
 3354     case Op_MinV:
 3355       if (UseSSE < 4 && is_integral_type(bt)) {
 3356         return false;
 3357       }
      if (bt == T_FLOAT || bt == T_DOUBLE) {
        // Float/Double intrinsics are enabled for the AVX family currently.
        if (UseAVX == 0) {
          return false;
        }
        if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
          return false;
        }
      }
 3367       break;
 3368     case Op_CallLeafVector:
 3369       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3370         return false;
 3371       }
 3372       break;
 3373     case Op_AddReductionVI:
 3374       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3375         return false;
 3376       }
 3377       // fallthrough
 3378     case Op_AndReductionV:
 3379     case Op_OrReductionV:
 3380     case Op_XorReductionV:
 3381       if (is_subword_type(bt) && (UseSSE < 4)) {
 3382         return false;
 3383       }
 3384       break;
 3385     case Op_MinReductionV:
 3386     case Op_MaxReductionV:
 3387       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3388         return false;
 3389       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3390         return false;
 3391       }
 3392       // Float/Double intrinsics enabled for AVX family.
 3393       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3394         return false;
 3395       }
 3396       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3397         return false;
 3398       }
 3399       break;
 3400     case Op_VectorBlend:
 3401       if (UseAVX == 0 && size_in_bits < 128) {
 3402         return false;
 3403       }
 3404       break;
 3405     case Op_VectorTest:
 3406       if (UseSSE < 4) {
 3407         return false; // Implementation limitation
 3408       } else if (size_in_bits < 32) {
 3409         return false; // Implementation limitation
 3410       }
 3411       break;
 3412     case Op_VectorLoadShuffle:
 3413     case Op_VectorRearrange:
      if (vlen == 2) {
 3415         return false; // Implementation limitation due to how shuffle is loaded
 3416       } else if (size_in_bits == 256 && UseAVX < 2) {
 3417         return false; // Implementation limitation
 3418       }
 3419       break;
 3420     case Op_VectorLoadMask:
 3421     case Op_VectorMaskCast:
 3422       if (size_in_bits == 256 && UseAVX < 2) {
 3423         return false; // Implementation limitation
 3424       }
 3425       // fallthrough
 3426     case Op_VectorStoreMask:
 3427       if (vlen == 2) {
 3428         return false; // Implementation limitation
 3429       }
 3430       break;
 3431     case Op_PopulateIndex:
 3432       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3433         return false;
 3434       }
 3435       break;
 3436     case Op_VectorCastB2X:
 3437     case Op_VectorCastS2X:
 3438     case Op_VectorCastI2X:
 3439       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3440         return false;
 3441       }
 3442       break;
 3443     case Op_VectorCastL2X:
 3444       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3445         return false;
 3446       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3447         return false;
 3448       }
 3449       break;
 3450     case Op_VectorCastF2X: {
        // As per JLS section 5.1.3, narrowing conversions to sub-word types
        // happen after an intermediate conversion to integer, and the special
        // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
 3454         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3455         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3456           return false;
 3457         }
 3458       }
 3459       // fallthrough
 3460     case Op_VectorCastD2X:
 3461       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3462         return false;
 3463       }
 3464       break;
 3465     case Op_VectorCastF2HF:
 3466     case Op_VectorCastHF2F:
 3467       if (!VM_Version::supports_f16c() &&
 3468          ((!VM_Version::supports_evex() ||
 3469          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3470         return false;
 3471       }
 3472       break;
 3473     case Op_RoundVD:
 3474       if (!VM_Version::supports_avx512dq()) {
 3475         return false;
 3476       }
 3477       break;
 3478     case Op_MulReductionVI:
 3479       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3480         return false;
 3481       }
 3482       break;
 3483     case Op_LoadVectorGatherMasked:
 3484       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3485         return false;
 3486       }
 3487       if (is_subword_type(bt) &&
 3488          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3489           (size_in_bits < 64)                                      ||
 3490           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3491         return false;
 3492       }
 3493       break;
 3494     case Op_StoreVectorScatterMasked:
 3495     case Op_StoreVectorScatter:
 3496       if (is_subword_type(bt)) {
 3497         return false;
 3498       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3499         return false;
 3500       }
 3501       // fallthrough
 3502     case Op_LoadVectorGather:
 3503       if (!is_subword_type(bt) && size_in_bits == 64) {
 3504         return false;
 3505       }
 3506       if (is_subword_type(bt) && size_in_bits < 64) {
 3507         return false;
 3508       }
 3509       break;
 3510     case Op_SaturatingAddV:
 3511     case Op_SaturatingSubV:
 3512       if (UseAVX < 1) {
 3513         return false; // Implementation limitation
 3514       }
 3515       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3516         return false;
 3517       }
 3518       break;
 3519     case Op_SelectFromTwoVector:
 3520        if (size_in_bits < 128) {
 3521          return false;
 3522        }
 3523        if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3524          return false;
 3525        }
 3526        if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3527          return false;
 3528        }
 3529        if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3530          return false;
 3531        }
 3532        if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
 3533          return false;
 3534        }
 3535        break;
 3536     case Op_MaskAll:
 3537       if (!VM_Version::supports_evex()) {
 3538         return false;
 3539       }
 3540       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3541         return false;
 3542       }
 3543       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3544         return false;
 3545       }
 3546       break;
 3547     case Op_VectorMaskCmp:
 3548       if (vlen < 2 || size_in_bits < 32) {
 3549         return false;
 3550       }
 3551       break;
 3552     case Op_CompressM:
 3553       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3554         return false;
 3555       }
 3556       break;
 3557     case Op_CompressV:
 3558     case Op_ExpandV:
 3559       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3560         return false;
 3561       }
      if (size_in_bits < 128) {
        return false;
      }
      // fallthrough
 3565     case Op_VectorLongToMask:
 3566       if (UseAVX < 1) {
 3567         return false;
 3568       }
 3569       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3570         return false;
 3571       }
 3572       break;
 3573     case Op_SignumVD:
 3574     case Op_SignumVF:
 3575       if (UseAVX < 1) {
 3576         return false;
 3577       }
 3578       break;
 3579     case Op_PopCountVI:
 3580     case Op_PopCountVL: {
 3581         if (!is_pop_count_instr_target(bt) &&
 3582             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3583           return false;
 3584         }
 3585       }
 3586       break;
 3587     case Op_ReverseV:
 3588     case Op_ReverseBytesV:
 3589       if (UseAVX < 2) {
 3590         return false;
 3591       }
 3592       break;
 3593     case Op_CountTrailingZerosV:
 3594     case Op_CountLeadingZerosV:
 3595       if (UseAVX < 2) {
 3596         return false;
 3597       }
 3598       break;
 3599   }
 3600   return true;  // Per default match rules are supported.
 3601 }
 3602 
 3603 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most of the unary/binary/ternary masked
  // operations share the IR nodes of their non-masked counterparts, with the
  // mask edge being the differentiator.
  // This routine does a strict check on the existence of masked operation
  // patterns by returning a default false value for all opcodes apart from
  // the ones whose masked instruction patterns are defined in this file.
 3610   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3611     return false;
 3612   }
 3613 
 3614   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3615   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3616     return false;
 3617   }
  switch (opcode) {
 3619     // Unary masked operations
 3620     case Op_AbsVB:
 3621     case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      }
      // fallthrough
 3625     case Op_AbsVI:
 3626     case Op_AbsVL:
 3627       return true;
 3628 
 3629     // Ternary masked operations
 3630     case Op_FmaVF:
 3631     case Op_FmaVD:
 3632       return true;
 3633 
 3634     case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
 3636         return false;
 3637       }
 3638       return true;
 3639 
 3640     // Binary masked operations
 3641     case Op_AddVB:
 3642     case Op_AddVS:
 3643     case Op_SubVB:
 3644     case Op_SubVS:
 3645     case Op_MulVS:
 3646     case Op_LShiftVS:
 3647     case Op_RShiftVS:
 3648     case Op_URShiftVS:
 3649       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3650       if (!VM_Version::supports_avx512bw()) {
 3651         return false;  // Implementation limitation
 3652       }
 3653       return true;
 3654 
 3655     case Op_MulVL:
 3656       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3657       if (!VM_Version::supports_avx512dq()) {
 3658         return false;  // Implementation limitation
 3659       }
 3660       return true;
 3661 
 3662     case Op_AndV:
 3663     case Op_OrV:
 3664     case Op_XorV:
 3665     case Op_RotateRightV:
 3666     case Op_RotateLeftV:
 3667       if (bt != T_INT && bt != T_LONG) {
 3668         return false; // Implementation limitation
 3669       }
 3670       return true;
 3671 
 3672     case Op_VectorLoadMask:
 3673       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3674       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3675         return false;
 3676       }
 3677       return true;
 3678 
 3679     case Op_AddVI:
 3680     case Op_AddVL:
 3681     case Op_AddVF:
 3682     case Op_AddVD:
 3683     case Op_SubVI:
 3684     case Op_SubVL:
 3685     case Op_SubVF:
 3686     case Op_SubVD:
 3687     case Op_MulVI:
 3688     case Op_MulVF:
 3689     case Op_MulVD:
 3690     case Op_DivVF:
 3691     case Op_DivVD:
 3692     case Op_SqrtVF:
 3693     case Op_SqrtVD:
 3694     case Op_LShiftVI:
 3695     case Op_LShiftVL:
 3696     case Op_RShiftVI:
 3697     case Op_RShiftVL:
 3698     case Op_URShiftVI:
 3699     case Op_URShiftVL:
 3700     case Op_LoadVectorMasked:
 3701     case Op_StoreVectorMasked:
 3702     case Op_LoadVectorGatherMasked:
 3703     case Op_StoreVectorScatterMasked:
 3704       return true;
 3705 
 3706     case Op_UMinV:
 3707     case Op_UMaxV:
 3708       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3709         return false;
 3710       } // fallthrough
 3711     case Op_MaxV:
 3712     case Op_MinV:
 3713       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3714         return false; // Implementation limitation
 3715       }
 3716       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3717         return false; // Implementation limitation
 3718       }
 3719       return true;
 3720     case Op_SaturatingAddV:
 3721     case Op_SaturatingSubV:
 3722       if (!is_subword_type(bt)) {
 3723         return false;
 3724       }
 3725       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3726         return false; // Implementation limitation
 3727       }
 3728       return true;
 3729 
 3730     case Op_VectorMaskCmp:
 3731       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3732         return false; // Implementation limitation
 3733       }
 3734       return true;
 3735 
 3736     case Op_VectorRearrange:
 3737       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3738         return false; // Implementation limitation
 3739       }
 3740       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3741         return false; // Implementation limitation
 3742       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3743         return false; // Implementation limitation
 3744       }
 3745       return true;
 3746 
 3747     // Binary Logical operations
 3748     case Op_AndVMask:
 3749     case Op_OrVMask:
 3750     case Op_XorVMask:
 3751       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3752         return false; // Implementation limitation
 3753       }
 3754       return true;
 3755 
 3756     case Op_PopCountVI:
 3757     case Op_PopCountVL:
 3758       if (!is_pop_count_instr_target(bt)) {
 3759         return false;
 3760       }
 3761       return true;
 3762 
 3763     case Op_MaskAll:
 3764       return true;
 3765 
 3766     case Op_CountLeadingZerosV:
 3767       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3768         return true;
 3769       }
 3770     default:
 3771       return false;
 3772   }
 3773 }
 3774 
 3775 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3776   return false;
 3777 }
 3778 
 3779 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3780 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3781   switch (elem_bt) {
 3782     case T_BYTE:  return false;
 3783     case T_SHORT: return !VM_Version::supports_avx512bw();
 3784     case T_INT:   return !VM_Version::supports_avx();
 3785     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3786     default:
 3787       ShouldNotReachHere();
 3788       return false;
 3789   }
 3790 }
 3791 
 3792 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3793   // Prefer predicate if the mask type is "TypeVectMask".
 3794   return vt->isa_vectmask() != nullptr;
 3795 }
 3796 
 3797 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3798   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3799   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3800   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3801       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3802     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3803     return new legVecZOper();
 3804   }
 3805   if (legacy) {
 3806     switch (ideal_reg) {
 3807       case Op_VecS: return new legVecSOper();
 3808       case Op_VecD: return new legVecDOper();
 3809       case Op_VecX: return new legVecXOper();
 3810       case Op_VecY: return new legVecYOper();
 3811       case Op_VecZ: return new legVecZOper();
 3812     }
 3813   } else {
 3814     switch (ideal_reg) {
 3815       case Op_VecS: return new vecSOper();
 3816       case Op_VecD: return new vecDOper();
 3817       case Op_VecX: return new vecXOper();
 3818       case Op_VecY: return new vecYOper();
 3819       case Op_VecZ: return new vecZOper();
 3820     }
 3821   }
 3822   ShouldNotReachHere();
 3823   return nullptr;
 3824 }
 3825 
 3826 bool Matcher::is_reg2reg_move(MachNode* m) {
 3827   switch (m->rule()) {
 3828     case MoveVec2Leg_rule:
 3829     case MoveLeg2Vec_rule:
 3830     case MoveF2VL_rule:
 3831     case MoveF2LEG_rule:
 3832     case MoveVL2F_rule:
 3833     case MoveLEG2F_rule:
 3834     case MoveD2VL_rule:
 3835     case MoveD2LEG_rule:
 3836     case MoveVL2D_rule:
 3837     case MoveLEG2D_rule:
 3838       return true;
 3839     default:
 3840       return false;
 3841   }
 3842 }
 3843 
 3844 bool Matcher::is_generic_vector(MachOper* opnd) {
 3845   switch (opnd->opcode()) {
 3846     case VEC:
 3847     case LEGVEC:
 3848       return true;
 3849     default:
 3850       return false;
 3851   }
 3852 }
 3853 
 3854 //------------------------------------------------------------------------
 3855 
 3856 const RegMask* Matcher::predicate_reg_mask(void) {
 3857   return &_VECTMASK_REG_mask;
 3858 }
 3859 
 3860 // Max vector size in bytes. 0 if not supported.
 3861 int Matcher::vector_width_in_bytes(BasicType bt) {
 3862   assert(is_java_primitive(bt), "only primitive type vectors");
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX512/EVEX supports 512bit vectors for all types.
 3866   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
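  // e.g. UseAVX == 2 -> (1 << 2) * 8 = 32 bytes (YMM), UseAVX == 3 -> 64 bytes (ZMM)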
 3867   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3868   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3869     size = (UseAVX > 2) ? 64 : 32;
 3870   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3871     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3872   // Use flag to limit vector size.
 3873   size = MIN2(size,(int)MaxVectorSize);
 3874   // Minimum 2 values in vector (or 4 for bytes).
 3875   switch (bt) {
 3876   case T_DOUBLE:
 3877   case T_LONG:
 3878     if (size < 16) return 0;
 3879     break;
 3880   case T_FLOAT:
 3881   case T_INT:
 3882     if (size < 8) return 0;
 3883     break;
 3884   case T_BOOLEAN:
 3885     if (size < 4) return 0;
 3886     break;
 3887   case T_CHAR:
 3888     if (size < 4) return 0;
 3889     break;
 3890   case T_BYTE:
 3891     if (size < 4) return 0;
 3892     break;
 3893   case T_SHORT:
 3894     if (size < 4) return 0;
 3895     break;
 3896   default:
 3897     ShouldNotReachHere();
 3898   }
 3899   return size;
 3900 }
 3901 
 3902 // Limits on vector size (number of elements) loaded into vector.
 3903 int Matcher::max_vector_size(const BasicType bt) {
 3904   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3905 }
 3906 int Matcher::min_vector_size(const BasicType bt) {
 3907   int max_size = max_vector_size(bt);
 3908   // Min size which can be loaded into vector is 4 bytes.
 3909   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Support for calling SVML routines on 1-element double vectors (Double64)
 3911   if (bt == T_DOUBLE) {
 3912     size = 1;
 3913   }
 3914   return MIN2(size,max_size);
 3915 }
 3916 
 3917 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3918   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3919   // by default on Cascade Lake
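  // (512-bit instructions can trigger frequency throttling on these parts, which
  // tends to outweigh the benefit of wider vectors for auto-vectorized loops).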
 3920   if (VM_Version::is_default_intel_cascade_lake()) {
 3921     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3922   }
 3923   return Matcher::max_vector_size(bt);
 3924 }
 3925 
 3926 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3927   return -1;
 3928 }
 3929 
 3930 // Vector ideal reg corresponding to specified size in bytes
 3931 uint Matcher::vector_ideal_reg(int size) {
 3932   assert(MaxVectorSize >= size, "");
 3933   switch(size) {
 3934     case  4: return Op_VecS;
 3935     case  8: return Op_VecD;
 3936     case 16: return Op_VecX;
 3937     case 32: return Op_VecY;
 3938     case 64: return Op_VecZ;
 3939   }
 3940   ShouldNotReachHere();
 3941   return 0;
 3942 }
 3943 
 3944 // Check for shift by small constant as well
 3945 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3946   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3947       shift->in(2)->get_int() <= 3 &&
 3948       // Are there other uses besides address expressions?
 3949       !matcher->is_visited(shift)) {
 3950     address_visited.set(shift->_idx); // Flag as address_visited
 3951     mstack.push(shift->in(2), Matcher::Visit);
 3952     Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses
    // the ConvI2L operation for an array index on LP64
    // if the index value is positive.
 3956     if (conv->Opcode() == Op_ConvI2L &&
 3957         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3958         // Are there other uses besides address expressions?
 3959         !matcher->is_visited(conv)) {
 3960       address_visited.set(conv->_idx); // Flag as address_visited
 3961       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3962     } else {
 3963       mstack.push(conv, Matcher::Pre_Visit);
 3964     }
 3965     return true;
 3966   }
 3967   return false;
 3968 }
 3969 
 3970 // This function identifies sub-graphs in which a 'load' node is
 3971 // input to two different nodes, and such that it can be matched
 3972 // with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
 3974 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3975 // refers to the same node.
 3976 //
 3977 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3978 // This is a temporary solution until we make DAGs expressible in ADL.
 3979 template<typename ConType>
 3980 class FusedPatternMatcher {
 3981   Node* _op1_node;
 3982   Node* _mop_node;
 3983   int _con_op;
 3984 
 3985   static int match_next(Node* n, int next_op, int next_op_idx) {
 3986     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3987       return -1;
 3988     }
 3989 
 3990     if (next_op_idx == -1) { // n is commutative, try rotations
 3991       if (n->in(1)->Opcode() == next_op) {
 3992         return 1;
 3993       } else if (n->in(2)->Opcode() == next_op) {
 3994         return 2;
 3995       }
 3996     } else {
 3997       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 3998       if (n->in(next_op_idx)->Opcode() == next_op) {
 3999         return next_op_idx;
 4000       }
 4001     }
 4002     return -1;
 4003   }
 4004 
 4005  public:
 4006   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 4007     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 4008 
 4009   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 4010              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4011              typename ConType::NativeType con_value) {
 4012     if (_op1_node->Opcode() != op1) {
 4013       return false;
 4014     }
 4015     if (_mop_node->outcnt() > 2) {
 4016       return false;
 4017     }
 4018     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4019     if (op1_op2_idx == -1) {
 4020       return false;
 4021     }
 4022     // Memory operation must be the other edge
 4023     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 4024 
 4025     // Check that the mop node is really what we want
 4026     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4027       Node* op2_node = _op1_node->in(op1_op2_idx);
 4028       if (op2_node->outcnt() > 1) {
 4029         return false;
 4030       }
 4031       assert(op2_node->Opcode() == op2, "Should be");
 4032       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4033       if (op2_con_idx == -1) {
 4034         return false;
 4035       }
 4036       // Memory operation must be the other edge
 4037       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4038       // Check that the memory operation is the same node
 4039       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4040         // Now check the constant
 4041         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4042         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4043           return true;
 4044         }
 4045       }
 4046     }
 4047     return false;
 4048   }
 4049 };
 4050 
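// The matches in is_bmi_pattern below map onto BMI1 instructions:
//   (And (Sub 0 x) x)  -> blsi   (isolate lowest set bit:     x & -x)
//   (And (Add x -1) x) -> blsr   (reset lowest set bit:       x & (x-1))
//   (Xor (Add x -1) x) -> blsmsk (mask up to lowest set bit:  x ^ (x-1))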
 4051 static bool is_bmi_pattern(Node* n, Node* m) {
 4052   assert(UseBMI1Instructions, "sanity");
 4053   if (n != nullptr && m != nullptr) {
 4054     if (m->Opcode() == Op_LoadI) {
 4055       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4056       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4057              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4058              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4059     } else if (m->Opcode() == Op_LoadL) {
 4060       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4061       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4062              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4063              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4064     }
 4065   }
 4066   return false;
 4067 }
 4068 
 4069 // Should the matcher clone input 'm' of node 'n'?
 4070 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4071   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4072   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
 4073     mstack.push(m, Visit);
 4074     return true;
 4075   }
 4076   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4077     mstack.push(m, Visit);           // m = ShiftCntV
 4078     return true;
 4079   }
 4080   if (is_encode_and_store_pattern(n, m)) {
 4081     mstack.push(m, Visit);
 4082     return true;
 4083   }
 4084   return false;
 4085 }
 4086 
 4087 // Should the Matcher clone shifts on addressing modes, expecting them
 4088 // to be subsumed into complex addressing expressions or compute them
 4089 // into registers?
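// For example, (AddP (AddP base (LShiftL idx 2)) con32) can be subsumed into the
// single addressing expression [base + idx*4 + con32] instead of first
// materializing the scaled index in a register.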
 4090 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4091   Node *off = m->in(AddPNode::Offset);
 4092   if (off->is_Con()) {
 4093     address_visited.test_set(m->_idx); // Flag as address_visited
 4094     Node *adr = m->in(AddPNode::Address);
 4095 
 4096     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4097     // AtomicAdd is not an addressing expression.
 4098     // Cheap to find it by looking for screwy base.
 4099     if (adr->is_AddP() &&
 4100         !adr->in(AddPNode::Base)->is_top() &&
 4101         !adr->in(AddPNode::Offset)->is_Con() &&
 4102         off->get_long() == (int) (off->get_long()) && // immL32
 4103         // Are there other uses besides address expressions?
 4104         !is_visited(adr)) {
 4105       address_visited.set(adr->_idx); // Flag as address_visited
 4106       Node *shift = adr->in(AddPNode::Offset);
 4107       if (!clone_shift(shift, this, mstack, address_visited)) {
 4108         mstack.push(shift, Pre_Visit);
 4109       }
 4110       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4111       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4112     } else {
 4113       mstack.push(adr, Pre_Visit);
 4114     }
 4115 
 4116     // Clone X+offset as it also folds into most addressing expressions
 4117     mstack.push(off, Visit);
 4118     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4119     return true;
 4120   } else if (clone_shift(off, this, mstack, address_visited)) {
 4121     address_visited.test_set(m->_idx); // Flag as address_visited
 4122     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4123     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4124     return true;
 4125   }
 4126   return false;
 4127 }
 4128 
 4129 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4130   switch (bt) {
 4131     case BoolTest::eq:
 4132       return Assembler::eq;
 4133     case BoolTest::ne:
 4134       return Assembler::neq;
 4135     case BoolTest::le:
 4136     case BoolTest::ule:
 4137       return Assembler::le;
 4138     case BoolTest::ge:
 4139     case BoolTest::uge:
 4140       return Assembler::nlt;
 4141     case BoolTest::lt:
 4142     case BoolTest::ult:
 4143       return Assembler::lt;
 4144     case BoolTest::gt:
 4145     case BoolTest::ugt:
 4146       return Assembler::nle;
 4147     default : ShouldNotReachHere(); return Assembler::_false;
 4148   }
 4149 }
 4150 
 4151 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4152   switch (bt) {
 4153   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4154   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4155   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4156   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4157   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4158   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4159   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4160   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4161   }
 4162 }
 4163 
 4164 // Helper methods for MachSpillCopyNode::implementation().
 4165 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4166                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4167   assert(ireg == Op_VecS || // 32bit vector
 4168          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4169           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4170          "no non-adjacent vector moves" );
 4171   if (masm) {
 4172     switch (ireg) {
 4173     case Op_VecS: // copy whole register
 4174     case Op_VecD:
 4175     case Op_VecX:
 4176       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4177         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4178       } else {
 4179         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4180      }
 4181       break;
 4182     case Op_VecY:
 4183       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4184         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4185       } else {
 4186         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4187      }
 4188       break;
 4189     case Op_VecZ:
 4190       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4191       break;
 4192     default:
 4193       ShouldNotReachHere();
 4194     }
 4195 #ifndef PRODUCT
 4196   } else {
 4197     switch (ireg) {
 4198     case Op_VecS:
 4199     case Op_VecD:
 4200     case Op_VecX:
 4201       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4202       break;
 4203     case Op_VecY:
 4204     case Op_VecZ:
 4205       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4206       break;
 4207     default:
 4208       ShouldNotReachHere();
 4209     }
 4210 #endif
 4211   }
 4212 }
 4213 
 4214 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4215                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4216   if (masm) {
 4217     if (is_load) {
 4218       switch (ireg) {
 4219       case Op_VecS:
 4220         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4221         break;
 4222       case Op_VecD:
 4223         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4224         break;
 4225       case Op_VecX:
 4226         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4227           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4228         } else {
 4229           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4231         }
 4232         break;
 4233       case Op_VecY:
 4234         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4235           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4236         } else {
 4237           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4239         }
 4240         break;
 4241       case Op_VecZ:
 4242         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4243         break;
 4244       default:
 4245         ShouldNotReachHere();
 4246       }
 4247     } else { // store
 4248       switch (ireg) {
 4249       case Op_VecS:
 4250         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4251         break;
 4252       case Op_VecD:
 4253         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4254         break;
 4255       case Op_VecX:
 4256         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4257           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4258         }
 4259         else {
 4260           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4261         }
 4262         break;
 4263       case Op_VecY:
 4264         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4265           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4266         }
 4267         else {
 4268           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4269         }
 4270         break;
 4271       case Op_VecZ:
 4272         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4273         break;
 4274       default:
 4275         ShouldNotReachHere();
 4276       }
 4277     }
 4278 #ifndef PRODUCT
 4279   } else {
 4280     if (is_load) {
 4281       switch (ireg) {
 4282       case Op_VecS:
 4283         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4284         break;
 4285       case Op_VecD:
 4286         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4287         break;
 4288        case Op_VecX:
 4289         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4290         break;
 4291       case Op_VecY:
 4292       case Op_VecZ:
 4293         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4294         break;
 4295       default:
 4296         ShouldNotReachHere();
 4297       }
 4298     } else { // store
 4299       switch (ireg) {
 4300       case Op_VecS:
 4301         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4302         break;
 4303       case Op_VecD:
 4304         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4305         break;
 4306        case Op_VecX:
 4307         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4308         break;
 4309       case Op_VecY:
 4310       case Op_VecZ:
 4311         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4312         break;
 4313       default:
 4314         ShouldNotReachHere();
 4315       }
 4316     }
 4317 #endif
 4318   }
 4319 }
 4320 
 4321 template <class T>
 4322 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4323   int size = type2aelembytes(bt) * len;
 4324   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4325   for (int i = 0; i < len; i++) {
 4326     int offset = i * type2aelembytes(bt);
 4327     switch (bt) {
 4328       case T_BYTE: val->at(i) = con; break;
 4329       case T_SHORT: {
 4330         jshort c = con;
 4331         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4332         break;
 4333       }
 4334       case T_INT: {
 4335         jint c = con;
 4336         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4337         break;
 4338       }
 4339       case T_LONG: {
 4340         jlong c = con;
 4341         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4342         break;
 4343       }
 4344       case T_FLOAT: {
 4345         jfloat c = con;
 4346         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4347         break;
 4348       }
 4349       case T_DOUBLE: {
 4350         jdouble c = con;
 4351         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4352         break;
 4353       }
 4354       default: assert(false, "%s", type2name(bt));
 4355     }
 4356   }
 4357   return val;
 4358 }
 4359 
 4360 static inline jlong high_bit_set(BasicType bt) {
 4361   switch (bt) {
 4362     case T_BYTE:  return 0x8080808080808080;
 4363     case T_SHORT: return 0x8000800080008000;
 4364     case T_INT:   return 0x8000000080000000;
 4365     case T_LONG:  return 0x8000000000000000;
 4366     default:
 4367       ShouldNotReachHere();
 4368       return 0;
 4369   }
 4370 }
 4371 
 4372 #ifndef PRODUCT
 4373   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4374     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4375   }
 4376 #endif
 4377 
 4378   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4379     __ nop(_count);
 4380   }
 4381 
 4382   uint MachNopNode::size(PhaseRegAlloc*) const {
 4383     return _count;
 4384   }
 4385 
 4386 #ifndef PRODUCT
 4387   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4388     st->print("# breakpoint");
 4389   }
 4390 #endif
 4391 
 4392   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4393     __ int3();
 4394   }
 4395 
 4396   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4397     return MachNode::size(ra_);
 4398   }
 4399 
 4400 %}
 4401 
 4402 //----------ENCODING BLOCK-----------------------------------------------------
 4403 // This block specifies the encoding classes used by the compiler to
 4404 // output byte streams.  Encoding classes are parameterized macros
 4405 // used by Machine Instruction Nodes in order to generate the bit
 4406 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.
// REG_INTER causes an operand to generate a function
 4410 // which returns its register number when queried.  CONST_INTER causes
 4411 // an operand to generate a function which returns the value of the
 4412 // constant when queried.  MEMORY_INTER causes an operand to generate
 4413 // four functions which return the Base Register, the Index Register,
 4414 // the Scale Value, and the Offset Value of the operand when queried.
 4415 // COND_INTER causes an operand to generate six functions which return
 4416 // the encoding code (ie - encoding bits for the instruction)
 4417 // associated with each basic boolean condition for a conditional
 4418 // instruction.
 4419 //
// Instructions specify two basic values for encoding.  A function is
// also available to check whether the constant displacement is an
// oop. Instructions use the ins_encode keyword to specify their encoding
 4423 // classes (which must be a sequence of enc_class names, and their
 4424 // parameters, specified in the encoding block), and they use the
 4425 // opcode keyword to specify, in order, their primary, secondary, and
 4426 // tertiary opcode.  Only the opcode sections which a particular
 4427 // instruction needs for encoding need to be specified.
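// A minimal encoding class (illustrative only, not defined in this file)
// has the shape:
//
//   enc_class example_enc(rRegI dst) %{
//     __ nop(); // emit instruction bytes through the C2_MacroAssembler
//   %}
//
// and would be referenced from an instruct definition via
// ins_encode(example_enc(dst)).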
 4428 encode %{
 4429   enc_class cdql_enc(no_rax_rdx_RegI div)
 4430   %{
 4431     // Full implementation of Java idiv and irem; checks for
 4432     // special case as described in JVM spec., p.243 & p.271.
 4433     //
 4434     //         normal case                           special case
 4435     //
 4436     // input : rax: dividend                         min_int
 4437     //         reg: divisor                          -1
 4438     //
 4439     // output: rax: quotient  (= rax idiv reg)       min_int
 4440     //         rdx: remainder (= rax irem reg)       0
 4441     //
    //  Code sequence:
 4443     //
 4444     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4445     //    5:   75 07/08                jne    e <normal>
 4446     //    7:   33 d2                   xor    %edx,%edx
 4447     //  [div >= 8 -> offset + 1]
 4448     //  [REX_B]
 4449     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4450     //    c:   74 03/04                je     11 <done>
 4451     // 000000000000000e <normal>:
 4452     //    e:   99                      cltd
 4453     //  [div >= 8 -> offset + 1]
 4454     //  [REX_B]
 4455     //    f:   f7 f9                   idiv   $div
 4456     // 0000000000000011 <done>:
 4457     Label normal;
 4458     Label done;
 4459 
 4460     // cmp    $0x80000000,%eax
 4461     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4462 
 4463     // jne    e <normal>
 4464     __ jccb(Assembler::notEqual, normal);
 4465 
 4466     // xor    %edx,%edx
 4467     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4468 
    // cmp    $0xffffffffffffffff,$div
 4470     __ cmpl($div$$Register, -1);
 4471 
 4472     // je     11 <done>
 4473     __ jccb(Assembler::equal, done);
 4474 
 4475     // <normal>
 4476     // cltd
 4477     __ bind(normal);
 4478     __ cdql();
 4479 
 4480     // idivl
 4481     // <done>
 4482     __ idivl($div$$Register);
 4483     __ bind(done);
 4484   %}
 4485 
 4486   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4487   %{
 4488     // Full implementation of Java ldiv and lrem; checks for
 4489     // special case as described in JVM spec., p.243 & p.271.
 4490     //
 4491     //         normal case                           special case
 4492     //
 4493     // input : rax: dividend                         min_long
 4494     //         reg: divisor                          -1
 4495     //
 4496     // output: rax: quotient  (= rax idiv reg)       min_long
 4497     //         rdx: remainder (= rax irem reg)       0
 4498     //
    //  Code sequence:
 4500     //
 4501     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4502     //    7:   00 00 80
 4503     //    a:   48 39 d0                cmp    %rdx,%rax
 4504     //    d:   75 08                   jne    17 <normal>
 4505     //    f:   33 d2                   xor    %edx,%edx
 4506     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4507     //   15:   74 05                   je     1c <done>
 4508     // 0000000000000017 <normal>:
 4509     //   17:   48 99                   cqto
 4510     //   19:   48 f7 f9                idiv   $div
 4511     // 000000000000001c <done>:
 4512     Label normal;
 4513     Label done;
 4514 
 4515     // mov    $0x8000000000000000,%rdx
 4516     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4517 
 4518     // cmp    %rdx,%rax
 4519     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4520 
 4521     // jne    17 <normal>
 4522     __ jccb(Assembler::notEqual, normal);
 4523 
 4524     // xor    %edx,%edx
 4525     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4526 
 4527     // cmp    $0xffffffffffffffff,$div
 4528     __ cmpq($div$$Register, -1);
 4529 
    // je     1c <done>
 4531     __ jccb(Assembler::equal, done);
 4532 
 4533     // <normal>
 4534     // cqto
 4535     __ bind(normal);
 4536     __ cdqq();
 4537 
    // idivq
    // <done>
 4540     __ idivq($div$$Register);
 4541     __ bind(done);
 4542   %}
 4543 
 4544   enc_class clear_avx %{
 4545     DEBUG_ONLY(int off0 = __ offset());
 4546     if (generate_vzeroupper(Compile::current())) {
      // Clear the upper bits of the YMM registers when the current compiled
      // code uses wide vectors, to avoid the AVX <-> SSE transition penalty
      // during the call.
 4550       __ vzeroupper();
 4551     }
 4552     DEBUG_ONLY(int off1 = __ offset());
 4553     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4554   %}
 4555 
 4556   enc_class Java_To_Runtime(method meth) %{
 4557     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4558     __ call(r10);
 4559     __ post_call_nop();
 4560   %}
 4561 
 4562   enc_class Java_Static_Call(method meth)
 4563   %{
 4564     // JAVA STATIC CALL
 4565     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4566     // determine who we intended to call.
 4567     if (!_method) {
 4568       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4569     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4570       // The NOP here is purely to ensure that eliding a call to
 4571       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4572       __ addr_nop_5();
 4573       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4574     } else {
 4575       int method_index = resolved_method_index(masm);
 4576       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4577                                                   : static_call_Relocation::spec(method_index);
 4578       address mark = __ pc();
 4579       int call_offset = __ offset();
 4580       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4581       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4582         // Calls of the same statically bound method can share
 4583         // a stub to the interpreter.
 4584         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4585       } else {
 4586         // Emit stubs for static call.
 4587         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4588         __ clear_inst_mark();
 4589         if (stub == nullptr) {
 4590           ciEnv::current()->record_failure("CodeCache is full");
 4591           return;
 4592         }
 4593       }
 4594     }
 4595     __ post_call_nop();
 4596   %}
 4597 
 4598   enc_class Java_Dynamic_Call(method meth) %{
 4599     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4600     __ post_call_nop();
 4601   %}
 4602 
 4603   enc_class call_epilog %{
 4604     if (VerifyStackAtCalls) {
 4605       // Check that stack depth is unchanged: find majik cookie on stack
 4606       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4607       Label L;
 4608       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4609       __ jccb(Assembler::equal, L);
 4610       // Die if stack mismatch
 4611       __ int3();
 4612       __ bind(L);
 4613     }
 4614     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
 4615       // The last return value is not set by the callee but used to pass the null marker to compiled code.
      // Search for the corresponding projection, get the register and emit code that initializes it.
 4617       uint con = (tf()->range_cc()->cnt() - 1);
 4618       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4619         ProjNode* proj = fast_out(i)->as_Proj();
 4620         if (proj->_con == con) {
 4621           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4622           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4623           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
 4624           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 4625           __ testq(rax, rax);
 4626           __ setb(Assembler::notZero, toReg);
 4627           __ movzbl(toReg, toReg);
 4628           if (reg->is_stack()) {
 4629             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4630             __ movq(Address(rsp, st_off), toReg);
 4631           }
 4632           break;
 4633         }
 4634       }
 4635       if (return_value_is_used()) {
 4636         // An inline type is returned as fields in multiple registers.
 4637         // Rax either contains an oop if the inline type is buffered or a pointer
 4638         // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
 4639         // if the lowest bit is set to allow C2 to use the oop after null checking.
 4640         // rax &= (rax & 1) - 1
 4641         __ movptr(rscratch1, rax);
 4642         __ andptr(rscratch1, 0x1);
 4643         __ subptr(rscratch1, 0x1);
 4644         __ andptr(rax, rscratch1);
 4645       }
 4646     }
 4647   %}
 4648 
 4649 %}
 4650 
 4651 //----------FRAME--------------------------------------------------------------
 4652 // Definition of frame structure and management information.
 4653 //
 4654 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4655 //                             |   (to get allocators register number
 4656 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4657 //  r   CALLER     |        |
 4658 //  o     |        +--------+      pad to even-align allocators stack-slot
 4659 //  w     V        |  pad0  |        numbers; owned by CALLER
 4660 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4661 //  h     ^        |   in   |  5
 4662 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4663 //  |     |        |        |  3
 4664 //  |     |        +--------+
 4665 //  V     |        | old out|      Empty on Intel, window on Sparc
 4666 //        |    old |preserve|      Must be even aligned.
 4667 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4668 //        |        |   in   |  3   area for Intel ret address
 4669 //     Owned by    |preserve|      Empty on Sparc.
 4670 //       SELF      +--------+
 4671 //        |        |  pad2  |  2   pad to align old SP
 4672 //        |        +--------+  1
 4673 //        |        | locks  |  0
 4674 //        |        +--------+----> OptoReg::stack0(), even aligned
 4675 //        |        |  pad1  | 11   pad to align new SP
 4676 //        |        +--------+
 4677 //        |        |        | 10
 4678 //        |        | spills |  9   spills
 4679 //        V        |        |  8   (pad0 slot for callee)
 4680 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4681 //        ^        |  out   |  7
 4682 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4683 //     Owned by    +--------+
 4684 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4685 //        |    new |preserve|      Must be even-aligned.
 4686 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4687 //        |        |        |
 4688 //
 4689 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4690 //         known from SELF's arguments and the Java calling convention.
 4691 //         Region 6-7 is determined per call site.
 4692 // Note 2: If the calling convention leaves holes in the incoming argument
 4693 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4694 //         are owned by the CALLEE.  Holes should not be necessary in the
 4695 //         incoming area, as the Java calling convention is completely under
 4696 //         the control of the AD file.  Doubles can be sorted and packed to
 4697 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4698 //         varargs C calling conventions.
 4699 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4700 //         even aligned with pad0 as needed.
 4701 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4702 //         region 6-11 is even aligned; it may be padded out more so that
 4703 //         the region from SP to FP meets the minimum stack alignment.
 4704 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4705 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4706 //         SP meets the minimum alignment.
 4707 
 4708 frame
 4709 %{
 4710   // These three registers define part of the calling convention
 4711   // between compiled code and the interpreter.
 4712   inline_cache_reg(RAX);                // Inline Cache Register
 4713 
 4714   // Optional: name the operand used by cisc-spilling to access
 4715   // [stack_pointer + offset]
 4716   cisc_spilling_operand_name(indOffset32);
 4717 
 4718   // Number of stack slots consumed by locking an object
 4719   sync_stack_slots(2);
 4720 
 4721   // Compiled code's Frame Pointer
 4722   frame_pointer(RSP);
 4723 
 4724   // Interpreter stores its frame pointer in a register which is
 4725   // stored to the stack by I2CAdaptors.
 4726   // I2CAdaptors convert from interpreted java to compiled java.
 4727   interpreter_frame_pointer(RBP);
 4728 
 4729   // Stack alignment requirement
 4730   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4731 
 4732   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4733   // for calls to C.  Supports the var-args backing area for register parms.
 4734   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4735 
 4736   // The after-PROLOG location of the return address.  Location of
 4737   // return address specifies a type (REG or STACK) and a number
 4738   // representing the register number (i.e. - use a register name) or
 4739   // stack slot.
 4740   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4741   // Otherwise, it is above the locks and verification slot and alignment word
 4742   return_addr(STACK - 2 +
 4743               align_up((Compile::current()->in_preserve_stack_slots() +
 4744                         Compile::current()->fixed_slots()),
 4745                        stack_alignment_in_slots()));
 4746 
 4747   // Location of compiled Java return values.  Same as C for now.
 4748   return_value
 4749   %{
 4750     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4751            "only return normal values");
 4752 
 4753     static const int lo[Op_RegL + 1] = {
 4754       0,
 4755       0,
 4756       RAX_num,  // Op_RegN
 4757       RAX_num,  // Op_RegI
 4758       RAX_num,  // Op_RegP
 4759       XMM0_num, // Op_RegF
 4760       XMM0_num, // Op_RegD
 4761       RAX_num   // Op_RegL
 4762     };
 4763     static const int hi[Op_RegL + 1] = {
 4764       0,
 4765       0,
 4766       OptoReg::Bad, // Op_RegN
 4767       OptoReg::Bad, // Op_RegI
 4768       RAX_H_num,    // Op_RegP
 4769       OptoReg::Bad, // Op_RegF
 4770       XMM0b_num,    // Op_RegD
 4771       RAX_H_num     // Op_RegL
 4772     };
 4773     // Excluded flags and vector registers.
 4774     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4775     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4776   %}
 4777 %}
 4778 
 4779 //----------ATTRIBUTES---------------------------------------------------------
 4780 //----------Operand Attributes-------------------------------------------------
 4781 op_attrib op_cost(0);        // Required cost attribute
 4782 
 4783 //----------Instruction Attributes---------------------------------------------
 4784 ins_attrib ins_cost(100);       // Required cost attribute
 4785 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4786 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4787                                 // a non-matching short branch variant
 4788                                 // of some long branch?
 4789 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4790                                 // be a power of 2) specifies the
 4791                                 // alignment that some part of the
 4792                                 // instruction (not necessarily the
 4793                                 // start) requires.  If > 1, a
 4794                                 // compute_padding() function must be
 4795                                 // provided for the instruction
 4796 
 4797 // Whether this node is expanded during code emission into a sequence of
 4798 // instructions and the first instruction can perform an implicit null check.
 4799 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4800 
 4801 //----------OPERANDS-----------------------------------------------------------
 4802 // Operand definitions must precede instruction definitions for correct parsing
 4803 // in the ADLC because operands constitute user defined types which are used in
 4804 // instruction definitions.
 4805 
 4806 //----------Simple Operands----------------------------------------------------
 4807 // Immediate Operands
 4808 // Integer Immediate
 4809 operand immI()
 4810 %{
 4811   match(ConI);
 4812 
 4813   op_cost(10);
 4814   format %{ %}
 4815   interface(CONST_INTER);
 4816 %}
 4817 
 4818 // Constant for test vs zero
 4819 operand immI_0()
 4820 %{
 4821   predicate(n->get_int() == 0);
 4822   match(ConI);
 4823 
 4824   op_cost(0);
 4825   format %{ %}
 4826   interface(CONST_INTER);
 4827 %}
 4828 
 4829 // Constant for increment
 4830 operand immI_1()
 4831 %{
 4832   predicate(n->get_int() == 1);
 4833   match(ConI);
 4834 
 4835   op_cost(0);
 4836   format %{ %}
 4837   interface(CONST_INTER);
 4838 %}
 4839 
 4840 // Constant for decrement
 4841 operand immI_M1()
 4842 %{
 4843   predicate(n->get_int() == -1);
 4844   match(ConI);
 4845 
 4846   op_cost(0);
 4847   format %{ %}
 4848   interface(CONST_INTER);
 4849 %}
 4850 
 4851 operand immI_2()
 4852 %{
 4853   predicate(n->get_int() == 2);
 4854   match(ConI);
 4855 
 4856   op_cost(0);
 4857   format %{ %}
 4858   interface(CONST_INTER);
 4859 %}
 4860 
 4861 operand immI_4()
 4862 %{
 4863   predicate(n->get_int() == 4);
 4864   match(ConI);
 4865 
 4866   op_cost(0);
 4867   format %{ %}
 4868   interface(CONST_INTER);
 4869 %}
 4870 
 4871 operand immI_8()
 4872 %{
 4873   predicate(n->get_int() == 8);
 4874   match(ConI);
 4875 
 4876   op_cost(0);
 4877   format %{ %}
 4878   interface(CONST_INTER);
 4879 %}
 4880 
 4881 // Valid scale values for addressing modes
 4882 operand immI2()
 4883 %{
 4884   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4885   match(ConI);
 4886 
 4887   format %{ %}
 4888   interface(CONST_INTER);
 4889 %}
 4890 
 4891 operand immU7()
 4892 %{
 4893   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4894   match(ConI);
 4895 
 4896   op_cost(5);
 4897   format %{ %}
 4898   interface(CONST_INTER);
 4899 %}
 4900 
 4901 operand immI8()
 4902 %{
 4903   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4904   match(ConI);
 4905 
 4906   op_cost(5);
 4907   format %{ %}
 4908   interface(CONST_INTER);
 4909 %}
 4910 
 4911 operand immU8()
 4912 %{
 4913   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4914   match(ConI);
 4915 
 4916   op_cost(5);
 4917   format %{ %}
 4918   interface(CONST_INTER);
 4919 %}
 4920 
 4921 operand immI16()
 4922 %{
 4923   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4924   match(ConI);
 4925 
 4926   op_cost(10);
 4927   format %{ %}
 4928   interface(CONST_INTER);
 4929 %}
 4930 
 4931 // Int Immediate non-negative
 4932 operand immU31()
 4933 %{
 4934   predicate(n->get_int() >= 0);
 4935   match(ConI);
 4936 
 4937   op_cost(0);
 4938   format %{ %}
 4939   interface(CONST_INTER);
 4940 %}
 4941 
 4942 // Pointer Immediate
 4943 operand immP()
 4944 %{
 4945   match(ConP);
 4946 
 4947   op_cost(10);
 4948   format %{ %}
 4949   interface(CONST_INTER);
 4950 %}
 4951 
 4952 // Null Pointer Immediate
 4953 operand immP0()
 4954 %{
 4955   predicate(n->get_ptr() == 0);
 4956   match(ConP);
 4957 
 4958   op_cost(5);
 4959   format %{ %}
 4960   interface(CONST_INTER);
 4961 %}
 4962 
 4963 // Pointer Immediate
 4964 operand immN() %{
 4965   match(ConN);
 4966 
 4967   op_cost(10);
 4968   format %{ %}
 4969   interface(CONST_INTER);
 4970 %}
 4971 
 4972 operand immNKlass() %{
 4973   match(ConNKlass);
 4974 
 4975   op_cost(10);
 4976   format %{ %}
 4977   interface(CONST_INTER);
 4978 %}
 4979 
 4980 // Null Pointer Immediate
 4981 operand immN0() %{
 4982   predicate(n->get_narrowcon() == 0);
 4983   match(ConN);
 4984 
 4985   op_cost(5);
 4986   format %{ %}
 4987   interface(CONST_INTER);
 4988 %}
 4989 
 4990 operand immP31()
 4991 %{
 4992   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 4993             && (n->get_ptr() >> 31) == 0);
 4994   match(ConP);
 4995 
 4996   op_cost(5);
 4997   format %{ %}
 4998   interface(CONST_INTER);
 4999 %}
 5000 
 5001 
 5002 // Long Immediate
 5003 operand immL()
 5004 %{
 5005   match(ConL);
 5006 
 5007   op_cost(20);
 5008   format %{ %}
 5009   interface(CONST_INTER);
 5010 %}
 5011 
 5012 // Long Immediate 8-bit
 5013 operand immL8()
 5014 %{
 5015   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 5016   match(ConL);
 5017 
 5018   op_cost(5);
 5019   format %{ %}
 5020   interface(CONST_INTER);
 5021 %}
 5022 
 5023 // Long Immediate 32-bit unsigned
 5024 operand immUL32()
 5025 %{
 5026   predicate(n->get_long() == (unsigned int) (n->get_long()));
 5027   match(ConL);
 5028 
 5029   op_cost(10);
 5030   format %{ %}
 5031   interface(CONST_INTER);
 5032 %}
 5033 
 5034 // Long Immediate 32-bit signed
 5035 operand immL32()
 5036 %{
 5037   predicate(n->get_long() == (int) (n->get_long()));
 5038   match(ConL);
 5039 
 5040   op_cost(15);
 5041   format %{ %}
 5042   interface(CONST_INTER);
 5043 %}
 5044 
 5045 operand immL_Pow2()
 5046 %{
 5047   predicate(is_power_of_2((julong)n->get_long()));
 5048   match(ConL);
 5049 
 5050   op_cost(15);
 5051   format %{ %}
 5052   interface(CONST_INTER);
 5053 %}
 5054 
 5055 operand immL_NotPow2()
 5056 %{
 5057   predicate(is_power_of_2((julong)~n->get_long()));
 5058   match(ConL);
 5059 
 5060   op_cost(15);
 5061   format %{ %}
 5062   interface(CONST_INTER);
 5063 %}
 5064 
 5065 // Long Immediate zero
 5066 operand immL0()
 5067 %{
 5068   predicate(n->get_long() == 0L);
 5069   match(ConL);
 5070 
 5071   op_cost(10);
 5072   format %{ %}
 5073   interface(CONST_INTER);
 5074 %}
 5075 
 5076 // Constant for increment
 5077 operand immL1()
 5078 %{
 5079   predicate(n->get_long() == 1);
 5080   match(ConL);
 5081 
 5082   format %{ %}
 5083   interface(CONST_INTER);
 5084 %}
 5085 
 5086 // Constant for decrement
 5087 operand immL_M1()
 5088 %{
 5089   predicate(n->get_long() == -1);
 5090   match(ConL);
 5091 
 5092   format %{ %}
 5093   interface(CONST_INTER);
 5094 %}
 5095 
 5096 // Long Immediate: low 32-bit mask
 5097 operand immL_32bits()
 5098 %{
 5099   predicate(n->get_long() == 0xFFFFFFFFL);
 5100   match(ConL);
 5101   op_cost(20);
 5102 
 5103   format %{ %}
 5104   interface(CONST_INTER);
 5105 %}
 5106 
 5107 // Int Immediate: 2^n-1, positive
 5108 operand immI_Pow2M1()
 5109 %{
 5110   predicate((n->get_int() > 0)
 5111             && is_power_of_2((juint)n->get_int() + 1));
 5112   match(ConI);
 5113 
 5114   op_cost(20);
 5115   format %{ %}
 5116   interface(CONST_INTER);
 5117 %}
 5118 
 5119 // Float Immediate zero
 5120 operand immF0()
 5121 %{
 5122   predicate(jint_cast(n->getf()) == 0);
 5123   match(ConF);
 5124 
 5125   op_cost(5);
 5126   format %{ %}
 5127   interface(CONST_INTER);
 5128 %}
 5129 
 5130 // Float Immediate
 5131 operand immF()
 5132 %{
 5133   match(ConF);
 5134 
 5135   op_cost(15);
 5136   format %{ %}
 5137   interface(CONST_INTER);
 5138 %}
 5139 
 5140 // Half Float Immediate
 5141 operand immH()
 5142 %{
 5143   match(ConH);
 5144 
 5145   op_cost(15);
 5146   format %{ %}
 5147   interface(CONST_INTER);
 5148 %}
 5149 
 5150 // Double Immediate zero
 5151 operand immD0()
 5152 %{
 5153   predicate(jlong_cast(n->getd()) == 0);
 5154   match(ConD);
 5155 
 5156   op_cost(5);
 5157   format %{ %}
 5158   interface(CONST_INTER);
 5159 %}
 5160 
 5161 // Double Immediate
 5162 operand immD()
 5163 %{
 5164   match(ConD);
 5165 
 5166   op_cost(15);
 5167   format %{ %}
 5168   interface(CONST_INTER);
 5169 %}
 5170 
 5171 // Immediates for special shifts (sign extend)
 5172 
 5173 // Constants for increment
 5174 operand immI_16()
 5175 %{
 5176   predicate(n->get_int() == 16);
 5177   match(ConI);
 5178 
 5179   format %{ %}
 5180   interface(CONST_INTER);
 5181 %}
 5182 
 5183 operand immI_24()
 5184 %{
 5185   predicate(n->get_int() == 24);
 5186   match(ConI);
 5187 
 5188   format %{ %}
 5189   interface(CONST_INTER);
 5190 %}
 5191 
 5192 // Constant for byte-wide masking
 5193 operand immI_255()
 5194 %{
 5195   predicate(n->get_int() == 255);
 5196   match(ConI);
 5197 
 5198   format %{ %}
 5199   interface(CONST_INTER);
 5200 %}
 5201 
 5202 // Constant for short-wide masking
 5203 operand immI_65535()
 5204 %{
 5205   predicate(n->get_int() == 65535);
 5206   match(ConI);
 5207 
 5208   format %{ %}
 5209   interface(CONST_INTER);
 5210 %}
 5211 
 5212 // Constant for byte-wide masking
 5213 operand immL_255()
 5214 %{
 5215   predicate(n->get_long() == 255);
 5216   match(ConL);
 5217 
 5218   format %{ %}
 5219   interface(CONST_INTER);
 5220 %}
 5221 
 5222 // Constant for short-wide masking
 5223 operand immL_65535()
 5224 %{
 5225   predicate(n->get_long() == 65535);
 5226   match(ConL);
 5227 
 5228   format %{ %}
 5229   interface(CONST_INTER);
 5230 %}
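
// Note on how the masking constants above are consumed: rules such as
// loadI2US and loadI2L_immI_65535 later in this file match patterns like
// (AndI (LoadI mem) immI_65535) and replace a load-then-and pair with a
// single zero-extending movzwl/movzwq.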
 5231 
 5232 operand kReg()
 5233 %{
 5234   constraint(ALLOC_IN_RC(vectmask_reg));
 5235   match(RegVectMask);
 5236   format %{ %}
 5237   interface(REG_INTER);
 5238 %}
 5239 
 5240 // Register Operands
 5241 // Integer Register
 5242 operand rRegI()
 5243 %{
 5244   constraint(ALLOC_IN_RC(int_reg));
 5245   match(RegI);
 5246 
 5247   match(rax_RegI);
 5248   match(rbx_RegI);
 5249   match(rcx_RegI);
 5250   match(rdx_RegI);
 5251   match(rdi_RegI);
 5252 
 5253   format %{ %}
 5254   interface(REG_INTER);
 5255 %}
 5256 
 5257 // Special Registers
 5258 operand rax_RegI()
 5259 %{
 5260   constraint(ALLOC_IN_RC(int_rax_reg));
 5261   match(RegI);
 5262   match(rRegI);
 5263 
 5264   format %{ "RAX" %}
 5265   interface(REG_INTER);
 5266 %}
 5267 
 5268 // Special Registers
 5269 operand rbx_RegI()
 5270 %{
 5271   constraint(ALLOC_IN_RC(int_rbx_reg));
 5272   match(RegI);
 5273   match(rRegI);
 5274 
 5275   format %{ "RBX" %}
 5276   interface(REG_INTER);
 5277 %}
 5278 
 5279 operand rcx_RegI()
 5280 %{
 5281   constraint(ALLOC_IN_RC(int_rcx_reg));
 5282   match(RegI);
 5283   match(rRegI);
 5284 
 5285   format %{ "RCX" %}
 5286   interface(REG_INTER);
 5287 %}
 5288 
 5289 operand rdx_RegI()
 5290 %{
 5291   constraint(ALLOC_IN_RC(int_rdx_reg));
 5292   match(RegI);
 5293   match(rRegI);
 5294 
 5295   format %{ "RDX" %}
 5296   interface(REG_INTER);
 5297 %}
 5298 
 5299 operand rdi_RegI()
 5300 %{
 5301   constraint(ALLOC_IN_RC(int_rdi_reg));
 5302   match(RegI);
 5303   match(rRegI);
 5304 
 5305   format %{ "RDI" %}
 5306   interface(REG_INTER);
 5307 %}
 5308 
 5309 operand no_rax_rdx_RegI()
 5310 %{
 5311   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5312   match(RegI);
 5313   match(rbx_RegI);
 5314   match(rcx_RegI);
 5315   match(rdi_RegI);
 5316 
 5317   format %{ %}
 5318   interface(REG_INTER);
 5319 %}
 5320 
 5321 operand no_rbp_r13_RegI()
 5322 %{
 5323   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5324   match(RegI);
 5325   match(rRegI);
 5326   match(rax_RegI);
 5327   match(rbx_RegI);
 5328   match(rcx_RegI);
 5329   match(rdx_RegI);
 5330   match(rdi_RegI);
 5331 
 5332   format %{ %}
 5333   interface(REG_INTER);
 5334 %}
 5335 
 5336 // Pointer Register
 5337 operand any_RegP()
 5338 %{
 5339   constraint(ALLOC_IN_RC(any_reg));
 5340   match(RegP);
 5341   match(rax_RegP);
 5342   match(rbx_RegP);
 5343   match(rdi_RegP);
 5344   match(rsi_RegP);
 5345   match(rbp_RegP);
 5346   match(r15_RegP);
 5347   match(rRegP);
 5348 
 5349   format %{ %}
 5350   interface(REG_INTER);
 5351 %}
 5352 
 5353 operand rRegP()
 5354 %{
 5355   constraint(ALLOC_IN_RC(ptr_reg));
 5356   match(RegP);
 5357   match(rax_RegP);
 5358   match(rbx_RegP);
 5359   match(rdi_RegP);
 5360   match(rsi_RegP);
 5361   match(rbp_RegP);  // See Q&A below about
 5362   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5363 
 5364   format %{ %}
 5365   interface(REG_INTER);
 5366 %}
 5367 
 5368 operand rRegN() %{
 5369   constraint(ALLOC_IN_RC(int_reg));
 5370   match(RegN);
 5371 
 5372   format %{ %}
 5373   interface(REG_INTER);
 5374 %}
 5375 
 5376 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 5377 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 5378 // It's fine for an instruction input that expects rRegP to match an r15_RegP.
 5379 // The output of an instruction is controlled by the allocator, which respects
 5380 // register class masks, not match rules.  Unless an instruction mentions
 5381 // r15_RegP or any_RegP explicitly as its output, r15 will never be chosen
 5382 // when the allocator assigns the output register.
 5383 // The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
 5384 // RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 5385 // result, RBP is never chosen for the output of such an instruction either.
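
// For illustration, a minimal commented sketch (cf. the tlsLoadP rule
// elsewhere in this file; the name here is hypothetical) of the one way r15
// does get used: an instruction that names r15_RegP explicitly as its output.
//
// instruct tlsLoadP_sketch(r15_RegP dst)
// %{
//   match(Set dst (ThreadLocal));
//   ins_cost(0);                      // TLS already lives in R15; no code emitted
//   format %{ "# TLS is in R15" %}
//   ins_encode( /*empty encoding*/ );
//   ins_pipe(ialu_reg_reg);
// %}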
 5386 
 5387 // This operand is not allowed to use RBP even if
 5388 // RBP is not used to hold the frame pointer.
 5389 operand no_rbp_RegP()
 5390 %{
 5391   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5392   match(RegP);
 5393   match(rbx_RegP);
 5394   match(rsi_RegP);
 5395   match(rdi_RegP);
 5396 
 5397   format %{ %}
 5398   interface(REG_INTER);
 5399 %}
 5400 
 5401 // Special Registers
 5402 // Return a pointer value
 5403 operand rax_RegP()
 5404 %{
 5405   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5406   match(RegP);
 5407   match(rRegP);
 5408 
 5409   format %{ %}
 5410   interface(REG_INTER);
 5411 %}
 5412 
 5413 // Special Registers
 5414 // Return a compressed pointer value
 5415 operand rax_RegN()
 5416 %{
 5417   constraint(ALLOC_IN_RC(int_rax_reg));
 5418   match(RegN);
 5419   match(rRegN);
 5420 
 5421   format %{ %}
 5422   interface(REG_INTER);
 5423 %}
 5424 
 5425 // Used in AtomicAdd
 5426 operand rbx_RegP()
 5427 %{
 5428   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5429   match(RegP);
 5430   match(rRegP);
 5431 
 5432   format %{ %}
 5433   interface(REG_INTER);
 5434 %}
 5435 
 5436 operand rsi_RegP()
 5437 %{
 5438   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5439   match(RegP);
 5440   match(rRegP);
 5441 
 5442   format %{ %}
 5443   interface(REG_INTER);
 5444 %}
 5445 
 5446 operand rbp_RegP()
 5447 %{
 5448   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5449   match(RegP);
 5450   match(rRegP);
 5451 
 5452   format %{ %}
 5453   interface(REG_INTER);
 5454 %}
 5455 
 5456 // Used in rep stosq
 5457 operand rdi_RegP()
 5458 %{
 5459   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5460   match(RegP);
 5461   match(rRegP);
 5462 
 5463   format %{ %}
 5464   interface(REG_INTER);
 5465 %}
 5466 
 5467 operand r15_RegP()
 5468 %{
 5469   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5470   match(RegP);
 5471   match(rRegP);
 5472 
 5473   format %{ %}
 5474   interface(REG_INTER);
 5475 %}
 5476 
 5477 operand rRegL()
 5478 %{
 5479   constraint(ALLOC_IN_RC(long_reg));
 5480   match(RegL);
 5481   match(rax_RegL);
 5482   match(rdx_RegL);
 5483 
 5484   format %{ %}
 5485   interface(REG_INTER);
 5486 %}
 5487 
 5488 // Special Registers
 5489 operand no_rax_rdx_RegL()
 5490 %{
 5491   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5492   match(RegL);
 5493   match(rRegL);
 5494 
 5495   format %{ %}
 5496   interface(REG_INTER);
 5497 %}
 5498 
 5499 operand rax_RegL()
 5500 %{
 5501   constraint(ALLOC_IN_RC(long_rax_reg));
 5502   match(RegL);
 5503   match(rRegL);
 5504 
 5505   format %{ "RAX" %}
 5506   interface(REG_INTER);
 5507 %}
 5508 
 5509 operand rcx_RegL()
 5510 %{
 5511   constraint(ALLOC_IN_RC(long_rcx_reg));
 5512   match(RegL);
 5513   match(rRegL);
 5514 
 5515   format %{ %}
 5516   interface(REG_INTER);
 5517 %}
 5518 
 5519 operand rdx_RegL()
 5520 %{
 5521   constraint(ALLOC_IN_RC(long_rdx_reg));
 5522   match(RegL);
 5523   match(rRegL);
 5524 
 5525   format %{ %}
 5526   interface(REG_INTER);
 5527 %}
 5528 
 5529 operand r11_RegL()
 5530 %{
 5531   constraint(ALLOC_IN_RC(long_r11_reg));
 5532   match(RegL);
 5533   match(rRegL);
 5534 
 5535   format %{ %}
 5536   interface(REG_INTER);
 5537 %}
 5538 
 5539 operand no_rbp_r13_RegL()
 5540 %{
 5541   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5542   match(RegL);
 5543   match(rRegL);
 5544   match(rax_RegL);
 5545   match(rcx_RegL);
 5546   match(rdx_RegL);
 5547 
 5548   format %{ %}
 5549   interface(REG_INTER);
 5550 %}
 5551 
 5552 // Flags register, used as output of compare instructions
 5553 operand rFlagsReg()
 5554 %{
 5555   constraint(ALLOC_IN_RC(int_flags));
 5556   match(RegFlags);
 5557 
 5558   format %{ "RFLAGS" %}
 5559   interface(REG_INTER);
 5560 %}
 5561 
 5562 // Flags register, used as output of FLOATING POINT compare instructions
 5563 operand rFlagsRegU()
 5564 %{
 5565   constraint(ALLOC_IN_RC(int_flags));
 5566   match(RegFlags);
 5567 
 5568   format %{ "RFLAGS_U" %}
 5569   interface(REG_INTER);
 5570 %}
 5571 
 5572 operand rFlagsRegUCF() %{
 5573   constraint(ALLOC_IN_RC(int_flags));
 5574   match(RegFlags);
 5575   predicate(false);
 5576 
 5577   format %{ "RFLAGS_U_CF" %}
 5578   interface(REG_INTER);
 5579 %}
 5580 
 5581 // Float register operands
 5582 operand regF() %{
 5583    constraint(ALLOC_IN_RC(float_reg));
 5584    match(RegF);
 5585 
 5586    format %{ %}
 5587    interface(REG_INTER);
 5588 %}
 5589 
 5590 // Float register operands
 5591 operand legRegF() %{
 5592    constraint(ALLOC_IN_RC(float_reg_legacy));
 5593    match(RegF);
 5594 
 5595    format %{ %}
 5596    interface(REG_INTER);
 5597 %}
 5598 
 5599 // Float register operands
 5600 operand vlRegF() %{
 5601    constraint(ALLOC_IN_RC(float_reg_vl));
 5602    match(RegF);
 5603 
 5604    format %{ %}
 5605    interface(REG_INTER);
 5606 %}
 5607 
 5608 // Double register operands
 5609 operand regD() %{
 5610    constraint(ALLOC_IN_RC(double_reg));
 5611    match(RegD);
 5612 
 5613    format %{ %}
 5614    interface(REG_INTER);
 5615 %}
 5616 
 5617 // Double register operands
 5618 operand legRegD() %{
 5619    constraint(ALLOC_IN_RC(double_reg_legacy));
 5620    match(RegD);
 5621 
 5622    format %{ %}
 5623    interface(REG_INTER);
 5624 %}
 5625 
 5626 // Double register operands
 5627 operand vlRegD() %{
 5628    constraint(ALLOC_IN_RC(double_reg_vl));
 5629    match(RegD);
 5630 
 5631    format %{ %}
 5632    interface(REG_INTER);
 5633 %}
 5634 
 5635 //----------Memory Operands----------------------------------------------------
 5636 // Direct Memory Operand
 5637 // operand direct(immP addr)
 5638 // %{
 5639 //   match(addr);
 5640 
 5641 //   format %{ "[$addr]" %}
 5642 //   interface(MEMORY_INTER) %{
 5643 //     base(0xFFFFFFFF);
 5644 //     index(0x4);
 5645 //     scale(0x0);
 5646 //     disp($addr);
 5647 //   %}
 5648 // %}
 5649 
 5650 // Indirect Memory Operand
 5651 operand indirect(any_RegP reg)
 5652 %{
 5653   constraint(ALLOC_IN_RC(ptr_reg));
 5654   match(reg);
 5655 
 5656   format %{ "[$reg]" %}
 5657   interface(MEMORY_INTER) %{
 5658     base($reg);
 5659     index(0x4);
 5660     scale(0x0);
 5661     disp(0x0);
 5662   %}
 5663 %}
 5664 
 5665 // Indirect Memory Plus Short Offset Operand
 5666 operand indOffset8(any_RegP reg, immL8 off)
 5667 %{
 5668   constraint(ALLOC_IN_RC(ptr_reg));
 5669   match(AddP reg off);
 5670 
 5671   format %{ "[$reg + $off (8-bit)]" %}
 5672   interface(MEMORY_INTER) %{
 5673     base($reg);
 5674     index(0x4);
 5675     scale(0x0);
 5676     disp($off);
 5677   %}
 5678 %}
 5679 
 5680 // Indirect Memory Plus Long Offset Operand
 5681 operand indOffset32(any_RegP reg, immL32 off)
 5682 %{
 5683   constraint(ALLOC_IN_RC(ptr_reg));
 5684   match(AddP reg off);
 5685 
 5686   format %{ "[$reg + $off (32-bit)]" %}
 5687   interface(MEMORY_INTER) %{
 5688     base($reg);
 5689     index(0x4);
 5690     scale(0x0);
 5691     disp($off);
 5692   %}
 5693 %}
 5694 
 5695 // Indirect Memory Plus Index Register Plus Offset Operand
 5696 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5697 %{
 5698   constraint(ALLOC_IN_RC(ptr_reg));
 5699   match(AddP (AddP reg lreg) off);
 5700 
 5701   op_cost(10);
 5702   format %{"[$reg + $off + $lreg]" %}
 5703   interface(MEMORY_INTER) %{
 5704     base($reg);
 5705     index($lreg);
 5706     scale(0x0);
 5707     disp($off);
 5708   %}
 5709 %}
 5710 
 5711 // Indirect Memory Plus Index Register Plus Offset Operand
 5712 operand indIndex(any_RegP reg, rRegL lreg)
 5713 %{
 5714   constraint(ALLOC_IN_RC(ptr_reg));
 5715   match(AddP reg lreg);
 5716 
 5717   op_cost(10);
 5718   format %{"[$reg + $lreg]" %}
 5719   interface(MEMORY_INTER) %{
 5720     base($reg);
 5721     index($lreg);
 5722     scale(0x0);
 5723     disp(0x0);
 5724   %}
 5725 %}
 5726 
 5727 // Indirect Memory Times Scale Plus Index Register
 5728 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5729 %{
 5730   constraint(ALLOC_IN_RC(ptr_reg));
 5731   match(AddP reg (LShiftL lreg scale));
 5732 
 5733   op_cost(10);
 5734   format %{"[$reg + $lreg << $scale]" %}
 5735   interface(MEMORY_INTER) %{
 5736     base($reg);
 5737     index($lreg);
 5738     scale($scale);
 5739     disp(0x0);
 5740   %}
 5741 %}
 5742 
 5743 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5744 %{
 5745   constraint(ALLOC_IN_RC(ptr_reg));
 5746   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5747   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5748 
 5749   op_cost(10);
 5750   format %{"[$reg + pos $idx << $scale]" %}
 5751   interface(MEMORY_INTER) %{
 5752     base($reg);
 5753     index($idx);
 5754     scale($scale);
 5755     disp(0x0);
 5756   %}
 5757 %}
 5758 
 5759 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5760 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5761 %{
 5762   constraint(ALLOC_IN_RC(ptr_reg));
 5763   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5764 
 5765   op_cost(10);
 5766   format %{"[$reg + $off + $lreg << $scale]" %}
 5767   interface(MEMORY_INTER) %{
 5768     base($reg);
 5769     index($lreg);
 5770     scale($scale);
 5771     disp($off);
 5772   %}
 5773 %}
 5774 
 5775 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5776 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5777 %{
 5778   constraint(ALLOC_IN_RC(ptr_reg));
 5779   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5780   match(AddP (AddP reg (ConvI2L idx)) off);
 5781 
 5782   op_cost(10);
 5783   format %{"[$reg + $off + $idx]" %}
 5784   interface(MEMORY_INTER) %{
 5785     base($reg);
 5786     index($idx);
 5787     scale(0x0);
 5788     disp($off);
 5789   %}
 5790 %}
 5791 
 5792 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5793 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5794 %{
 5795   constraint(ALLOC_IN_RC(ptr_reg));
 5796   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5797   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5798 
 5799   op_cost(10);
 5800   format %{"[$reg + $off + $idx << $scale]" %}
 5801   interface(MEMORY_INTER) %{
 5802     base($reg);
 5803     index($idx);
 5804     scale($scale);
 5805     disp($off);
 5806   %}
 5807 %}
 5808 
 5809 // Indirect Narrow Oop Operand
 5810 operand indCompressedOop(rRegN reg) %{
 5811   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5812   constraint(ALLOC_IN_RC(ptr_reg));
 5813   match(DecodeN reg);
 5814 
 5815   op_cost(10);
 5816   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5817   interface(MEMORY_INTER) %{
 5818     base(0xc); // R12
 5819     index($reg);
 5820     scale(0x3);
 5821     disp(0x0);
 5822   %}
 5823 %}
 5824 
 5825 // Indirect Narrow Oop Plus Offset Operand
 5826 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
 5827 // so we can't free up R12 even with CompressedOops::base() == nullptr.
 5828 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5829   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5830   constraint(ALLOC_IN_RC(ptr_reg));
 5831   match(AddP (DecodeN reg) off);
 5832 
 5833   op_cost(10);
 5834   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5835   interface(MEMORY_INTER) %{
 5836     base(0xc); // R12
 5837     index($reg);
 5838     scale(0x3);
 5839     disp($off);
 5840   %}
 5841 %}
 5842 
 5843 // Indirect Memory Operand
 5844 operand indirectNarrow(rRegN reg)
 5845 %{
 5846   predicate(CompressedOops::shift() == 0);
 5847   constraint(ALLOC_IN_RC(ptr_reg));
 5848   match(DecodeN reg);
 5849 
 5850   format %{ "[$reg]" %}
 5851   interface(MEMORY_INTER) %{
 5852     base($reg);
 5853     index(0x4);
 5854     scale(0x0);
 5855     disp(0x0);
 5856   %}
 5857 %}
 5858 
 5859 // Indirect Memory Plus Short Offset Operand
 5860 operand indOffset8Narrow(rRegN reg, immL8 off)
 5861 %{
 5862   predicate(CompressedOops::shift() == 0);
 5863   constraint(ALLOC_IN_RC(ptr_reg));
 5864   match(AddP (DecodeN reg) off);
 5865 
 5866   format %{ "[$reg + $off (8-bit)]" %}
 5867   interface(MEMORY_INTER) %{
 5868     base($reg);
 5869     index(0x4);
 5870     scale(0x0);
 5871     disp($off);
 5872   %}
 5873 %}
 5874 
 5875 // Indirect Memory Plus Long Offset Operand
 5876 operand indOffset32Narrow(rRegN reg, immL32 off)
 5877 %{
 5878   predicate(CompressedOops::shift() == 0);
 5879   constraint(ALLOC_IN_RC(ptr_reg));
 5880   match(AddP (DecodeN reg) off);
 5881 
 5882   format %{ "[$reg + $off (32-bit)]" %}
 5883   interface(MEMORY_INTER) %{
 5884     base($reg);
 5885     index(0x4);
 5886     scale(0x0);
 5887     disp($off);
 5888   %}
 5889 %}
 5890 
 5891 // Indirect Memory Plus Index Register Plus Offset Operand
 5892 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5893 %{
 5894   predicate(CompressedOops::shift() == 0);
 5895   constraint(ALLOC_IN_RC(ptr_reg));
 5896   match(AddP (AddP (DecodeN reg) lreg) off);
 5897 
 5898   op_cost(10);
 5899   format %{"[$reg + $off + $lreg]" %}
 5900   interface(MEMORY_INTER) %{
 5901     base($reg);
 5902     index($lreg);
 5903     scale(0x0);
 5904     disp($off);
 5905   %}
 5906 %}
 5907 
 5908 // Indirect Memory Plus Index Register Plus Offset Operand
 5909 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5910 %{
 5911   predicate(CompressedOops::shift() == 0);
 5912   constraint(ALLOC_IN_RC(ptr_reg));
 5913   match(AddP (DecodeN reg) lreg);
 5914 
 5915   op_cost(10);
 5916   format %{"[$reg + $lreg]" %}
 5917   interface(MEMORY_INTER) %{
 5918     base($reg);
 5919     index($lreg);
 5920     scale(0x0);
 5921     disp(0x0);
 5922   %}
 5923 %}
 5924 
 5925 // Indirect Memory Times Scale Plus Index Register
 5926 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5927 %{
 5928   predicate(CompressedOops::shift() == 0);
 5929   constraint(ALLOC_IN_RC(ptr_reg));
 5930   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5931 
 5932   op_cost(10);
 5933   format %{"[$reg + $lreg << $scale]" %}
 5934   interface(MEMORY_INTER) %{
 5935     base($reg);
 5936     index($lreg);
 5937     scale($scale);
 5938     disp(0x0);
 5939   %}
 5940 %}
 5941 
 5942 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5943 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5944 %{
 5945   predicate(CompressedOops::shift() == 0);
 5946   constraint(ALLOC_IN_RC(ptr_reg));
 5947   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5948 
 5949   op_cost(10);
 5950   format %{"[$reg + $off + $lreg << $scale]" %}
 5951   interface(MEMORY_INTER) %{
 5952     base($reg);
 5953     index($lreg);
 5954     scale($scale);
 5955     disp($off);
 5956   %}
 5957 %}
 5958 
 5959 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5960 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5961 %{
 5962   constraint(ALLOC_IN_RC(ptr_reg));
 5963   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5964   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5965 
 5966   op_cost(10);
 5967   format %{"[$reg + $off + $idx]" %}
 5968   interface(MEMORY_INTER) %{
 5969     base($reg);
 5970     index($idx);
 5971     scale(0x0);
 5972     disp($off);
 5973   %}
 5974 %}
 5975 
 5976 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5977 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5978 %{
 5979   constraint(ALLOC_IN_RC(ptr_reg));
 5980   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5981   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 5982 
 5983   op_cost(10);
 5984   format %{"[$reg + $off + $idx << $scale]" %}
 5985   interface(MEMORY_INTER) %{
 5986     base($reg);
 5987     index($idx);
 5988     scale($scale);
 5989     disp($off);
 5990   %}
 5991 %}
 5992 
 5993 //----------Special Memory Operands--------------------------------------------
 5994 // Stack Slot Operand - This operand is used for loading and storing temporary
 5995 //                      values on the stack where a match requires a value to
 5996 //                      flow through memory.
 5997 operand stackSlotP(sRegP reg)
 5998 %{
 5999   constraint(ALLOC_IN_RC(stack_slots));
 6000   // No match rule because this operand is only generated in matching
 6001 
 6002   format %{ "[$reg]" %}
 6003   interface(MEMORY_INTER) %{
 6004     base(0x4);   // RSP
 6005     index(0x4);  // No Index
 6006     scale(0x0);  // No Scale
 6007     disp($reg);  // Stack Offset
 6008   %}
 6009 %}
 6010 
 6011 operand stackSlotI(sRegI reg)
 6012 %{
 6013   constraint(ALLOC_IN_RC(stack_slots));
 6014   // No match rule because this operand is only generated in matching
 6015 
 6016   format %{ "[$reg]" %}
 6017   interface(MEMORY_INTER) %{
 6018     base(0x4);   // RSP
 6019     index(0x4);  // No Index
 6020     scale(0x0);  // No Scale
 6021     disp($reg);  // Stack Offset
 6022   %}
 6023 %}
 6024 
 6025 operand stackSlotF(sRegF reg)
 6026 %{
 6027   constraint(ALLOC_IN_RC(stack_slots));
 6028   // No match rule because this operand is only generated in matching
 6029 
 6030   format %{ "[$reg]" %}
 6031   interface(MEMORY_INTER) %{
 6032     base(0x4);   // RSP
 6033     index(0x4);  // No Index
 6034     scale(0x0);  // No Scale
 6035     disp($reg);  // Stack Offset
 6036   %}
 6037 %}
 6038 
 6039 operand stackSlotD(sRegD reg)
 6040 %{
 6041   constraint(ALLOC_IN_RC(stack_slots));
 6042   // No match rule because this operand is only generated in matching
 6043 
 6044   format %{ "[$reg]" %}
 6045   interface(MEMORY_INTER) %{
 6046     base(0x4);   // RSP
 6047     index(0x4);  // No Index
 6048     scale(0x0);  // No Scale
 6049     disp($reg);  // Stack Offset
 6050   %}
 6051 %}

 6052 operand stackSlotL(sRegL reg)
 6053 %{
 6054   constraint(ALLOC_IN_RC(stack_slots));
 6055   // No match rule because this operand is only generated in matching
 6056 
 6057   format %{ "[$reg]" %}
 6058   interface(MEMORY_INTER) %{
 6059     base(0x4);   // RSP
 6060     index(0x4);  // No Index
 6061     scale(0x0);  // No Scale
 6062     disp($reg);  // Stack Offset
 6063   %}
 6064 %}
 6065 
 6066 //----------Conditional Branch Operands----------------------------------------
 6067 // Comparison Op  - This is the operation of the comparison, and is limited to
 6068 //                  the following set of codes:
 6069 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6070 //
 6071 // Other attributes of the comparison, such as unsignedness, are specified
 6072 // by the comparison instruction that sets a condition code flags register.
 6073 // That result is represented by a flags operand whose subtype is appropriate
 6074 // to the unsignedness (etc.) of the comparison.
 6075 //
 6076 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6077 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6078 // by matching a specific subtype of Bool operand below, such as cmpOpU.
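
// For illustration, a commented sketch (hypothetical name; compare the real
// branch rules later in this file) of an instruction that consumes both a
// Bool operand and the flags it tests.  The condition encoding selected by
// the COND_INTER tables below is retrieved as $cop$$cmpcode:
//
// instruct jmpCon_sketch(cmpOp cop, rFlagsReg cr, label labl)
// %{
//   match(If cop cr);
//   effect(USE labl);
//   format %{ "j$cop   $labl" %}
//   ins_encode %{
//     __ jcc((Assembler::Condition)($cop$$cmpcode), *($labl$$label));
//   %}
//   ins_pipe(pipe_jcc);
// %}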
 6079 
 6080 // Comparison Code
 6081 operand cmpOp()
 6082 %{
 6083   match(Bool);
 6084 
 6085   format %{ "" %}
 6086   interface(COND_INTER) %{
 6087     equal(0x4, "e");
 6088     not_equal(0x5, "ne");
 6089     less(0xC, "l");
 6090     greater_equal(0xD, "ge");
 6091     less_equal(0xE, "le");
 6092     greater(0xF, "g");
 6093     overflow(0x0, "o");
 6094     no_overflow(0x1, "no");
 6095   %}
 6096 %}
 6097 
 6098 // Comparison Code, unsigned compare.  Used by FP also, with
 6099 // C2 (unordered) turned into GT or LT already.  The other bits
 6100 // C0 and C3 are turned into Carry & Zero flags.
 6101 operand cmpOpU()
 6102 %{
 6103   match(Bool);
 6104 
 6105   format %{ "" %}
 6106   interface(COND_INTER) %{
 6107     equal(0x4, "e");
 6108     not_equal(0x5, "ne");
 6109     less(0x2, "b");
 6110     greater_equal(0x3, "ae");
 6111     less_equal(0x6, "be");
 6112     greater(0x7, "a");
 6113     overflow(0x0, "o");
 6114     no_overflow(0x1, "no");
 6115   %}
 6116 %}
 6117 
 6118 
 6119 // Floating comparisons that don't require any fixup for the unordered case.
 6120 // If both inputs of the comparison are the same, ZF is always set, so we
 6121 // don't need to use cmpOpUCF2 for eq/ne.
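//
// For reference (per the x86 manuals), ucomiss/ucomisd set the flags as:
//   unordered:    ZF=1, PF=1, CF=1
//   less than:    ZF=0, PF=0, CF=1
//   equal:        ZF=1, PF=0, CF=0
//   greater than: ZF=0, PF=0, CF=0
// So lt/le/gt/ge map directly onto the unsigned codes below, while eq/ne
// must also test PF (the "np"/"p" encodings) to exclude the unordered case.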
 6122 operand cmpOpUCF() %{
 6123   match(Bool);
 6124   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 6125             n->as_Bool()->_test._test == BoolTest::ge ||
 6126             n->as_Bool()->_test._test == BoolTest::le ||
 6127             n->as_Bool()->_test._test == BoolTest::gt ||
 6128             n->in(1)->in(1) == n->in(1)->in(2));
 6129   format %{ "" %}
 6130   interface(COND_INTER) %{
 6131     equal(0xb, "np");
 6132     not_equal(0xa, "p");
 6133     less(0x2, "b");
 6134     greater_equal(0x3, "ae");
 6135     less_equal(0x6, "be");
 6136     greater(0x7, "a");
 6137     overflow(0x0, "o");
 6138     no_overflow(0x1, "no");
 6139   %}
 6140 %}
 6141 
 6142 
 6143 // Floating comparisons that can be fixed up with extra conditional jumps
 6144 operand cmpOpUCF2() %{
 6145   match(Bool);
 6146   predicate((n->as_Bool()->_test._test == BoolTest::ne ||
 6147              n->as_Bool()->_test._test == BoolTest::eq) &&
 6148             n->in(1)->in(1) != n->in(1)->in(2));
 6149   format %{ "" %}
 6150   interface(COND_INTER) %{
 6151     equal(0x4, "e");
 6152     not_equal(0x5, "ne");
 6153     less(0x2, "b");
 6154     greater_equal(0x3, "ae");
 6155     less_equal(0x6, "be");
 6156     greater(0x7, "a");
 6157     overflow(0x0, "o");
 6158     no_overflow(0x1, "no");
 6159   %}
 6160 %}
 6161 
 6162 // Operands for bound floating point register arguments
 6163 operand rxmm0() %{
 6164   constraint(ALLOC_IN_RC(xmm0_reg));
 6165   match(VecX);
 6166   format %{ %}
 6167   interface(REG_INTER);
 6168 %}
 6169 
 6170 // Vectors
 6171 
 6172 // Dummy generic vector class. Should be used for all vector operands.
 6173 // Replaced with vec[SDXYZ] during post-selection pass.
 6174 operand vec() %{
 6175   constraint(ALLOC_IN_RC(dynamic));
 6176   match(VecX);
 6177   match(VecY);
 6178   match(VecZ);
 6179   match(VecS);
 6180   match(VecD);
 6181 
 6182   format %{ %}
 6183   interface(REG_INTER);
 6184 %}
 6185 
 6186 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6187 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6188 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6189 // runtime code generation via reg_class_dynamic.
 6190 operand legVec() %{
 6191   constraint(ALLOC_IN_RC(dynamic));
 6192   match(VecX);
 6193   match(VecY);
 6194   match(VecZ);
 6195   match(VecS);
 6196   match(VecD);
 6197 
 6198   format %{ %}
 6199   interface(REG_INTER);
 6200 %}
 6201 
 6202 // Replaces vec during post-selection cleanup. See above.
 6203 operand vecS() %{
 6204   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6205   match(VecS);
 6206 
 6207   format %{ %}
 6208   interface(REG_INTER);
 6209 %}
 6210 
 6211 // Replaces legVec during post-selection cleanup. See above.
 6212 operand legVecS() %{
 6213   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6214   match(VecS);
 6215 
 6216   format %{ %}
 6217   interface(REG_INTER);
 6218 %}
 6219 
 6220 // Replaces vec during post-selection cleanup. See above.
 6221 operand vecD() %{
 6222   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6223   match(VecD);
 6224 
 6225   format %{ %}
 6226   interface(REG_INTER);
 6227 %}
 6228 
 6229 // Replaces legVec during post-selection cleanup. See above.
 6230 operand legVecD() %{
 6231   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6232   match(VecD);
 6233 
 6234   format %{ %}
 6235   interface(REG_INTER);
 6236 %}
 6237 
 6238 // Replaces vec during post-selection cleanup. See above.
 6239 operand vecX() %{
 6240   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6241   match(VecX);
 6242 
 6243   format %{ %}
 6244   interface(REG_INTER);
 6245 %}
 6246 
 6247 // Replaces legVec during post-selection cleanup. See above.
 6248 operand legVecX() %{
 6249   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6250   match(VecX);
 6251 
 6252   format %{ %}
 6253   interface(REG_INTER);
 6254 %}
 6255 
 6256 // Replaces vec during post-selection cleanup. See above.
 6257 operand vecY() %{
 6258   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6259   match(VecY);
 6260 
 6261   format %{ %}
 6262   interface(REG_INTER);
 6263 %}
 6264 
 6265 // Replaces legVec during post-selection cleanup. See above.
 6266 operand legVecY() %{
 6267   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6268   match(VecY);
 6269 
 6270   format %{ %}
 6271   interface(REG_INTER);
 6272 %}
 6273 
 6274 // Replaces vec during post-selection cleanup. See above.
 6275 operand vecZ() %{
 6276   constraint(ALLOC_IN_RC(vectorz_reg));
 6277   match(VecZ);
 6278 
 6279   format %{ %}
 6280   interface(REG_INTER);
 6281 %}
 6282 
 6283 // Replaces legVec during post-selection cleanup. See above.
 6284 operand legVecZ() %{
 6285   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6286   match(VecZ);
 6287 
 6288   format %{ %}
 6289   interface(REG_INTER);
 6290 %}
 6291 
 6292 //----------OPERAND CLASSES----------------------------------------------------
 6293 // Operand Classes are groups of operands that are used to simplify
 6294 // instruction definitions by not requiring the AD writer to specify separate
 6295 // instructions for every form of operand when the instruction accepts
 6296 // multiple operand types with the same basic encoding and format.  The classic
 6297 // case of this is memory operands.
 6298 
 6299 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6300                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6301                indCompressedOop, indCompressedOopOffset,
 6302                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6303                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6304                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
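
// For illustration (commentary only): a rule written against this opclass,
// such as loadI further below,
//
//   instruct loadI(rRegI dst, memory mem)
//   %{
//     match(Set dst (LoadI mem));
//     ...
//   %}
//
// stands in for one nearly identical rule per addressing-mode operand listed
// above (indirect, indOffset8, indOffset32, and so on).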
 6305 
 6306 //----------PIPELINE-----------------------------------------------------------
 6307 // Rules which define the behavior of the target architecture's pipeline.
 6308 pipeline %{
 6309 
 6310 //----------ATTRIBUTES---------------------------------------------------------
 6311 attributes %{
 6312   variable_size_instructions;        // Variable-sized instructions
 6313   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6314   instruction_unit_size = 1;         // An instruction is 1 byte long
 6315   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6316   instruction_fetch_units = 1;       // of 16 bytes
 6317 %}
 6318 
 6319 //----------RESOURCES----------------------------------------------------------
 6320 // Resources are the functional units available to the machine
 6321 
 6322 // Generic P2/P3 pipeline
 6323 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6324 // 3 instructions decoded per cycle.
 6325 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 6326 // 3 ALU ops, only ALU0 handles mul instructions.
 6327 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6328            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6329            BR, FPU,
 6330            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6331 
 6332 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6333 // Pipeline Description specifies the stages in the machine's pipeline
 6334 
 6335 // Generic P2/P3 pipeline
 6336 pipe_desc(S0, S1, S2, S3, S4, S5);
 6337 
 6338 //----------PIPELINE CLASSES---------------------------------------------------
 6339 // Pipeline Classes describe the stages in which input and output are
 6340 // referenced by the hardware pipeline.
 6341 
 6342 // Naming convention: ialu or fpu
 6343 // Then: _reg
 6344 // Then: _reg if there is a 2nd register
 6345 // Then: _long if it's a pair of instructions implementing a long
 6346 // Then: _fat if it requires the big decoder
 6347 //   Or: _mem if it requires the big decoder and a memory unit.
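//
// For example, reading the convention above: ialu_reg_mem (defined below) is
// an integer ALU op whose second input is memory, so it claims the big
// decoder (D0) and a memory unit (MEM); fpu_reg_reg is an FPU op on two
// registers and can use any decoders.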
 6348 
 6349 // Integer ALU reg operation
 6350 pipe_class ialu_reg(rRegI dst)
 6351 %{
 6352     single_instruction;
 6353     dst    : S4(write);
 6354     dst    : S3(read);
 6355     DECODE : S0;        // any decoder
 6356     ALU    : S3;        // any alu
 6357 %}
 6358 
 6359 // Long ALU reg operation
 6360 pipe_class ialu_reg_long(rRegL dst)
 6361 %{
 6362     instruction_count(2);
 6363     dst    : S4(write);
 6364     dst    : S3(read);
 6365     DECODE : S0(2);     // any 2 decoders
 6366     ALU    : S3(2);     // both alus
 6367 %}
 6368 
 6369 // Integer ALU reg operation using big decoder
 6370 pipe_class ialu_reg_fat(rRegI dst)
 6371 %{
 6372     single_instruction;
 6373     dst    : S4(write);
 6374     dst    : S3(read);
 6375     D0     : S0;        // big decoder only
 6376     ALU    : S3;        // any alu
 6377 %}
 6378 
 6379 // Integer ALU reg-reg operation
 6380 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6381 %{
 6382     single_instruction;
 6383     dst    : S4(write);
 6384     src    : S3(read);
 6385     DECODE : S0;        // any decoder
 6386     ALU    : S3;        // any alu
 6387 %}
 6388 
 6389 // Integer ALU reg-reg operation using the big decoder
 6390 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6391 %{
 6392     single_instruction;
 6393     dst    : S4(write);
 6394     src    : S3(read);
 6395     D0     : S0;        // big decoder only
 6396     ALU    : S3;        // any alu
 6397 %}
 6398 
 6399 // Integer ALU reg-mem operation
 6400 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6401 %{
 6402     single_instruction;
 6403     dst    : S5(write);
 6404     mem    : S3(read);
 6405     D0     : S0;        // big decoder only
 6406     ALU    : S4;        // any alu
 6407     MEM    : S3;        // any mem
 6408 %}
 6409 
 6410 // Integer mem operation (prefetch)
 6411 pipe_class ialu_mem(memory mem)
 6412 %{
 6413     single_instruction;
 6414     mem    : S3(read);
 6415     D0     : S0;        // big decoder only
 6416     MEM    : S3;        // any mem
 6417 %}
 6418 
 6419 // Integer Store to Memory
 6420 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6421 %{
 6422     single_instruction;
 6423     mem    : S3(read);
 6424     src    : S5(read);
 6425     D0     : S0;        // big decoder only
 6426     ALU    : S4;        // any alu
 6427     MEM    : S3;
 6428 %}
 6429 
 6430 // // Long Store to Memory
 6431 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6432 // %{
 6433 //     instruction_count(2);
 6434 //     mem    : S3(read);
 6435 //     src    : S5(read);
 6436 //     D0     : S0(2);     // big decoder only; twice
 6437 //     ALU    : S4(2);     // any 2 alus
 6438 //     MEM    : S3(2);     // both mems
 6439 // %}
 6440 
 6441 // Integer Store to Memory
 6442 pipe_class ialu_mem_imm(memory mem)
 6443 %{
 6444     single_instruction;
 6445     mem    : S3(read);
 6446     D0     : S0;        // big decoder only
 6447     ALU    : S4;        // any alu
 6448     MEM    : S3;
 6449 %}
 6450 
 6451 // Integer ALU0 reg-reg operation
 6452 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6453 %{
 6454     single_instruction;
 6455     dst    : S4(write);
 6456     src    : S3(read);
 6457     D0     : S0;        // Big decoder only
 6458     ALU0   : S3;        // only alu0
 6459 %}
 6460 
 6461 // Integer ALU0 reg-mem operation
 6462 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6463 %{
 6464     single_instruction;
 6465     dst    : S5(write);
 6466     mem    : S3(read);
 6467     D0     : S0;        // big decoder only
 6468     ALU0   : S4;        // ALU0 only
 6469     MEM    : S3;        // any mem
 6470 %}
 6471 
 6472 // Integer ALU reg-reg operation
 6473 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6474 %{
 6475     single_instruction;
 6476     cr     : S4(write);
 6477     src1   : S3(read);
 6478     src2   : S3(read);
 6479     DECODE : S0;        // any decoder
 6480     ALU    : S3;        // any alu
 6481 %}
 6482 
 6483 // Integer ALU reg-imm operation
 6484 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6485 %{
 6486     single_instruction;
 6487     cr     : S4(write);
 6488     src1   : S3(read);
 6489     DECODE : S0;        // any decoder
 6490     ALU    : S3;        // any alu
 6491 %}
 6492 
 6493 // Integer ALU reg-mem operation
 6494 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6495 %{
 6496     single_instruction;
 6497     cr     : S4(write);
 6498     src1   : S3(read);
 6499     src2   : S3(read);
 6500     D0     : S0;        // big decoder only
 6501     ALU    : S4;        // any alu
 6502     MEM    : S3;
 6503 %}
 6504 
 6505 // Conditional move reg-reg
 6506 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6507 %{
 6508     instruction_count(4);
 6509     y      : S4(read);
 6510     q      : S3(read);
 6511     p      : S3(read);
 6512     DECODE : S0(4);     // any decoder
 6513 %}
 6514 
 6515 // Conditional move reg-reg
 6516 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6517 %{
 6518     single_instruction;
 6519     dst    : S4(write);
 6520     src    : S3(read);
 6521     cr     : S3(read);
 6522     DECODE : S0;        // any decoder
 6523 %}
 6524 
 6525 // Conditional move reg-mem
 6526 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6527 %{
 6528     single_instruction;
 6529     dst    : S4(write);
 6530     src    : S3(read);
 6531     cr     : S3(read);
 6532     DECODE : S0;        // any decoder
 6533     MEM    : S3;
 6534 %}
 6535 
 6536 // Conditional move reg-reg long
 6537 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6538 %{
 6539     single_instruction;
 6540     dst    : S4(write);
 6541     src    : S3(read);
 6542     cr     : S3(read);
 6543     DECODE : S0(2);     // any 2 decoders
 6544 %}
 6545 
 6546 // Float reg operation
 6547 pipe_class fpu_reg(regD dst)
 6548 %{
 6549     instruction_count(2);
 6550     dst    : S3(read);
 6551     DECODE : S0(2);     // any 2 decoders
 6552     FPU    : S3;
 6553 %}
 6554 
 6555 // Float reg-reg operation
 6556 pipe_class fpu_reg_reg(regD dst, regD src)
 6557 %{
 6558     instruction_count(2);
 6559     dst    : S4(write);
 6560     src    : S3(read);
 6561     DECODE : S0(2);     // any 2 decoders
 6562     FPU    : S3;
 6563 %}
 6564 
 6565 // Float reg-reg operation
 6566 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6567 %{
 6568     instruction_count(3);
 6569     dst    : S4(write);
 6570     src1   : S3(read);
 6571     src2   : S3(read);
 6572     DECODE : S0(3);     // any 3 decoders
 6573     FPU    : S3(2);
 6574 %}
 6575 
 6576 // Float reg-reg operation
 6577 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6578 %{
 6579     instruction_count(4);
 6580     dst    : S4(write);
 6581     src1   : S3(read);
 6582     src2   : S3(read);
 6583     src3   : S3(read);
 6584     DECODE : S0(4);     // any 4 decoders
 6585     FPU    : S3(2);
 6586 %}
 6587 
 6588 // Float reg-mem operation
 6589 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6590 %{
 6591     instruction_count(4);
 6592     dst    : S4(write);
 6593     src1   : S3(read);
 6594     src2   : S3(read);
 6595     src3   : S3(read);
 6596     DECODE : S1(3);     // any 3 decoders
 6597     D0     : S0;        // Big decoder only
 6598     FPU    : S3(2);
 6599     MEM    : S3;
 6600 %}
 6601 
 6602 // Float reg-mem operation
 6603 pipe_class fpu_reg_mem(regD dst, memory mem)
 6604 %{
 6605     instruction_count(2);
 6606     dst    : S5(write);
 6607     mem    : S3(read);
 6608     D0     : S0;        // big decoder only
 6609     DECODE : S1;        // any decoder for FPU POP
 6610     FPU    : S4;
 6611     MEM    : S3;        // any mem
 6612 %}
 6613 
 6614 // Float reg-mem operation
 6615 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6616 %{
 6617     instruction_count(3);
 6618     dst    : S5(write);
 6619     src1   : S3(read);
 6620     mem    : S3(read);
 6621     D0     : S0;        // big decoder only
 6622     DECODE : S1(2);     // any decoder for FPU POP
 6623     FPU    : S4;
 6624     MEM    : S3;        // any mem
 6625 %}
 6626 
 6627 // Float mem-reg operation
 6628 pipe_class fpu_mem_reg(memory mem, regD src)
 6629 %{
 6630     instruction_count(2);
 6631     src    : S5(read);
 6632     mem    : S3(read);
 6633     DECODE : S0;        // any decoder for FPU PUSH
 6634     D0     : S1;        // big decoder only
 6635     FPU    : S4;
 6636     MEM    : S3;        // any mem
 6637 %}
 6638 
 6639 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6640 %{
 6641     instruction_count(3);
 6642     src1   : S3(read);
 6643     src2   : S3(read);
 6644     mem    : S3(read);
 6645     DECODE : S0(2);     // any decoder for FPU PUSH
 6646     D0     : S1;        // big decoder only
 6647     FPU    : S4;
 6648     MEM    : S3;        // any mem
 6649 %}
 6650 
 6651 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6652 %{
 6653     instruction_count(3);
 6654     src1   : S3(read);
 6655     src2   : S3(read);
 6656     mem    : S4(read);
 6657     DECODE : S0;        // any decoder for FPU PUSH
 6658     D0     : S0(2);     // big decoder only
 6659     FPU    : S4;
 6660     MEM    : S3(2);     // any mem
 6661 %}
 6662 
 6663 pipe_class fpu_mem_mem(memory dst, memory src1)
 6664 %{
 6665     instruction_count(2);
 6666     src1   : S3(read);
 6667     dst    : S4(read);
 6668     D0     : S0(2);     // big decoder only
 6669     MEM    : S3(2);     // any mem
 6670 %}
 6671 
 6672 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6673 %{
 6674     instruction_count(3);
 6675     src1   : S3(read);
 6676     src2   : S3(read);
 6677     dst    : S4(read);
 6678     D0     : S0(3);     // big decoder only
 6679     FPU    : S4;
 6680     MEM    : S3(3);     // any mem
 6681 %}
 6682 
 6683 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6684 %{
 6685     instruction_count(3);
 6686     src1   : S4(read);
 6687     mem    : S4(read);
 6688     DECODE : S0;        // any decoder for FPU PUSH
 6689     D0     : S0(2);     // big decoder only
 6690     FPU    : S4;
 6691     MEM    : S3(2);     // any mem
 6692 %}
 6693 
 6694 // Float load constant
 6695 pipe_class fpu_reg_con(regD dst)
 6696 %{
 6697     instruction_count(2);
 6698     dst    : S5(write);
 6699     D0     : S0;        // big decoder only for the load
 6700     DECODE : S1;        // any decoder for FPU POP
 6701     FPU    : S4;
 6702     MEM    : S3;        // any mem
 6703 %}
 6704 
 6705 // Float load constant
 6706 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6707 %{
 6708     instruction_count(3);
 6709     dst    : S5(write);
 6710     src    : S3(read);
 6711     D0     : S0;        // big decoder only for the load
 6712     DECODE : S1(2);     // any decoder for FPU POP
 6713     FPU    : S4;
 6714     MEM    : S3;        // any mem
 6715 %}
 6716 
 6717 // Unconditional branch
 6718 pipe_class pipe_jmp(label labl)
 6719 %{
 6720     single_instruction;
 6721     BR   : S3;
 6722 %}
 6723 
 6724 // Conditional branch
 6725 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6726 %{
 6727     single_instruction;
 6728     cr    : S1(read);
 6729     BR    : S3;
 6730 %}
 6731 
 6732 // Allocation idiom
 6733 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6734 %{
 6735     instruction_count(1); force_serialization;
 6736     fixed_latency(6);
 6737     heap_ptr : S3(read);
 6738     DECODE   : S0(3);
 6739     D0       : S2;
 6740     MEM      : S3;
 6741     ALU      : S3(2);
 6742     dst      : S5(write);
 6743     BR       : S5;
 6744 %}
 6745 
 6746 // Generic big/slow expanded idiom
 6747 pipe_class pipe_slow()
 6748 %{
 6749     instruction_count(10); multiple_bundles; force_serialization;
 6750     fixed_latency(100);
 6751     D0  : S0(2);
 6752     MEM : S3(2);
 6753 %}
 6754 
 6755 // The real do-nothing guy
 6756 pipe_class empty()
 6757 %{
 6758     instruction_count(0);
 6759 %}
 6760 
 6761 // Define the class for the Nop node
 6762 define
 6763 %{
 6764    MachNop = empty;
 6765 %}
 6766 
 6767 %}
 6768 
 6769 //----------INSTRUCTIONS-------------------------------------------------------
 6770 //
 6771 // match      -- States which machine-independent subtree may be replaced
 6772 //               by this instruction.
 6773 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6774 //               selection to identify a minimum cost tree of machine
 6775 //               instructions that matches a tree of machine-independent
 6776 //               instructions.
 6777 // format     -- A string providing the disassembly for this instruction.
 6778 //               The value of an instruction's operand may be inserted
 6779 //               by referring to it with a '$' prefix.
 6780 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6781 //               to within an encode class as $primary, $secondary, and $tertiary
 6782 //               respectively.  The primary opcode is commonly used to
 6783 //               indicate the type of machine instruction, while secondary
 6784 //               and tertiary are often used for prefix options or addressing
 6785 //               modes.
 6786 // ins_encode -- A list of encode classes with parameters. The encode class
 6787 //               name must have been defined in an 'enc_class' specification
 6788 //               in the encode section of the architecture description.
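//
// For illustration, the loadB rule that appears below, annotated with the
// fields described above (commentary only, not an additional rule):
//
//   instruct loadB(rRegI dst, memory mem)
//   %{
//     match(Set dst (LoadB mem));   // ideal subtree this rule replaces
//     ins_cost(125);                // cost used during instruction selection
//     format %{ "movsbl  $dst, $mem\t# byte" %}  // disassembly string
//     ins_encode %{                 // emits the actual machine code
//       __ movsbl($dst$$Register, $mem$$Address);
//     %}
//     ins_pipe(ialu_reg_mem);       // pipeline class used for scheduling
//   %}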
 6789 
 6790 // ============================================================================
 6791 
 6792 instruct ShouldNotReachHere() %{
 6793   match(Halt);
 6794   format %{ "stop\t# ShouldNotReachHere" %}
 6795   ins_encode %{
 6796     if (is_reachable()) {
 6797       const char* str = __ code_string(_halt_reason);
 6798       __ stop(str);
 6799     }
 6800   %}
 6801   ins_pipe(pipe_slow);
 6802 %}
 6803 
 6804 // ============================================================================
 6805 
 6806 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6807 // Load Float
 6808 instruct MoveF2VL(vlRegF dst, regF src) %{
 6809   match(Set dst src);
 6810   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6811   ins_encode %{
 6812     ShouldNotReachHere();
 6813   %}
 6814   ins_pipe( fpu_reg_reg );
 6815 %}
 6816 
 6817 // Load Float
 6818 instruct MoveF2LEG(legRegF dst, regF src) %{
 6819   match(Set dst src);
 6820   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6821   ins_encode %{
 6822     ShouldNotReachHere();
 6823   %}
 6824   ins_pipe( fpu_reg_reg );
 6825 %}
 6826 
 6827 // Load Float
 6828 instruct MoveVL2F(regF dst, vlRegF src) %{
 6829   match(Set dst src);
 6830   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6831   ins_encode %{
 6832     ShouldNotReachHere();
 6833   %}
 6834   ins_pipe( fpu_reg_reg );
 6835 %}
 6836 
 6837 // Load Float
 6838 instruct MoveLEG2F(regF dst, legRegF src) %{
 6839   match(Set dst src);
 6840   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6841   ins_encode %{
 6842     ShouldNotReachHere();
 6843   %}
 6844   ins_pipe( fpu_reg_reg );
 6845 %}
 6846 
 6847 // Load Double
 6848 instruct MoveD2VL(vlRegD dst, regD src) %{
 6849   match(Set dst src);
 6850   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6851   ins_encode %{
 6852     ShouldNotReachHere();
 6853   %}
 6854   ins_pipe( fpu_reg_reg );
 6855 %}
 6856 
 6857 // Load Double
 6858 instruct MoveD2LEG(legRegD dst, regD src) %{
 6859   match(Set dst src);
 6860   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6861   ins_encode %{
 6862     ShouldNotReachHere();
 6863   %}
 6864   ins_pipe( fpu_reg_reg );
 6865 %}
 6866 
 6867 // Load Double
 6868 instruct MoveVL2D(regD dst, vlRegD src) %{
 6869   match(Set dst src);
 6870   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6871   ins_encode %{
 6872     ShouldNotReachHere();
 6873   %}
 6874   ins_pipe( fpu_reg_reg );
 6875 %}
 6876 
 6877 // Load Double
 6878 instruct MoveLEG2D(regD dst, legRegD src) %{
 6879   match(Set dst src);
 6880   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6881   ins_encode %{
 6882     ShouldNotReachHere();
 6883   %}
 6884   ins_pipe( fpu_reg_reg );
 6885 %}
 6886 
 6887 //----------Load/Store/Move Instructions---------------------------------------
 6888 //----------Load Instructions--------------------------------------------------
 6889 
 6890 // Load Byte (8 bit signed)
 6891 instruct loadB(rRegI dst, memory mem)
 6892 %{
 6893   match(Set dst (LoadB mem));
 6894 
 6895   ins_cost(125);
 6896   format %{ "movsbl  $dst, $mem\t# byte" %}
 6897 
 6898   ins_encode %{
 6899     __ movsbl($dst$$Register, $mem$$Address);
 6900   %}
 6901 
 6902   ins_pipe(ialu_reg_mem);
 6903 %}
 6904 
 6905 // Load Byte (8 bit signed) into Long Register
 6906 instruct loadB2L(rRegL dst, memory mem)
 6907 %{
 6908   match(Set dst (ConvI2L (LoadB mem)));
 6909 
 6910   ins_cost(125);
 6911   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6912 
 6913   ins_encode %{
 6914     __ movsbq($dst$$Register, $mem$$Address);
 6915   %}
 6916 
 6917   ins_pipe(ialu_reg_mem);
 6918 %}
 6919 
 6920 // Load Unsigned Byte (8 bit UNsigned)
 6921 instruct loadUB(rRegI dst, memory mem)
 6922 %{
 6923   match(Set dst (LoadUB mem));
 6924 
 6925   ins_cost(125);
 6926   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6927 
 6928   ins_encode %{
 6929     __ movzbl($dst$$Register, $mem$$Address);
 6930   %}
 6931 
 6932   ins_pipe(ialu_reg_mem);
 6933 %}
 6934 
 6935 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6936 instruct loadUB2L(rRegL dst, memory mem)
 6937 %{
 6938   match(Set dst (ConvI2L (LoadUB mem)));
 6939 
 6940   ins_cost(125);
 6941   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6942 
 6943   ins_encode %{
 6944     __ movzbq($dst$$Register, $mem$$Address);
 6945   %}
 6946 
 6947   ins_pipe(ialu_reg_mem);
 6948 %}
 6949 
 6950 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 6951 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6952   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6953   effect(KILL cr);
 6954 
 6955   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6956             "andl    $dst, right_n_bits($mask, 8)" %}
 6957   ins_encode %{
 6958     Register Rdst = $dst$$Register;
 6959     __ movzbq(Rdst, $mem$$Address);
 6960     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 6961   %}
 6962   ins_pipe(ialu_reg_mem);
 6963 %}
 6964 
 6965 // Load Short (16 bit signed)
 6966 instruct loadS(rRegI dst, memory mem)
 6967 %{
 6968   match(Set dst (LoadS mem));
 6969 
 6970   ins_cost(125);
 6971   format %{ "movswl $dst, $mem\t# short" %}
 6972 
 6973   ins_encode %{
 6974     __ movswl($dst$$Register, $mem$$Address);
 6975   %}
 6976 
 6977   ins_pipe(ialu_reg_mem);
 6978 %}
 6979 
 6980 // Load Short (16 bit signed) to Byte (8 bit signed)
 6981 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6982   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 6983 
 6984   ins_cost(125);
 6985   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6986   ins_encode %{
 6987     __ movsbl($dst$$Register, $mem$$Address);
 6988   %}
 6989   ins_pipe(ialu_reg_mem);
 6990 %}
 6991 
 6992 // Load Short (16 bit signed) into Long Register
 6993 instruct loadS2L(rRegL dst, memory mem)
 6994 %{
 6995   match(Set dst (ConvI2L (LoadS mem)));
 6996 
 6997   ins_cost(125);
 6998   format %{ "movswq $dst, $mem\t# short -> long" %}
 6999 
 7000   ins_encode %{
 7001     __ movswq($dst$$Register, $mem$$Address);
 7002   %}
 7003 
 7004   ins_pipe(ialu_reg_mem);
 7005 %}
 7006 
 7007 // Load Unsigned Short/Char (16 bit UNsigned)
 7008 instruct loadUS(rRegI dst, memory mem)
 7009 %{
 7010   match(Set dst (LoadUS mem));
 7011 
 7012   ins_cost(125);
 7013   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 7014 
 7015   ins_encode %{
 7016     __ movzwl($dst$$Register, $mem$$Address);
 7017   %}
 7018 
 7019   ins_pipe(ialu_reg_mem);
 7020 %}
 7021 
 7022 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 7023 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7024   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 7025 
 7026   ins_cost(125);
 7027   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 7028   ins_encode %{
 7029     __ movsbl($dst$$Register, $mem$$Address);
 7030   %}
 7031   ins_pipe(ialu_reg_mem);
 7032 %}
 7033 
 7034 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 7035 instruct loadUS2L(rRegL dst, memory mem)
 7036 %{
 7037   match(Set dst (ConvI2L (LoadUS mem)));
 7038 
 7039   ins_cost(125);
 7040   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 7041 
 7042   ins_encode %{
 7043     __ movzwq($dst$$Register, $mem$$Address);
 7044   %}
 7045 
 7046   ins_pipe(ialu_reg_mem);
 7047 %}
 7048 
 7049 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 7050 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7051   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7052 
 7053   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7054   ins_encode %{
 7055     __ movzbq($dst$$Register, $mem$$Address);
 7056   %}
 7057   ins_pipe(ialu_reg_mem);
 7058 %}
 7059 
 7060 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 7061 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7062   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7063   effect(KILL cr);
 7064 
 7065   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7066             "andl    $dst, right_n_bits($mask, 16)" %}
 7067   ins_encode %{
 7068     Register Rdst = $dst$$Register;
 7069     __ movzwq(Rdst, $mem$$Address);
 7070     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7071   %}
 7072   ins_pipe(ialu_reg_mem);
 7073 %}
 7074 
 7075 // Load Integer
 7076 instruct loadI(rRegI dst, memory mem)
 7077 %{
 7078   match(Set dst (LoadI mem));
 7079 
 7080   ins_cost(125);
 7081   format %{ "movl    $dst, $mem\t# int" %}
 7082 
 7083   ins_encode %{
 7084     __ movl($dst$$Register, $mem$$Address);
 7085   %}
 7086 
 7087   ins_pipe(ialu_reg_mem);
 7088 %}
 7089 
 7090 // Load Integer (32 bit signed) to Byte (8 bit signed)
 7091 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7092   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7093 
 7094   ins_cost(125);
 7095   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7096   ins_encode %{
 7097     __ movsbl($dst$$Register, $mem$$Address);
 7098   %}
 7099   ins_pipe(ialu_reg_mem);
 7100 %}
 7101 
 7102 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 7103 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7104   match(Set dst (AndI (LoadI mem) mask));
 7105 
 7106   ins_cost(125);
 7107   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7108   ins_encode %{
 7109     __ movzbl($dst$$Register, $mem$$Address);
 7110   %}
 7111   ins_pipe(ialu_reg_mem);
 7112 %}
 7113 
 7114 // Load Integer (32 bit signed) to Short (16 bit signed)
 7115 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7116   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7117 
 7118   ins_cost(125);
 7119   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7120   ins_encode %{
 7121     __ movswl($dst$$Register, $mem$$Address);
 7122   %}
 7123   ins_pipe(ialu_reg_mem);
 7124 %}
 7125 
 7126 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 7127 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7128   match(Set dst (AndI (LoadI mem) mask));
 7129 
 7130   ins_cost(125);
 7131   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7132   ins_encode %{
 7133     __ movzwl($dst$$Register, $mem$$Address);
 7134   %}
 7135   ins_pipe(ialu_reg_mem);
 7136 %}
 7137 
 7138 // Load Integer into Long Register
 7139 instruct loadI2L(rRegL dst, memory mem)
 7140 %{
 7141   match(Set dst (ConvI2L (LoadI mem)));
 7142 
 7143   ins_cost(125);
 7144   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7145 
 7146   ins_encode %{
 7147     __ movslq($dst$$Register, $mem$$Address);
 7148   %}
 7149 
 7150   ins_pipe(ialu_reg_mem);
 7151 %}
 7152 
 7153 // Load Integer with mask 0xFF into Long Register
 7154 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7155   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7156 
 7157   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7158   ins_encode %{
 7159     __ movzbq($dst$$Register, $mem$$Address);
 7160   %}
 7161   ins_pipe(ialu_reg_mem);
 7162 %}
 7163 
 7164 // Load Integer with mask 0xFFFF into Long Register
 7165 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7166   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7167 
 7168   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7169   ins_encode %{
 7170     __ movzwq($dst$$Register, $mem$$Address);
 7171   %}
 7172   ins_pipe(ialu_reg_mem);
 7173 %}
 7174 
 7175 // Load Integer with a 31-bit mask into Long Register
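// With at most 31 mask bits the AND clears bit 31, so the 32-bit result is
// non-negative and andl's implicit zero-extension of the upper half already
// matches what ConvI2L would produce; no movslq is needed.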
 7176 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7177   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7178   effect(KILL cr);
 7179 
 7180   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7181             "andl    $dst, $mask" %}
 7182   ins_encode %{
 7183     Register Rdst = $dst$$Register;
 7184     __ movl(Rdst, $mem$$Address);
 7185     __ andl(Rdst, $mask$$constant);
 7186   %}
 7187   ins_pipe(ialu_reg_mem);
 7188 %}
 7189 
 7190 // Load Unsigned Integer into Long Register
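// movl writing a 32-bit register implicitly zeroes bits 32..63, which is
// exactly AndL(ConvI2L(x), 0xFFFFFFFF), so the mask costs no extra code.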
 7191 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7192 %{
 7193   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7194 
 7195   ins_cost(125);
 7196   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7197 
 7198   ins_encode %{
 7199     __ movl($dst$$Register, $mem$$Address);
 7200   %}
 7201 
 7202   ins_pipe(ialu_reg_mem);
 7203 %}
 7204 
 7205 // Load Long
 7206 instruct loadL(rRegL dst, memory mem)
 7207 %{
 7208   match(Set dst (LoadL mem));
 7209 
 7210   ins_cost(125);
 7211   format %{ "movq    $dst, $mem\t# long" %}
 7212 
 7213   ins_encode %{
 7214     __ movq($dst$$Register, $mem$$Address);
 7215   %}
 7216 
 7217   ins_pipe(ialu_reg_mem); // XXX
 7218 %}
 7219 
 7220 // Load Range
 7221 instruct loadRange(rRegI dst, memory mem)
 7222 %{
 7223   match(Set dst (LoadRange mem));
 7224 
 7225   ins_cost(125); // XXX
 7226   format %{ "movl    $dst, $mem\t# range" %}
 7227   ins_encode %{
 7228     __ movl($dst$$Register, $mem$$Address);
 7229   %}
 7230   ins_pipe(ialu_reg_mem);
 7231 %}
 7232 
 7233 // Load Pointer
 7234 instruct loadP(rRegP dst, memory mem)
 7235 %{
 7236   match(Set dst (LoadP mem));
 7237   predicate(n->as_Load()->barrier_data() == 0);
 7238 
 7239   ins_cost(125); // XXX
 7240   format %{ "movq    $dst, $mem\t# ptr" %}
 7241   ins_encode %{
 7242     __ movq($dst$$Register, $mem$$Address);
 7243   %}
 7244   ins_pipe(ialu_reg_mem); // XXX
 7245 %}
 7246 
 7247 // Load Compressed Pointer
 7248 instruct loadN(rRegN dst, memory mem)
 7249 %{
  predicate(n->as_Load()->barrier_data() == 0);
  match(Set dst (LoadN mem));

  ins_cost(125); // XXX
  format %{ "movl    $dst, $mem\t# compressed ptr" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
 7259 %}
 7260 
 7261 
 7262 // Load Klass Pointer
 7263 instruct loadKlass(rRegP dst, memory mem)
 7264 %{
 7265   match(Set dst (LoadKlass mem));
 7266 
 7267   ins_cost(125); // XXX
 7268   format %{ "movq    $dst, $mem\t# class" %}
 7269   ins_encode %{
 7270     __ movq($dst$$Register, $mem$$Address);
 7271   %}
 7272   ins_pipe(ialu_reg_mem); // XXX
 7273 %}
 7274 
 7275 // Load narrow Klass Pointer
 7276 instruct loadNKlass(rRegN dst, memory mem)
 7277 %{
 7278   predicate(!UseCompactObjectHeaders);
 7279   match(Set dst (LoadNKlass mem));
 7280 
 7281   ins_cost(125); // XXX
 7282   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7283   ins_encode %{
 7284     __ movl($dst$$Register, $mem$$Address);
 7285   %}
 7286   ins_pipe(ialu_reg_mem); // XXX
 7287 %}
 7288 
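// Load narrow Klass Pointer with compact object headers: the narrow klass is
// packed into the mark word, so the 32-bit value loaded here must still be
// shifted right by markWord::klass_shift_at_offset. The APX form (eshrl with
// a memory source) performs the load and the shift in one instruction.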
 7289 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7290 %{
 7291   predicate(UseCompactObjectHeaders);
 7292   match(Set dst (LoadNKlass mem));
 7293   effect(KILL cr);
 7294   ins_cost(125);
 7295   format %{
 7296     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7297     "shrl    $dst, markWord::klass_shift_at_offset"
 7298   %}
 7299   ins_encode %{
    if (UseAPX) {
      __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
    } else {
      __ movl($dst$$Register, $mem$$Address);
      __ shrl($dst$$Register, markWord::klass_shift_at_offset);
    }
 7307   %}
 7308   ins_pipe(ialu_reg_mem);
 7309 %}
 7310 
 7311 // Load Float
 7312 instruct loadF(regF dst, memory mem)
 7313 %{
 7314   match(Set dst (LoadF mem));
 7315 
 7316   ins_cost(145); // XXX
 7317   format %{ "movss   $dst, $mem\t# float" %}
 7318   ins_encode %{
 7319     __ movflt($dst$$XMMRegister, $mem$$Address);
 7320   %}
 7321   ins_pipe(pipe_slow); // XXX
 7322 %}
 7323 
 7324 // Load Double
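// movsd clears the upper 64 bits of the destination XMM register, while
// movlpd merges into them, leaving the old upper half in place. The merge
// adds a dependence on the register's previous contents, so the clearing
// form (loadD) is preferred when UseXmmLoadAndClearUpper is set.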
 7325 instruct loadD_partial(regD dst, memory mem)
 7326 %{
 7327   predicate(!UseXmmLoadAndClearUpper);
 7328   match(Set dst (LoadD mem));
 7329 
 7330   ins_cost(145); // XXX
 7331   format %{ "movlpd  $dst, $mem\t# double" %}
 7332   ins_encode %{
 7333     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7334   %}
 7335   ins_pipe(pipe_slow); // XXX
 7336 %}
 7337 
 7338 instruct loadD(regD dst, memory mem)
 7339 %{
 7340   predicate(UseXmmLoadAndClearUpper);
 7341   match(Set dst (LoadD mem));
 7342 
 7343   ins_cost(145); // XXX
 7344   format %{ "movsd   $dst, $mem\t# double" %}
 7345   ins_encode %{
 7346     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7347   %}
 7348   ins_pipe(pipe_slow); // XXX
 7349 %}
 7350 
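// Java's Math.max/min differ from raw SSE maxss/minss: NaN must propagate,
// and -0.0 must order below +0.0, while maxss/minss simply return the second
// operand for NaN or (+0.0, -0.0) inputs. The AVX forms therefore blend on
// sign and NaN masks (vminmax_fp / emit_fp_min_max); the AVX10.2 scalar
// MINMAX instructions implement the required semantics directly, so a single
// instruction suffices on that path.
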
 7351 // max = java.lang.Math.max(float a, float b)
 7352 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
 7353   predicate(VM_Version::supports_avx10_2());
 7354   match(Set dst (MaxF a b));
 7355   format %{ "maxF $dst, $a, $b" %}
 7356   ins_encode %{
 7357     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7358   %}
 7359   ins_pipe( pipe_slow );
 7360 %}
 7361 
 7362 // max = java.lang.Math.max(float a, float b)
 7363 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7364   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7365   match(Set dst (MaxF a b));
 7366   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7367   format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7368   ins_encode %{
 7369     __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7370   %}
 7371   ins_pipe( pipe_slow );
 7372 %}
 7373 
 7374 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7375   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7376   match(Set dst (MaxF a b));
 7377   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7378 
 7379   format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
 7380   ins_encode %{
 7381     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7382                     false /*min*/, true /*single*/);
 7383   %}
 7384   ins_pipe( pipe_slow );
 7385 %}
 7386 
 7387 // max = java.lang.Math.max(double a, double b)
 7388 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
 7389   predicate(VM_Version::supports_avx10_2());
 7390   match(Set dst (MaxD a b));
 7391   format %{ "maxD $dst, $a, $b" %}
 7392   ins_encode %{
 7393     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7394   %}
 7395   ins_pipe( pipe_slow );
 7396 %}
 7397 
 7398 // max = java.lang.Math.max(double a, double b)
 7399 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7400   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7401   match(Set dst (MaxD a b));
 7402   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7403   format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7404   ins_encode %{
 7405     __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7406   %}
 7407   ins_pipe( pipe_slow );
 7408 %}
 7409 
 7410 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7411   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7412   match(Set dst (MaxD a b));
 7413   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7414 
 7415   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7416   ins_encode %{
 7417     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7418                     false /*min*/, false /*single*/);
 7419   %}
 7420   ins_pipe( pipe_slow );
 7421 %}
 7422 
// min = java.lang.Math.min(float a, float b)
 7424 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
 7425   predicate(VM_Version::supports_avx10_2());
 7426   match(Set dst (MinF a b));
 7427   format %{ "minF $dst, $a, $b" %}
 7428   ins_encode %{
 7429     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7430   %}
 7431   ins_pipe( pipe_slow );
 7432 %}
 7433 
 7434 // min = java.lang.Math.min(float a, float b)
 7435 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7436   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7437   match(Set dst (MinF a b));
 7438   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7439   format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7440   ins_encode %{
 7441     __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7442   %}
 7443   ins_pipe( pipe_slow );
 7444 %}
 7445 
 7446 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7447   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7448   match(Set dst (MinF a b));
 7449   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7450 
 7451   format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7452   ins_encode %{
 7453     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7454                     true /*min*/, true /*single*/);
 7455   %}
 7456   ins_pipe( pipe_slow );
 7457 %}
 7458 
// min = java.lang.Math.min(double a, double b)
 7460 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
 7461   predicate(VM_Version::supports_avx10_2());
 7462   match(Set dst (MinD a b));
 7463   format %{ "minD $dst, $a, $b" %}
 7464   ins_encode %{
 7465     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7466   %}
 7467   ins_pipe( pipe_slow );
 7468 %}
 7469 
 7470 // min = java.lang.Math.min(double a, double b)
 7471 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7472   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7473   match(Set dst (MinD a b));
 7474   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7475     format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7476   ins_encode %{
 7477     __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7478   %}
 7479   ins_pipe( pipe_slow );
 7480 %}
 7481 
 7482 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7483   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7484   match(Set dst (MinD a b));
 7485   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7486 
 7487   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7488   ins_encode %{
 7489     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7490                     true /*min*/, false /*single*/);
 7491   %}
 7492   ins_pipe( pipe_slow );
 7493 %}
 7494 
 7495 // Load Effective Address
 7496 instruct leaP8(rRegP dst, indOffset8 mem)
 7497 %{
 7498   match(Set dst mem);
 7499 
 7500   ins_cost(110); // XXX
 7501   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7502   ins_encode %{
 7503     __ leaq($dst$$Register, $mem$$Address);
 7504   %}
 7505   ins_pipe(ialu_reg_reg_fat);
 7506 %}
 7507 
 7508 instruct leaP32(rRegP dst, indOffset32 mem)
 7509 %{
 7510   match(Set dst mem);
 7511 
 7512   ins_cost(110);
 7513   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7514   ins_encode %{
 7515     __ leaq($dst$$Register, $mem$$Address);
 7516   %}
 7517   ins_pipe(ialu_reg_reg_fat);
 7518 %}
 7519 
 7520 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7521 %{
 7522   match(Set dst mem);
 7523 
 7524   ins_cost(110);
 7525   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7526   ins_encode %{
 7527     __ leaq($dst$$Register, $mem$$Address);
 7528   %}
 7529   ins_pipe(ialu_reg_reg_fat);
 7530 %}
 7531 
 7532 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7533 %{
 7534   match(Set dst mem);
 7535 
 7536   ins_cost(110);
 7537   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7538   ins_encode %{
 7539     __ leaq($dst$$Register, $mem$$Address);
 7540   %}
 7541   ins_pipe(ialu_reg_reg_fat);
 7542 %}
 7543 
 7544 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7545 %{
 7546   match(Set dst mem);
 7547 
 7548   ins_cost(110);
 7549   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7550   ins_encode %{
 7551     __ leaq($dst$$Register, $mem$$Address);
 7552   %}
 7553   ins_pipe(ialu_reg_reg_fat);
 7554 %}
 7555 
 7556 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7557 %{
 7558   match(Set dst mem);
 7559 
 7560   ins_cost(110);
 7561   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7562   ins_encode %{
 7563     __ leaq($dst$$Register, $mem$$Address);
 7564   %}
 7565   ins_pipe(ialu_reg_reg_fat);
 7566 %}
 7567 
 7568 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7569 %{
 7570   match(Set dst mem);
 7571 
 7572   ins_cost(110);
 7573   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7574   ins_encode %{
 7575     __ leaq($dst$$Register, $mem$$Address);
 7576   %}
 7577   ins_pipe(ialu_reg_reg_fat);
 7578 %}
 7579 
 7580 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7581 %{
 7582   match(Set dst mem);
 7583 
 7584   ins_cost(110);
 7585   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7586   ins_encode %{
 7587     __ leaq($dst$$Register, $mem$$Address);
 7588   %}
 7589   ins_pipe(ialu_reg_reg_fat);
 7590 %}
 7591 
// Load Effective Address which uses a narrow (32-bit) oop
 7593 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7594 %{
 7595   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7596   match(Set dst mem);
 7597 
 7598   ins_cost(110);
 7599   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7600   ins_encode %{
 7601     __ leaq($dst$$Register, $mem$$Address);
 7602   %}
 7603   ins_pipe(ialu_reg_reg_fat);
 7604 %}
 7605 
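// The *Narrow forms below require CompressedOops::shift() == 0, i.e. the
// unscaled, zero-based mode used when the heap fits below 4GB. There DecodeN
// is the identity, so the narrow oop bits can feed the leaq address
// computation directly.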
 7606 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7607 %{
 7608   predicate(CompressedOops::shift() == 0);
 7609   match(Set dst mem);
 7610 
 7611   ins_cost(110); // XXX
 7612   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7613   ins_encode %{
 7614     __ leaq($dst$$Register, $mem$$Address);
 7615   %}
 7616   ins_pipe(ialu_reg_reg_fat);
 7617 %}
 7618 
 7619 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7620 %{
 7621   predicate(CompressedOops::shift() == 0);
 7622   match(Set dst mem);
 7623 
 7624   ins_cost(110);
 7625   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7626   ins_encode %{
 7627     __ leaq($dst$$Register, $mem$$Address);
 7628   %}
 7629   ins_pipe(ialu_reg_reg_fat);
 7630 %}
 7631 
 7632 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7633 %{
 7634   predicate(CompressedOops::shift() == 0);
 7635   match(Set dst mem);
 7636 
 7637   ins_cost(110);
 7638   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7639   ins_encode %{
 7640     __ leaq($dst$$Register, $mem$$Address);
 7641   %}
 7642   ins_pipe(ialu_reg_reg_fat);
 7643 %}
 7644 
 7645 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7646 %{
 7647   predicate(CompressedOops::shift() == 0);
 7648   match(Set dst mem);
 7649 
 7650   ins_cost(110);
 7651   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7652   ins_encode %{
 7653     __ leaq($dst$$Register, $mem$$Address);
 7654   %}
 7655   ins_pipe(ialu_reg_reg_fat);
 7656 %}
 7657 
 7658 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7659 %{
 7660   predicate(CompressedOops::shift() == 0);
 7661   match(Set dst mem);
 7662 
 7663   ins_cost(110);
 7664   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7665   ins_encode %{
 7666     __ leaq($dst$$Register, $mem$$Address);
 7667   %}
 7668   ins_pipe(ialu_reg_reg_fat);
 7669 %}
 7670 
 7671 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7672 %{
 7673   predicate(CompressedOops::shift() == 0);
 7674   match(Set dst mem);
 7675 
 7676   ins_cost(110);
 7677   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7678   ins_encode %{
 7679     __ leaq($dst$$Register, $mem$$Address);
 7680   %}
 7681   ins_pipe(ialu_reg_reg_fat);
 7682 %}
 7683 
 7684 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7685 %{
 7686   predicate(CompressedOops::shift() == 0);
 7687   match(Set dst mem);
 7688 
 7689   ins_cost(110);
 7690   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7691   ins_encode %{
 7692     __ leaq($dst$$Register, $mem$$Address);
 7693   %}
 7694   ins_pipe(ialu_reg_reg_fat);
 7695 %}
 7696 
 7697 instruct loadConI(rRegI dst, immI src)
 7698 %{
 7699   match(Set dst src);
 7700 
 7701   format %{ "movl    $dst, $src\t# int" %}
 7702   ins_encode %{
 7703     __ movl($dst$$Register, $src$$constant);
 7704   %}
 7705   ins_pipe(ialu_reg_fat); // XXX
 7706 %}
 7707 
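// Zero constants are materialized with xorl: writing the 32-bit register
// implicitly clears bits 32..63, so the same idiom also zeroes long and
// pointer registers, avoids a REX.W prefix, and is recognized by the CPU as
// a zeroing idiom. The xor clobbers the flags, hence the KILL cr effects.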
 7708 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7709 %{
 7710   match(Set dst src);
 7711   effect(KILL cr);
 7712 
 7713   ins_cost(50);
 7714   format %{ "xorl    $dst, $dst\t# int" %}
 7715   ins_encode %{
 7716     __ xorl($dst$$Register, $dst$$Register);
 7717   %}
 7718   ins_pipe(ialu_reg);
 7719 %}
 7720 
 7721 instruct loadConL(rRegL dst, immL src)
 7722 %{
 7723   match(Set dst src);
 7724 
 7725   ins_cost(150);
 7726   format %{ "movq    $dst, $src\t# long" %}
 7727   ins_encode %{
 7728     __ mov64($dst$$Register, $src$$constant);
 7729   %}
 7730   ins_pipe(ialu_reg);
 7731 %}
 7732 
 7733 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7734 %{
 7735   match(Set dst src);
 7736   effect(KILL cr);
 7737 
 7738   ins_cost(50);
 7739   format %{ "xorl    $dst, $dst\t# long" %}
 7740   ins_encode %{
 7741     __ xorl($dst$$Register, $dst$$Register);
 7742   %}
 7743   ins_pipe(ialu_reg); // XXX
 7744 %}
 7745 
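// Long constants that fit an unsigned 32-bit value load with movl (implicit
// zero-extension); those that fit a sign-extended 32-bit immediate use
// movq imm32; only full 64-bit constants need the long mov64 form above.
// The graduated ins_costs (60/70/150) steer the matcher accordingly.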
 7746 instruct loadConUL32(rRegL dst, immUL32 src)
 7747 %{
 7748   match(Set dst src);
 7749 
 7750   ins_cost(60);
 7751   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7752   ins_encode %{
 7753     __ movl($dst$$Register, $src$$constant);
 7754   %}
 7755   ins_pipe(ialu_reg);
 7756 %}
 7757 
 7758 instruct loadConL32(rRegL dst, immL32 src)
 7759 %{
 7760   match(Set dst src);
 7761 
 7762   ins_cost(70);
 7763   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7764   ins_encode %{
 7765     __ movq($dst$$Register, $src$$constant);
 7766   %}
 7767   ins_pipe(ialu_reg);
 7768 %}
 7769 
 7770 instruct loadConP(rRegP dst, immP con) %{
 7771   match(Set dst con);
 7772 
 7773   format %{ "movq    $dst, $con\t# ptr" %}
 7774   ins_encode %{
 7775     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7776   %}
 7777   ins_pipe(ialu_reg_fat); // XXX
 7778 %}
 7779 
 7780 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7781 %{
 7782   match(Set dst src);
 7783   effect(KILL cr);
 7784 
 7785   ins_cost(50);
 7786   format %{ "xorl    $dst, $dst\t# ptr" %}
 7787   ins_encode %{
 7788     __ xorl($dst$$Register, $dst$$Register);
 7789   %}
 7790   ins_pipe(ialu_reg);
 7791 %}
 7792 
 7793 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7794 %{
 7795   match(Set dst src);
 7796   effect(KILL cr);
 7797 
 7798   ins_cost(60);
 7799   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7800   ins_encode %{
 7801     __ movl($dst$$Register, $src$$constant);
 7802   %}
 7803   ins_pipe(ialu_reg);
 7804 %}
 7805 
 7806 instruct loadConF(regF dst, immF con) %{
 7807   match(Set dst con);
 7808   ins_cost(125);
 7809   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7810   ins_encode %{
 7811     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7812   %}
 7813   ins_pipe(pipe_slow);
 7814 %}
 7815 
 7816 instruct loadConH(regF dst, immH con) %{
 7817   match(Set dst con);
 7818   ins_cost(125);
 7819   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7820   ins_encode %{
 7821     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7822   %}
 7823   ins_pipe(pipe_slow);
 7824 %}
 7825 
 7826 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7827   match(Set dst src);
 7828   effect(KILL cr);
 7829   format %{ "xorq    $dst, $src\t# compressed null pointer" %}
 7830   ins_encode %{
 7831     __ xorq($dst$$Register, $dst$$Register);
 7832   %}
 7833   ins_pipe(ialu_reg);
 7834 %}
 7835 
 7836 instruct loadConN(rRegN dst, immN src) %{
 7837   match(Set dst src);
 7838 
 7839   ins_cost(125);
 7840   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7841   ins_encode %{
 7842     address con = (address)$src$$constant;
 7843     if (con == nullptr) {
 7844       ShouldNotReachHere();
 7845     } else {
 7846       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7847     }
 7848   %}
 7849   ins_pipe(ialu_reg_fat); // XXX
 7850 %}
 7851 
 7852 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7853   match(Set dst src);
 7854 
 7855   ins_cost(125);
 7856   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7857   ins_encode %{
 7858     address con = (address)$src$$constant;
 7859     if (con == nullptr) {
 7860       ShouldNotReachHere();
 7861     } else {
 7862       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7863     }
 7864   %}
 7865   ins_pipe(ialu_reg_fat); // XXX
 7866 %}
 7867 
 7868 instruct loadConF0(regF dst, immF0 src)
 7869 %{
 7870   match(Set dst src);
 7871   ins_cost(100);
 7872 
 7873   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7874   ins_encode %{
 7875     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7876   %}
 7877   ins_pipe(pipe_slow);
 7878 %}
 7879 
// Use the same format since predicate() cannot be used here.
 7881 instruct loadConD(regD dst, immD con) %{
 7882   match(Set dst con);
 7883   ins_cost(125);
 7884   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7885   ins_encode %{
 7886     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7887   %}
 7888   ins_pipe(pipe_slow);
 7889 %}
 7890 
 7891 instruct loadConD0(regD dst, immD0 src)
 7892 %{
 7893   match(Set dst src);
 7894   ins_cost(100);
 7895 
 7896   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7897   ins_encode %{
 7898     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7899   %}
 7900   ins_pipe(pipe_slow);
 7901 %}
 7902 
 7903 instruct loadSSI(rRegI dst, stackSlotI src)
 7904 %{
 7905   match(Set dst src);
 7906 
 7907   ins_cost(125);
 7908   format %{ "movl    $dst, $src\t# int stk" %}
 7909   ins_encode %{
 7910     __ movl($dst$$Register, $src$$Address);
 7911   %}
 7912   ins_pipe(ialu_reg_mem);
 7913 %}
 7914 
 7915 instruct loadSSL(rRegL dst, stackSlotL src)
 7916 %{
 7917   match(Set dst src);
 7918 
 7919   ins_cost(125);
 7920   format %{ "movq    $dst, $src\t# long stk" %}
 7921   ins_encode %{
 7922     __ movq($dst$$Register, $src$$Address);
 7923   %}
 7924   ins_pipe(ialu_reg_mem);
 7925 %}
 7926 
 7927 instruct loadSSP(rRegP dst, stackSlotP src)
 7928 %{
 7929   match(Set dst src);
 7930 
 7931   ins_cost(125);
 7932   format %{ "movq    $dst, $src\t# ptr stk" %}
 7933   ins_encode %{
 7934     __ movq($dst$$Register, $src$$Address);
 7935   %}
 7936   ins_pipe(ialu_reg_mem);
 7937 %}
 7938 
 7939 instruct loadSSF(regF dst, stackSlotF src)
 7940 %{
 7941   match(Set dst src);
 7942 
 7943   ins_cost(125);
 7944   format %{ "movss   $dst, $src\t# float stk" %}
 7945   ins_encode %{
 7946     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 7947   %}
 7948   ins_pipe(pipe_slow); // XXX
 7949 %}
 7950 
// Use the same format since predicate() cannot be used here.
 7952 instruct loadSSD(regD dst, stackSlotD src)
 7953 %{
 7954   match(Set dst src);
 7955 
 7956   ins_cost(125);
 7957   format %{ "movsd   $dst, $src\t# double stk" %}
 7958   ins_encode  %{
 7959     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 7960   %}
 7961   ins_pipe(pipe_slow); // XXX
 7962 %}
 7963 
 7964 // Prefetch instructions for allocation.
 7965 // Must be safe to execute with invalid address (cannot fault).
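// AllocatePrefetchInstr selects the flavor: 0 -> prefetchnta, 1 -> prefetcht0,
// 2 -> prefetcht2, 3 -> prefetchw.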
 7966 
 7967 instruct prefetchAlloc( memory mem ) %{
 7968   predicate(AllocatePrefetchInstr==3);
 7969   match(PrefetchAllocation mem);
 7970   ins_cost(125);
 7971 
 7972   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7973   ins_encode %{
 7974     __ prefetchw($mem$$Address);
 7975   %}
 7976   ins_pipe(ialu_mem);
 7977 %}
 7978 
 7979 instruct prefetchAllocNTA( memory mem ) %{
 7980   predicate(AllocatePrefetchInstr==0);
 7981   match(PrefetchAllocation mem);
 7982   ins_cost(125);
 7983 
 7984   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7985   ins_encode %{
 7986     __ prefetchnta($mem$$Address);
 7987   %}
 7988   ins_pipe(ialu_mem);
 7989 %}
 7990 
 7991 instruct prefetchAllocT0( memory mem ) %{
 7992   predicate(AllocatePrefetchInstr==1);
 7993   match(PrefetchAllocation mem);
 7994   ins_cost(125);
 7995 
 7996   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 7997   ins_encode %{
 7998     __ prefetcht0($mem$$Address);
 7999   %}
 8000   ins_pipe(ialu_mem);
 8001 %}
 8002 
 8003 instruct prefetchAllocT2( memory mem ) %{
 8004   predicate(AllocatePrefetchInstr==2);
 8005   match(PrefetchAllocation mem);
 8006   ins_cost(125);
 8007 
 8008   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 8009   ins_encode %{
 8010     __ prefetcht2($mem$$Address);
 8011   %}
 8012   ins_pipe(ialu_mem);
 8013 %}
 8014 
 8015 //----------Store Instructions-------------------------------------------------
 8016 
 8017 // Store Byte
 8018 instruct storeB(memory mem, rRegI src)
 8019 %{
 8020   match(Set mem (StoreB mem src));
 8021 
 8022   ins_cost(125); // XXX
 8023   format %{ "movb    $mem, $src\t# byte" %}
 8024   ins_encode %{
 8025     __ movb($mem$$Address, $src$$Register);
 8026   %}
 8027   ins_pipe(ialu_mem_reg);
 8028 %}
 8029 
 8030 // Store Char/Short
 8031 instruct storeC(memory mem, rRegI src)
 8032 %{
 8033   match(Set mem (StoreC mem src));
 8034 
 8035   ins_cost(125); // XXX
 8036   format %{ "movw    $mem, $src\t# char/short" %}
 8037   ins_encode %{
 8038     __ movw($mem$$Address, $src$$Register);
 8039   %}
 8040   ins_pipe(ialu_mem_reg);
 8041 %}
 8042 
 8043 // Store Integer
 8044 instruct storeI(memory mem, rRegI src)
 8045 %{
 8046   match(Set mem (StoreI mem src));
 8047 
 8048   ins_cost(125); // XXX
 8049   format %{ "movl    $mem, $src\t# int" %}
 8050   ins_encode %{
 8051     __ movl($mem$$Address, $src$$Register);
 8052   %}
 8053   ins_pipe(ialu_mem_reg);
 8054 %}
 8055 
 8056 // Store Long
 8057 instruct storeL(memory mem, rRegL src)
 8058 %{
 8059   match(Set mem (StoreL mem src));
 8060 
 8061   ins_cost(125); // XXX
 8062   format %{ "movq    $mem, $src\t# long" %}
 8063   ins_encode %{
 8064     __ movq($mem$$Address, $src$$Register);
 8065   %}
 8066   ins_pipe(ialu_mem_reg); // XXX
 8067 %}
 8068 
 8069 // Store Pointer
 8070 instruct storeP(memory mem, any_RegP src)
 8071 %{
 8072   predicate(n->as_Store()->barrier_data() == 0);
 8073   match(Set mem (StoreP mem src));
 8074 
 8075   ins_cost(125); // XXX
 8076   format %{ "movq    $mem, $src\t# ptr" %}
 8077   ins_encode %{
 8078     __ movq($mem$$Address, $src$$Register);
 8079   %}
 8080   ins_pipe(ialu_mem_reg);
 8081 %}
 8082 
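// When the compressed-oop base is null, r12 (the heapbase register) holds
// zero, so "movq $mem, r12" stores a zero without any immediate bytes. The
// storeImm*0, storeF0 and storeD0 forms below reuse the same trick for ints,
// longs, shorts/chars, bytes, floats and doubles.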
 8083 instruct storeImmP0(memory mem, immP0 zero)
 8084 %{
 8085   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8086   match(Set mem (StoreP mem zero));
 8087 
 8088   ins_cost(125); // XXX
 8089   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8090   ins_encode %{
 8091     __ movq($mem$$Address, r12);
 8092   %}
 8093   ins_pipe(ialu_mem_reg);
 8094 %}
 8095 
 8096 // Store Null Pointer, mark word, or other simple pointer constant.
 8097 instruct storeImmP(memory mem, immP31 src)
 8098 %{
 8099   predicate(n->as_Store()->barrier_data() == 0);
 8100   match(Set mem (StoreP mem src));
 8101 
 8102   ins_cost(150); // XXX
 8103   format %{ "movq    $mem, $src\t# ptr" %}
 8104   ins_encode %{
 8105     __ movq($mem$$Address, $src$$constant);
 8106   %}
 8107   ins_pipe(ialu_mem_imm);
 8108 %}
 8109 
 8110 // Store Compressed Pointer
 8111 instruct storeN(memory mem, rRegN src)
 8112 %{
 8113   predicate(n->as_Store()->barrier_data() == 0);
 8114   match(Set mem (StoreN mem src));
 8115 
 8116   ins_cost(125); // XXX
 8117   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8118   ins_encode %{
 8119     __ movl($mem$$Address, $src$$Register);
 8120   %}
 8121   ins_pipe(ialu_mem_reg);
 8122 %}
 8123 
 8124 instruct storeNKlass(memory mem, rRegN src)
 8125 %{
 8126   match(Set mem (StoreNKlass mem src));
 8127 
 8128   ins_cost(125); // XXX
 8129   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8130   ins_encode %{
 8131     __ movl($mem$$Address, $src$$Register);
 8132   %}
 8133   ins_pipe(ialu_mem_reg);
 8134 %}
 8135 
 8136 instruct storeImmN0(memory mem, immN0 zero)
 8137 %{
 8138   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8139   match(Set mem (StoreN mem zero));
 8140 
 8141   ins_cost(125); // XXX
 8142   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8143   ins_encode %{
 8144     __ movl($mem$$Address, r12);
 8145   %}
 8146   ins_pipe(ialu_mem_reg);
 8147 %}
 8148 
 8149 instruct storeImmN(memory mem, immN src)
 8150 %{
 8151   predicate(n->as_Store()->barrier_data() == 0);
 8152   match(Set mem (StoreN mem src));
 8153 
 8154   ins_cost(150); // XXX
 8155   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8156   ins_encode %{
 8157     address con = (address)$src$$constant;
 8158     if (con == nullptr) {
 8159       __ movl($mem$$Address, 0);
 8160     } else {
 8161       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8162     }
 8163   %}
 8164   ins_pipe(ialu_mem_imm);
 8165 %}
 8166 
 8167 instruct storeImmNKlass(memory mem, immNKlass src)
 8168 %{
 8169   match(Set mem (StoreNKlass mem src));
 8170 
 8171   ins_cost(150); // XXX
 8172   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8173   ins_encode %{
 8174     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8175   %}
 8176   ins_pipe(ialu_mem_imm);
 8177 %}
 8178 
 8179 // Store Integer Immediate
 8180 instruct storeImmI0(memory mem, immI_0 zero)
 8181 %{
 8182   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8183   match(Set mem (StoreI mem zero));
 8184 
 8185   ins_cost(125); // XXX
 8186   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8187   ins_encode %{
 8188     __ movl($mem$$Address, r12);
 8189   %}
 8190   ins_pipe(ialu_mem_reg);
 8191 %}
 8192 
 8193 instruct storeImmI(memory mem, immI src)
 8194 %{
 8195   match(Set mem (StoreI mem src));
 8196 
 8197   ins_cost(150);
 8198   format %{ "movl    $mem, $src\t# int" %}
 8199   ins_encode %{
 8200     __ movl($mem$$Address, $src$$constant);
 8201   %}
 8202   ins_pipe(ialu_mem_imm);
 8203 %}
 8204 
 8205 // Store Long Immediate
 8206 instruct storeImmL0(memory mem, immL0 zero)
 8207 %{
 8208   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8209   match(Set mem (StoreL mem zero));
 8210 
 8211   ins_cost(125); // XXX
 8212   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8213   ins_encode %{
 8214     __ movq($mem$$Address, r12);
 8215   %}
 8216   ins_pipe(ialu_mem_reg);
 8217 %}
 8218 
 8219 instruct storeImmL(memory mem, immL32 src)
 8220 %{
 8221   match(Set mem (StoreL mem src));
 8222 
 8223   ins_cost(150);
 8224   format %{ "movq    $mem, $src\t# long" %}
 8225   ins_encode %{
 8226     __ movq($mem$$Address, $src$$constant);
 8227   %}
 8228   ins_pipe(ialu_mem_imm);
 8229 %}
 8230 
 8231 // Store Short/Char Immediate
 8232 instruct storeImmC0(memory mem, immI_0 zero)
 8233 %{
 8234   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8235   match(Set mem (StoreC mem zero));
 8236 
 8237   ins_cost(125); // XXX
 8238   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8239   ins_encode %{
 8240     __ movw($mem$$Address, r12);
 8241   %}
 8242   ins_pipe(ialu_mem_reg);
 8243 %}
 8244 
 8245 instruct storeImmI16(memory mem, immI16 src)
 8246 %{
 8247   predicate(UseStoreImmI16);
 8248   match(Set mem (StoreC mem src));
 8249 
 8250   ins_cost(150);
 8251   format %{ "movw    $mem, $src\t# short/char" %}
 8252   ins_encode %{
 8253     __ movw($mem$$Address, $src$$constant);
 8254   %}
 8255   ins_pipe(ialu_mem_imm);
 8256 %}
 8257 
 8258 // Store Byte Immediate
 8259 instruct storeImmB0(memory mem, immI_0 zero)
 8260 %{
 8261   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8262   match(Set mem (StoreB mem zero));
 8263 
 8264   ins_cost(125); // XXX
 8265   format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8266   ins_encode %{
 8267     __ movb($mem$$Address, r12);
 8268   %}
 8269   ins_pipe(ialu_mem_reg);
 8270 %}
 8271 
 8272 instruct storeImmB(memory mem, immI8 src)
 8273 %{
 8274   match(Set mem (StoreB mem src));
 8275 
 8276   ins_cost(150); // XXX
 8277   format %{ "movb    $mem, $src\t# byte" %}
 8278   ins_encode %{
 8279     __ movb($mem$$Address, $src$$constant);
 8280   %}
 8281   ins_pipe(ialu_mem_imm);
 8282 %}
 8283 
 8284 // Store Float
 8285 instruct storeF(memory mem, regF src)
 8286 %{
 8287   match(Set mem (StoreF mem src));
 8288 
 8289   ins_cost(95); // XXX
 8290   format %{ "movss   $mem, $src\t# float" %}
 8291   ins_encode %{
 8292     __ movflt($mem$$Address, $src$$XMMRegister);
 8293   %}
 8294   ins_pipe(pipe_slow); // XXX
 8295 %}
 8296 
// Store immediate Float value (faster than a store from an XMM register)
 8298 instruct storeF0(memory mem, immF0 zero)
 8299 %{
 8300   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8301   match(Set mem (StoreF mem zero));
 8302 
 8303   ins_cost(25); // XXX
 8304   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8305   ins_encode %{
 8306     __ movl($mem$$Address, r12);
 8307   %}
 8308   ins_pipe(ialu_mem_reg);
 8309 %}
 8310 
 8311 instruct storeF_imm(memory mem, immF src)
 8312 %{
 8313   match(Set mem (StoreF mem src));
 8314 
 8315   ins_cost(50);
 8316   format %{ "movl    $mem, $src\t# float" %}
 8317   ins_encode %{
 8318     __ movl($mem$$Address, jint_cast($src$$constant));
 8319   %}
 8320   ins_pipe(ialu_mem_imm);
 8321 %}
 8322 
 8323 // Store Double
 8324 instruct storeD(memory mem, regD src)
 8325 %{
 8326   match(Set mem (StoreD mem src));
 8327 
 8328   ins_cost(95); // XXX
 8329   format %{ "movsd   $mem, $src\t# double" %}
 8330   ins_encode %{
 8331     __ movdbl($mem$$Address, $src$$XMMRegister);
 8332   %}
 8333   ins_pipe(pipe_slow); // XXX
 8334 %}
 8335 
// Store immediate double 0.0 (faster than a store from an XMM register)
 8337 instruct storeD0_imm(memory mem, immD0 src)
 8338 %{
 8339   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8340   match(Set mem (StoreD mem src));
 8341 
 8342   ins_cost(50);
 8343   format %{ "movq    $mem, $src\t# double 0." %}
 8344   ins_encode %{
 8345     __ movq($mem$$Address, $src$$constant);
 8346   %}
 8347   ins_pipe(ialu_mem_imm);
 8348 %}
 8349 
 8350 instruct storeD0(memory mem, immD0 zero)
 8351 %{
 8352   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8353   match(Set mem (StoreD mem zero));
 8354 
 8355   ins_cost(25); // XXX
 8356   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8357   ins_encode %{
 8358     __ movq($mem$$Address, r12);
 8359   %}
 8360   ins_pipe(ialu_mem_reg);
 8361 %}
 8362 
 8363 instruct storeSSI(stackSlotI dst, rRegI src)
 8364 %{
 8365   match(Set dst src);
 8366 
 8367   ins_cost(100);
 8368   format %{ "movl    $dst, $src\t# int stk" %}
 8369   ins_encode %{
 8370     __ movl($dst$$Address, $src$$Register);
 8371   %}
 8372   ins_pipe( ialu_mem_reg );
 8373 %}
 8374 
 8375 instruct storeSSL(stackSlotL dst, rRegL src)
 8376 %{
 8377   match(Set dst src);
 8378 
 8379   ins_cost(100);
 8380   format %{ "movq    $dst, $src\t# long stk" %}
 8381   ins_encode %{
 8382     __ movq($dst$$Address, $src$$Register);
 8383   %}
 8384   ins_pipe(ialu_mem_reg);
 8385 %}
 8386 
 8387 instruct storeSSP(stackSlotP dst, rRegP src)
 8388 %{
 8389   match(Set dst src);
 8390 
 8391   ins_cost(100);
 8392   format %{ "movq    $dst, $src\t# ptr stk" %}
 8393   ins_encode %{
 8394     __ movq($dst$$Address, $src$$Register);
 8395   %}
 8396   ins_pipe(ialu_mem_reg);
 8397 %}
 8398 
 8399 instruct storeSSF(stackSlotF dst, regF src)
 8400 %{
 8401   match(Set dst src);
 8402 
 8403   ins_cost(95); // XXX
 8404   format %{ "movss   $dst, $src\t# float stk" %}
 8405   ins_encode %{
 8406     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8407   %}
 8408   ins_pipe(pipe_slow); // XXX
 8409 %}
 8410 
 8411 instruct storeSSD(stackSlotD dst, regD src)
 8412 %{
 8413   match(Set dst src);
 8414 
 8415   ins_cost(95); // XXX
 8416   format %{ "movsd   $dst, $src\t# double stk" %}
 8417   ins_encode %{
 8418     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8419   %}
 8420   ins_pipe(pipe_slow); // XXX
 8421 %}
 8422 
 8423 instruct cacheWB(indirect addr)
 8424 %{
 8425   predicate(VM_Version::supports_data_cache_line_flush());
 8426   match(CacheWB addr);
 8427 
 8428   ins_cost(100);
 8429   format %{"cache wb $addr" %}
 8430   ins_encode %{
 8431     assert($addr->index_position() < 0, "should be");
 8432     assert($addr$$disp == 0, "should be");
 8433     __ cache_wb(Address($addr$$base$$Register, 0));
 8434   %}
 8435   ins_pipe(pipe_slow); // XXX
 8436 %}
 8437 
 8438 instruct cacheWBPreSync()
 8439 %{
 8440   predicate(VM_Version::supports_data_cache_line_flush());
 8441   match(CacheWBPreSync);
 8442 
 8443   ins_cost(100);
 8444   format %{"cache wb presync" %}
 8445   ins_encode %{
 8446     __ cache_wbsync(true);
 8447   %}
 8448   ins_pipe(pipe_slow); // XXX
 8449 %}
 8450 
 8451 instruct cacheWBPostSync()
 8452 %{
 8453   predicate(VM_Version::supports_data_cache_line_flush());
 8454   match(CacheWBPostSync);
 8455 
 8456   ins_cost(100);
 8457   format %{"cache wb postsync" %}
 8458   ins_encode %{
 8459     __ cache_wbsync(false);
 8460   %}
 8461   ins_pipe(pipe_slow); // XXX
 8462 %}
 8463 
 8464 //----------BSWAP Instructions-------------------------------------------------
 8465 instruct bytes_reverse_int(rRegI dst) %{
 8466   match(Set dst (ReverseBytesI dst));
 8467 
 8468   format %{ "bswapl  $dst" %}
 8469   ins_encode %{
 8470     __ bswapl($dst$$Register);
 8471   %}
 8472   ins_pipe( ialu_reg );
 8473 %}
 8474 
 8475 instruct bytes_reverse_long(rRegL dst) %{
 8476   match(Set dst (ReverseBytesL dst));
 8477 
 8478   format %{ "bswapq  $dst" %}
 8479   ins_encode %{
 8480     __ bswapq($dst$$Register);
 8481   %}
 8482   ins_pipe( ialu_reg);
 8483 %}
 8484 
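// For 16-bit values, bswapl moves the two interesting bytes into the upper
// half of the register; shifting back down by 16 (logical shift for the
// unsigned variant, arithmetic for the signed one) yields the swapped value
// zero- or sign-extended to 32 bits.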
 8485 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8486   match(Set dst (ReverseBytesUS dst));
 8487   effect(KILL cr);
 8488 
 8489   format %{ "bswapl  $dst\n\t"
 8490             "shrl    $dst,16\n\t" %}
 8491   ins_encode %{
 8492     __ bswapl($dst$$Register);
 8493     __ shrl($dst$$Register, 16);
 8494   %}
 8495   ins_pipe( ialu_reg );
 8496 %}
 8497 
 8498 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8499   match(Set dst (ReverseBytesS dst));
 8500   effect(KILL cr);
 8501 
 8502   format %{ "bswapl  $dst\n\t"
 8503             "sar     $dst,16\n\t" %}
 8504   ins_encode %{
 8505     __ bswapl($dst$$Register);
 8506     __ sarl($dst$$Register, 16);
 8507   %}
 8508   ins_pipe( ialu_reg );
 8509 %}
 8510 
 8511 //---------- Zeros Count Instructions ------------------------------------------
 8512 
 8513 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8514   predicate(UseCountLeadingZerosInstruction);
 8515   match(Set dst (CountLeadingZerosI src));
 8516   effect(KILL cr);
 8517 
 8518   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8519   ins_encode %{
 8520     __ lzcntl($dst$$Register, $src$$Register);
 8521   %}
 8522   ins_pipe(ialu_reg);
 8523 %}
 8524 
 8525 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8526   predicate(UseCountLeadingZerosInstruction);
 8527   match(Set dst (CountLeadingZerosI (LoadI src)));
 8528   effect(KILL cr);
 8529   ins_cost(175);
 8530   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8531   ins_encode %{
 8532     __ lzcntl($dst$$Register, $src$$Address);
 8533   %}
 8534   ins_pipe(ialu_reg_mem);
 8535 %}
 8536 
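// Fallback when lzcnt is unavailable: bsr returns the index of the highest
// set bit, so clz(x) = 31 - bsr(x), computed below as neg + add 31. For a
// zero input bsr sets ZF and leaves the destination undefined, so the branch
// substitutes -1, making the result 31 - (-1) = 32. The long variant below
// is identical with 63/64, and the bsf-based trailing-zero fallbacks need
// only the zero-input branch, since tzcnt(x) = bsf(x) for nonzero x.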
 8537 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8538   predicate(!UseCountLeadingZerosInstruction);
 8539   match(Set dst (CountLeadingZerosI src));
 8540   effect(KILL cr);
 8541 
 8542   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8543             "jnz     skip\n\t"
 8544             "movl    $dst, -1\n"
 8545       "skip:\n\t"
 8546             "negl    $dst\n\t"
 8547             "addl    $dst, 31" %}
 8548   ins_encode %{
 8549     Register Rdst = $dst$$Register;
 8550     Register Rsrc = $src$$Register;
 8551     Label skip;
 8552     __ bsrl(Rdst, Rsrc);
 8553     __ jccb(Assembler::notZero, skip);
 8554     __ movl(Rdst, -1);
 8555     __ bind(skip);
 8556     __ negl(Rdst);
 8557     __ addl(Rdst, BitsPerInt - 1);
 8558   %}
 8559   ins_pipe(ialu_reg);
 8560 %}
 8561 
 8562 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8563   predicate(UseCountLeadingZerosInstruction);
 8564   match(Set dst (CountLeadingZerosL src));
 8565   effect(KILL cr);
 8566 
 8567   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8568   ins_encode %{
 8569     __ lzcntq($dst$$Register, $src$$Register);
 8570   %}
 8571   ins_pipe(ialu_reg);
 8572 %}
 8573 
 8574 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8575   predicate(UseCountLeadingZerosInstruction);
 8576   match(Set dst (CountLeadingZerosL (LoadL src)));
 8577   effect(KILL cr);
 8578   ins_cost(175);
 8579   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8580   ins_encode %{
 8581     __ lzcntq($dst$$Register, $src$$Address);
 8582   %}
 8583   ins_pipe(ialu_reg_mem);
 8584 %}
 8585 
 8586 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8587   predicate(!UseCountLeadingZerosInstruction);
 8588   match(Set dst (CountLeadingZerosL src));
 8589   effect(KILL cr);
 8590 
 8591   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8592             "jnz     skip\n\t"
 8593             "movl    $dst, -1\n"
 8594       "skip:\n\t"
 8595             "negl    $dst\n\t"
 8596             "addl    $dst, 63" %}
 8597   ins_encode %{
 8598     Register Rdst = $dst$$Register;
 8599     Register Rsrc = $src$$Register;
 8600     Label skip;
 8601     __ bsrq(Rdst, Rsrc);
 8602     __ jccb(Assembler::notZero, skip);
 8603     __ movl(Rdst, -1);
 8604     __ bind(skip);
 8605     __ negl(Rdst);
 8606     __ addl(Rdst, BitsPerLong - 1);
 8607   %}
 8608   ins_pipe(ialu_reg);
 8609 %}
 8610 
 8611 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8612   predicate(UseCountTrailingZerosInstruction);
 8613   match(Set dst (CountTrailingZerosI src));
 8614   effect(KILL cr);
 8615 
 8616   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8617   ins_encode %{
 8618     __ tzcntl($dst$$Register, $src$$Register);
 8619   %}
 8620   ins_pipe(ialu_reg);
 8621 %}
 8622 
 8623 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8624   predicate(UseCountTrailingZerosInstruction);
 8625   match(Set dst (CountTrailingZerosI (LoadI src)));
 8626   effect(KILL cr);
 8627   ins_cost(175);
 8628   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8629   ins_encode %{
 8630     __ tzcntl($dst$$Register, $src$$Address);
 8631   %}
 8632   ins_pipe(ialu_reg_mem);
 8633 %}
 8634 
 8635 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8636   predicate(!UseCountTrailingZerosInstruction);
 8637   match(Set dst (CountTrailingZerosI src));
 8638   effect(KILL cr);
 8639 
 8640   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8641             "jnz     done\n\t"
 8642             "movl    $dst, 32\n"
 8643       "done:" %}
 8644   ins_encode %{
 8645     Register Rdst = $dst$$Register;
 8646     Label done;
 8647     __ bsfl(Rdst, $src$$Register);
 8648     __ jccb(Assembler::notZero, done);
 8649     __ movl(Rdst, BitsPerInt);
 8650     __ bind(done);
 8651   %}
 8652   ins_pipe(ialu_reg);
 8653 %}
 8654 
 8655 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8656   predicate(UseCountTrailingZerosInstruction);
 8657   match(Set dst (CountTrailingZerosL src));
 8658   effect(KILL cr);
 8659 
 8660   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8661   ins_encode %{
 8662     __ tzcntq($dst$$Register, $src$$Register);
 8663   %}
 8664   ins_pipe(ialu_reg);
 8665 %}
 8666 
 8667 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8668   predicate(UseCountTrailingZerosInstruction);
 8669   match(Set dst (CountTrailingZerosL (LoadL src)));
 8670   effect(KILL cr);
 8671   ins_cost(175);
 8672   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8673   ins_encode %{
 8674     __ tzcntq($dst$$Register, $src$$Address);
 8675   %}
 8676   ins_pipe(ialu_reg_mem);
 8677 %}
 8678 
 8679 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8680   predicate(!UseCountTrailingZerosInstruction);
 8681   match(Set dst (CountTrailingZerosL src));
 8682   effect(KILL cr);
 8683 
 8684   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8685             "jnz     done\n\t"
 8686             "movl    $dst, 64\n"
 8687       "done:" %}
 8688   ins_encode %{
 8689     Register Rdst = $dst$$Register;
 8690     Label done;
 8691     __ bsfq(Rdst, $src$$Register);
 8692     __ jccb(Assembler::notZero, done);
 8693     __ movl(Rdst, BitsPerLong);
 8694     __ bind(done);
 8695   %}
 8696   ins_pipe(ialu_reg);
 8697 %}
 8698 
 8699 //--------------- Reverse Operation Instructions ----------------
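// With GFNI, a gf2p8affineqb against a fixed bit-reversal matrix can reverse
// the bits of every byte in one instruction, which is presumably why the
// _gfni variants route the value through XMM temporaries; the fallback forms
// work entirely in general-purpose registers.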
 8700 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8701   predicate(!VM_Version::supports_gfni());
 8702   match(Set dst (ReverseI src));
 8703   effect(TEMP dst, TEMP rtmp, KILL cr);
 8704   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8705   ins_encode %{
 8706     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8707   %}
 8708   ins_pipe( ialu_reg );
 8709 %}
 8710 
 8711 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8712   predicate(VM_Version::supports_gfni());
 8713   match(Set dst (ReverseI src));
 8714   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8715   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8716   ins_encode %{
 8717     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8718   %}
 8719   ins_pipe( ialu_reg );
 8720 %}
 8721 
 8722 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8723   predicate(!VM_Version::supports_gfni());
 8724   match(Set dst (ReverseL src));
 8725   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8726   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8727   ins_encode %{
 8728     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8729   %}
 8730   ins_pipe( ialu_reg );
 8731 %}
 8732 
 8733 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8734   predicate(VM_Version::supports_gfni());
 8735   match(Set dst (ReverseL src));
 8736   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8737   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8738   ins_encode %{
 8739     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8740   %}
 8741   ins_pipe( ialu_reg );
 8742 %}
 8743 
 8744 //---------- Population Count Instructions -------------------------------------
 8745 
 8746 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8747   predicate(UsePopCountInstruction);
 8748   match(Set dst (PopCountI src));
 8749   effect(KILL cr);
 8750 
 8751   format %{ "popcnt  $dst, $src" %}
 8752   ins_encode %{
 8753     __ popcntl($dst$$Register, $src$$Register);
 8754   %}
 8755   ins_pipe(ialu_reg);
 8756 %}
 8757 
 8758 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8759   predicate(UsePopCountInstruction);
 8760   match(Set dst (PopCountI (LoadI mem)));
 8761   effect(KILL cr);
 8762 
 8763   format %{ "popcnt  $dst, $mem" %}
 8764   ins_encode %{
 8765     __ popcntl($dst$$Register, $mem$$Address);
 8766   %}
 8767   ins_pipe(ialu_reg);
 8768 %}
 8769 
 8770 // Note: Long.bitCount(long) returns an int.
 8771 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8772   predicate(UsePopCountInstruction);
 8773   match(Set dst (PopCountL src));
 8774   effect(KILL cr);
 8775 
 8776   format %{ "popcnt  $dst, $src" %}
 8777   ins_encode %{
 8778     __ popcntq($dst$$Register, $src$$Register);
 8779   %}
 8780   ins_pipe(ialu_reg);
 8781 %}
 8782 
 8783 // Note: Long.bitCount(long) returns an int.
 8784 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8785   predicate(UsePopCountInstruction);
 8786   match(Set dst (PopCountL (LoadL mem)));
 8787   effect(KILL cr);
 8788 
 8789   format %{ "popcnt  $dst, $mem" %}
 8790   ins_encode %{
 8791     __ popcntq($dst$$Register, $mem$$Address);
 8792   %}
 8793   ins_pipe(ialu_reg);
 8794 %}
 8795 
 8796 
 8797 //----------MemBar Instructions-----------------------------------------------
 8798 // Memory barrier flavors
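// Under x86-TSO, loads are not reordered with older loads and stores are not
// reordered with older stores, so acquire and release barriers need no code
// (empty encodings below); only StoreLoad ordering requires an instruction,
// see membar_volatile.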
 8799 
 8800 instruct membar_acquire()
 8801 %{
 8802   match(MemBarAcquire);
 8803   match(LoadFence);
 8804   ins_cost(0);
 8805 
 8806   size(0);
 8807   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8808   ins_encode();
 8809   ins_pipe(empty);
 8810 %}
 8811 
 8812 instruct membar_acquire_lock()
 8813 %{
 8814   match(MemBarAcquireLock);
 8815   ins_cost(0);
 8816 
 8817   size(0);
 8818   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8819   ins_encode();
 8820   ins_pipe(empty);
 8821 %}
 8822 
 8823 instruct membar_release()
 8824 %{
 8825   match(MemBarRelease);
 8826   match(StoreFence);
 8827   ins_cost(0);
 8828 
 8829   size(0);
 8830   format %{ "MEMBAR-release ! (empty encoding)" %}
 8831   ins_encode();
 8832   ins_pipe(empty);
 8833 %}
 8834 
 8835 instruct membar_release_lock()
 8836 %{
 8837   match(MemBarReleaseLock);
 8838   ins_cost(0);
 8839 
 8840   size(0);
 8841   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8842   ins_encode();
 8843   ins_pipe(empty);
 8844 %}
 8845 
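// A locked read-modify-write of the top-of-stack word provides the full
// StoreLoad barrier; it is generally cheaper than mfence, and the target
// line is almost certainly already in L1.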
 8846 instruct membar_volatile(rFlagsReg cr) %{
 8847   match(MemBarVolatile);
 8848   effect(KILL cr);
 8849   ins_cost(400);
 8850 
 8851   format %{
 8852     $$template
 8853     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8854   %}
 8855   ins_encode %{
 8856     __ membar(Assembler::StoreLoad);
 8857   %}
 8858   ins_pipe(pipe_slow);
 8859 %}
 8860 
 8861 instruct unnecessary_membar_volatile()
 8862 %{
 8863   match(MemBarVolatile);
 8864   predicate(Matcher::post_store_load_barrier(n));
 8865   ins_cost(0);
 8866 
 8867   size(0);
 8868   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8869   ins_encode();
 8870   ins_pipe(empty);
 8871 %}
 8872 
 8873 instruct membar_storestore() %{
 8874   match(MemBarStoreStore);
 8875   match(StoreStoreFence);
 8876   ins_cost(0);
 8877 
 8878   size(0);
 8879   format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
 8881   ins_pipe(empty);
 8882 %}
 8883 
 8884 //----------Move Instructions--------------------------------------------------
 8885 
 8886 instruct castX2P(rRegP dst, rRegL src)
 8887 %{
 8888   match(Set dst (CastX2P src));
 8889 
 8890   format %{ "movq    $dst, $src\t# long->ptr" %}
 8891   ins_encode %{
 8892     if ($dst$$reg != $src$$reg) {
 8893       __ movptr($dst$$Register, $src$$Register);
 8894     }
 8895   %}
 8896   ins_pipe(ialu_reg_reg); // XXX
 8897 %}
 8898 
 8899 instruct castI2N(rRegN dst, rRegI src)
 8900 %{
 8901   match(Set dst (CastI2N src));
 8902 
 8903   format %{ "movq    $dst, $src\t# int -> narrow ptr" %}
 8904   ins_encode %{
 8905     if ($dst$$reg != $src$$reg) {
 8906       __ movl($dst$$Register, $src$$Register);
 8907     }
 8908   %}
 8909   ins_pipe(ialu_reg_reg); // XXX
 8910 %}
 8911 
 8912 instruct castN2X(rRegL dst, rRegN src)
 8913 %{
 8914   match(Set dst (CastP2X src));
 8915 
 8916   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8917   ins_encode %{
 8918     if ($dst$$reg != $src$$reg) {
 8919       __ movptr($dst$$Register, $src$$Register);
 8920     }
 8921   %}
 8922   ins_pipe(ialu_reg_reg); // XXX
 8923 %}
 8924 
 8925 instruct castP2X(rRegL dst, rRegP src)
 8926 %{
 8927   match(Set dst (CastP2X src));
 8928 
 8929   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8930   ins_encode %{
 8931     if ($dst$$reg != $src$$reg) {
 8932       __ movptr($dst$$Register, $src$$Register);
 8933     }
 8934   %}
 8935   ins_pipe(ialu_reg_reg); // XXX
 8936 %}
 8937 
// Convert oop into int for vector alignment masking
 8939 instruct convP2I(rRegI dst, rRegP src)
 8940 %{
 8941   match(Set dst (ConvL2I (CastP2X src)));
 8942 
 8943   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8944   ins_encode %{
 8945     __ movl($dst$$Register, $src$$Register);
 8946   %}
 8947   ins_pipe(ialu_reg_reg); // XXX
 8948 %}
 8949 
// Convert compressed oop into int for vector alignment masking
// in case of 32-bit oops (heap < 4GB).
 8952 instruct convN2I(rRegI dst, rRegN src)
 8953 %{
 8954   predicate(CompressedOops::shift() == 0);
 8955   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8956 
 8957   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8958   ins_encode %{
 8959     __ movl($dst$$Register, $src$$Register);
 8960   %}
 8961   ins_pipe(ialu_reg_reg); // XXX
 8962 %}
 8963 
 8964 // Convert oop pointer into compressed form
 8965 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 8966   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8967   match(Set dst (EncodeP src));
 8968   effect(KILL cr);
 8969   format %{ "encode_heap_oop $dst,$src" %}
 8970   ins_encode %{
 8971     Register s = $src$$Register;
 8972     Register d = $dst$$Register;
 8973     if (s != d) {
 8974       __ movq(d, s);
 8975     }
 8976     __ encode_heap_oop(d);
 8977   %}
 8978   ins_pipe(ialu_reg_long);
 8979 %}
 8980 
 8981 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8982   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8983   match(Set dst (EncodeP src));
 8984   effect(KILL cr);
 8985   format %{ "encode_heap_oop_not_null $dst,$src" %}
 8986   ins_encode %{
 8987     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 8988   %}
 8989   ins_pipe(ialu_reg_long);
 8990 %}
 8991 
 8992 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 8993   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 8994             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 8995   match(Set dst (DecodeN src));
 8996   effect(KILL cr);
 8997   format %{ "decode_heap_oop $dst,$src" %}
 8998   ins_encode %{
 8999     Register s = $src$$Register;
 9000     Register d = $dst$$Register;
 9001     if (s != d) {
 9002       __ movq(d, s);
 9003     }
 9004     __ decode_heap_oop(d);
 9005   %}
 9006   ins_pipe(ialu_reg_long);
 9007 %}
 9008 
 9009 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9010   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 9011             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 9012   match(Set dst (DecodeN src));
 9013   effect(KILL cr);
 9014   format %{ "decode_heap_oop_not_null $dst,$src" %}
 9015   ins_encode %{
 9016     Register s = $src$$Register;
 9017     Register d = $dst$$Register;
 9018     if (s != d) {
 9019       __ decode_heap_oop_not_null(d, s);
 9020     } else {
 9021       __ decode_heap_oop_not_null(d);
 9022     }
 9023   %}
 9024   ins_pipe(ialu_reg_long);
 9025 %}
 9026 
 9027 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 9028   match(Set dst (EncodePKlass src));
 9029   effect(TEMP dst, KILL cr);
 9030   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 9031   ins_encode %{
 9032     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9033   %}
 9034   ins_pipe(ialu_reg_long);
 9035 %}
 9036 
 9037 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9038   match(Set dst (DecodeNKlass src));
 9039   effect(TEMP dst, KILL cr);
 9040   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 9041   ins_encode %{
 9042     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9043   %}
 9044   ins_pipe(ialu_reg_long);
 9045 %}
 9046 
 9047 //----------Conditional Move---------------------------------------------------
 9048 // Jump
 9049 // dummy instruction for generating temp registers
 9050 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 9051   match(Jump (LShiftL switch_val shift));
 9052   ins_cost(350);
 9053   predicate(false);
 9054   effect(TEMP dest);
 9055 
 9056   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9057             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9058   ins_encode %{
    // We could use jump(ArrayAddress), except that the macro assembler needs r10
    // to do that and the compiler may already have allocated r10 to something
    // else, so we build the dispatch address by hand.
 9062     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9063     // ArrayAddress dispatch(table, index);
 9064     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9065     __ lea($dest$$Register, $constantaddress);
 9066     __ jmp(dispatch);
 9067   %}
 9068   ins_pipe(pipe_jmp);
 9069 %}
 9070 
 9071 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 9072   match(Jump (AddL (LShiftL switch_val shift) offset));
 9073   ins_cost(350);
 9074   effect(TEMP dest);
 9075 
 9076   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9077             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9078   ins_encode %{
    // We could use jump(ArrayAddress), except that the macro assembler needs r10
    // to do that and the compiler may already have allocated r10 to something
    // else, so we build the dispatch address by hand.
 9082     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9083     // ArrayAddress dispatch(table, index);
 9084     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9085     __ lea($dest$$Register, $constantaddress);
 9086     __ jmp(dispatch);
 9087   %}
 9088   ins_pipe(pipe_jmp);
 9089 %}
 9090 
 9091 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 9092   match(Jump switch_val);
 9093   ins_cost(350);
 9094   effect(TEMP dest);
 9095 
 9096   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9097             "jmp     [$dest + $switch_val]\n\t" %}
 9098   ins_encode %{
    // We could use jump(ArrayAddress), except that the macro assembler needs r10
    // to do that and the compiler may already have allocated r10 to something
    // else, so we build the dispatch address by hand.
 9102     // Address index(noreg, switch_reg, Address::times_1);
 9103     // ArrayAddress dispatch(table, index);
 9104     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9105     __ lea($dest$$Register, $constantaddress);
 9106     __ jmp(dispatch);
 9107   %}
 9108   ins_pipe(pipe_jmp);
 9109 %}
 9110 
 9111 // Conditional move
 9112 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9113 %{
 9114   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9115   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9116 
 9117   ins_cost(100); // XXX
 9118   format %{ "setbn$cop $dst\t# signed, int" %}
 9119   ins_encode %{
 9120     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9121     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9122   %}
 9123   ins_pipe(ialu_reg);
 9124 %}
 9125 
 9126 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9127 %{
 9128   predicate(!UseAPX);
 9129   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9130 
 9131   ins_cost(200); // XXX
 9132   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9133   ins_encode %{
 9134     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9135   %}
 9136   ins_pipe(pipe_cmov_reg);
 9137 %}
 9138 
 9139 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9140 %{
 9141   predicate(UseAPX);
 9142   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9143 
 9144   ins_cost(200);
 9145   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9146   ins_encode %{
 9147     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9148   %}
 9149   ins_pipe(pipe_cmov_reg);
 9150 %}
 9151 
 9152 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9153 %{
 9154   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9155   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9156 
 9157   ins_cost(100); // XXX
 9158   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9159   ins_encode %{
 9160     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9161     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9162   %}
 9163   ins_pipe(ialu_reg);
 9164 %}
 9165 
 9166 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9167   predicate(!UseAPX);
 9168   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9169 
 9170   ins_cost(200); // XXX
 9171   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9172   ins_encode %{
 9173     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9174   %}
 9175   ins_pipe(pipe_cmov_reg);
 9176 %}
 9177 
 9178 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9179   predicate(UseAPX);
 9180   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9181 
 9182   ins_cost(200);
 9183   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9184   ins_encode %{
 9185     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9186   %}
 9187   ins_pipe(pipe_cmov_reg);
 9188 %}
 9189 
 9190 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9191 %{
 9192   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9193   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9194 
 9195   ins_cost(100); // XXX
 9196   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9197   ins_encode %{
 9198     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9199     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9200   %}
 9201   ins_pipe(ialu_reg);
 9202 %}
 9203 
 9204 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9205   predicate(!UseAPX);
 9206   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9207   ins_cost(200);
 9208   expand %{
 9209     cmovI_regU(cop, cr, dst, src);
 9210   %}
 9211 %}
 9212 
 9213 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
 9214   predicate(UseAPX);
 9215   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9216   ins_cost(200);
 9217   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9218   ins_encode %{
 9219     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9220   %}
 9221   ins_pipe(pipe_cmov_reg);
 9222 %}
 9223 
 9224 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9225   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9226   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9227 
 9228   ins_cost(200); // XXX
 9229   format %{ "cmovpl  $dst, $src\n\t"
 9230             "cmovnel $dst, $src" %}
 9231   ins_encode %{
 9232     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9233     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9234   %}
 9235   ins_pipe(pipe_cmov_reg);
 9236 %}
 9237 
 9238 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9239   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9240   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9241   effect(TEMP dst);
 9242 
 9243   ins_cost(200);
 9244   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9245             "cmovnel  $dst, $src2" %}
 9246   ins_encode %{
 9247     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9248     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9249   %}
 9250   ins_pipe(pipe_cmov_reg);
 9251 %}
 9252 
 9253 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9254 // inputs of the CMove
 9255 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9256   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9257   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9258   effect(TEMP dst);
 9259 
 9260   ins_cost(200); // XXX
 9261   format %{ "cmovpl  $dst, $src\n\t"
 9262             "cmovnel $dst, $src" %}
 9263   ins_encode %{
 9264     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9265     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9266   %}
 9267   ins_pipe(pipe_cmov_reg);
 9268 %}
 9269 
// We need this special handling only for the eq/ne comparisons, since NaN == NaN
// is false and the parity flag is set if either operand is NaN.
 9272 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9273   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9274   match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
 9275   effect(TEMP dst);
 9276 
 9277   ins_cost(200);
 9278   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9279             "cmovnel  $dst, $src2" %}
 9280   ins_encode %{
 9281     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9282     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9283   %}
 9284   ins_pipe(pipe_cmov_reg);
 9285 %}
 9286 
 9287 // Conditional move
 9288 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9289   predicate(!UseAPX);
 9290   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9291 
 9292   ins_cost(250); // XXX
 9293   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9294   ins_encode %{
 9295     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9296   %}
 9297   ins_pipe(pipe_cmov_mem);
 9298 %}
 9299 
 9300 // Conditional move
 9301 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9302 %{
 9303   predicate(UseAPX);
 9304   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9305 
 9306   ins_cost(250);
 9307   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9308   ins_encode %{
 9309     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9310   %}
 9311   ins_pipe(pipe_cmov_mem);
 9312 %}
 9313 
 9314 // Conditional move
 9315 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9316 %{
 9317   predicate(!UseAPX);
 9318   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9319 
 9320   ins_cost(250); // XXX
 9321   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9322   ins_encode %{
 9323     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9324   %}
 9325   ins_pipe(pipe_cmov_mem);
 9326 %}
 9327 
 9328 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9329   predicate(!UseAPX);
 9330   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9331   ins_cost(250);
 9332   expand %{
 9333     cmovI_memU(cop, cr, dst, src);
 9334   %}
 9335 %}
 9336 
 9337 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9338 %{
 9339   predicate(UseAPX);
 9340   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9341 
 9342   ins_cost(250);
 9343   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9344   ins_encode %{
 9345     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9346   %}
 9347   ins_pipe(pipe_cmov_mem);
 9348 %}
 9349 
 9350 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
 9351 %{
 9352   predicate(UseAPX);
 9353   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9354   ins_cost(250);
 9355   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9356   ins_encode %{
 9357     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9358   %}
 9359   ins_pipe(pipe_cmov_mem);
 9360 %}
 9361 
 9362 // Conditional move
 9363 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9364 %{
 9365   predicate(!UseAPX);
 9366   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9367 
 9368   ins_cost(200); // XXX
 9369   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9370   ins_encode %{
 9371     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9372   %}
 9373   ins_pipe(pipe_cmov_reg);
 9374 %}
 9375 
 9376 // Conditional move ndd
 9377 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9378 %{
 9379   predicate(UseAPX);
 9380   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9381 
 9382   ins_cost(200);
 9383   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9384   ins_encode %{
 9385     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9386   %}
 9387   ins_pipe(pipe_cmov_reg);
 9388 %}
 9389 
 9390 // Conditional move
 9391 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9392 %{
 9393   predicate(!UseAPX);
 9394   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9395 
 9396   ins_cost(200); // XXX
 9397   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9398   ins_encode %{
 9399     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9400   %}
 9401   ins_pipe(pipe_cmov_reg);
 9402 %}
 9403 
 9404 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9405   predicate(!UseAPX);
 9406   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9407   ins_cost(200);
 9408   expand %{
 9409     cmovN_regU(cop, cr, dst, src);
 9410   %}
 9411 %}
 9412 
 9413 // Conditional move ndd
 9414 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9415 %{
 9416   predicate(UseAPX);
 9417   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9418 
 9419   ins_cost(200);
 9420   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9421   ins_encode %{
 9422     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9423   %}
 9424   ins_pipe(pipe_cmov_reg);
 9425 %}
 9426 
 9427 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
 9428   predicate(UseAPX);
 9429   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9430   ins_cost(200);
 9431   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9432   ins_encode %{
 9433     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9434   %}
 9435   ins_pipe(pipe_cmov_reg);
 9436 %}
 9437 
 9438 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9439   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9440   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9441 
 9442   ins_cost(200); // XXX
 9443   format %{ "cmovpl  $dst, $src\n\t"
 9444             "cmovnel $dst, $src" %}
 9445   ins_encode %{
 9446     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9447     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9448   %}
 9449   ins_pipe(pipe_cmov_reg);
 9450 %}
 9451 
 9452 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9453 // inputs of the CMove
 9454 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9455   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9456   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9457 
 9458   ins_cost(200); // XXX
 9459   format %{ "cmovpl  $dst, $src\n\t"
 9460             "cmovnel $dst, $src" %}
 9461   ins_encode %{
 9462     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9463     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9464   %}
 9465   ins_pipe(pipe_cmov_reg);
 9466 %}
 9467 
 9468 // Conditional move
 9469 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9470 %{
 9471   predicate(!UseAPX);
 9472   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9473 
 9474   ins_cost(200); // XXX
 9475   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9476   ins_encode %{
 9477     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9478   %}
 9479   ins_pipe(pipe_cmov_reg);  // XXX
 9480 %}
 9481 
 9482 // Conditional move ndd
 9483 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9484 %{
 9485   predicate(UseAPX);
 9486   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9487 
 9488   ins_cost(200);
 9489   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9490   ins_encode %{
 9491     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9492   %}
 9493   ins_pipe(pipe_cmov_reg);
 9494 %}
 9495 
 9496 // Conditional move
 9497 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9498 %{
 9499   predicate(!UseAPX);
 9500   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9501 
 9502   ins_cost(200); // XXX
 9503   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9504   ins_encode %{
 9505     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9506   %}
 9507   ins_pipe(pipe_cmov_reg); // XXX
 9508 %}
 9509 
 9510 // Conditional move ndd
 9511 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9512 %{
 9513   predicate(UseAPX);
 9514   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9515 
 9516   ins_cost(200);
 9517   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9518   ins_encode %{
 9519     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9520   %}
 9521   ins_pipe(pipe_cmov_reg);
 9522 %}
 9523 
 9524 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9525   predicate(!UseAPX);
 9526   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9527   ins_cost(200);
 9528   expand %{
 9529     cmovP_regU(cop, cr, dst, src);
 9530   %}
 9531 %}
 9532 
 9533 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
 9534   predicate(UseAPX);
 9535   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9536   ins_cost(200);
 9537   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9538   ins_encode %{
 9539     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9540   %}
 9541   ins_pipe(pipe_cmov_reg);
 9542 %}
 9543 
 9544 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9545   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9546   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9547 
 9548   ins_cost(200); // XXX
 9549   format %{ "cmovpq  $dst, $src\n\t"
 9550             "cmovneq $dst, $src" %}
 9551   ins_encode %{
 9552     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9553     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9554   %}
 9555   ins_pipe(pipe_cmov_reg);
 9556 %}
 9557 
 9558 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9559   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9560   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9561   effect(TEMP dst);
 9562 
 9563   ins_cost(200);
 9564   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9565             "cmovneq  $dst, $src2" %}
 9566   ins_encode %{
 9567     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9568     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9569   %}
 9570   ins_pipe(pipe_cmov_reg);
 9571 %}
 9572 
 9573 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9574 // inputs of the CMove
 9575 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9576   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9577   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9578 
 9579   ins_cost(200); // XXX
 9580   format %{ "cmovpq  $dst, $src\n\t"
 9581             "cmovneq $dst, $src" %}
 9582   ins_encode %{
 9583     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9584     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9585   %}
 9586   ins_pipe(pipe_cmov_reg);
 9587 %}
 9588 
 9589 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9590   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9591   match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
 9592   effect(TEMP dst);
 9593 
 9594   ins_cost(200);
 9595   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9596             "cmovneq  $dst, $src2" %}
 9597   ins_encode %{
 9598     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9599     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9600   %}
 9601   ins_pipe(pipe_cmov_reg);
 9602 %}
 9603 
 9604 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9605 %{
 9606   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9607   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9608 
 9609   ins_cost(100); // XXX
 9610   format %{ "setbn$cop $dst\t# signed, long" %}
 9611   ins_encode %{
 9612     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9613     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9614   %}
 9615   ins_pipe(ialu_reg);
 9616 %}
 9617 
 9618 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9619 %{
 9620   predicate(!UseAPX);
 9621   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9622 
 9623   ins_cost(200); // XXX
 9624   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9625   ins_encode %{
 9626     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9627   %}
 9628   ins_pipe(pipe_cmov_reg);  // XXX
 9629 %}
 9630 
 9631 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9632 %{
 9633   predicate(UseAPX);
 9634   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9635 
 9636   ins_cost(200);
 9637   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9638   ins_encode %{
 9639     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9640   %}
 9641   ins_pipe(pipe_cmov_reg);
 9642 %}
 9643 
 9644 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9645 %{
 9646   predicate(!UseAPX);
 9647   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9648 
 9649   ins_cost(200); // XXX
 9650   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9651   ins_encode %{
 9652     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9653   %}
 9654   ins_pipe(pipe_cmov_mem);  // XXX
 9655 %}
 9656 
 9657 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9658 %{
 9659   predicate(UseAPX);
 9660   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9661 
 9662   ins_cost(200);
 9663   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9664   ins_encode %{
 9665     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9666   %}
 9667   ins_pipe(pipe_cmov_mem);
 9668 %}
 9669 
 9670 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9671 %{
 9672   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9673   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9674 
 9675   ins_cost(100); // XXX
 9676   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9677   ins_encode %{
 9678     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9679     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9680   %}
 9681   ins_pipe(ialu_reg);
 9682 %}
 9683 
 9684 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9685 %{
 9686   predicate(!UseAPX);
 9687   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9688 
 9689   ins_cost(200); // XXX
 9690   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9691   ins_encode %{
 9692     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9693   %}
 9694   ins_pipe(pipe_cmov_reg); // XXX
 9695 %}
 9696 
 9697 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9698 %{
 9699   predicate(UseAPX);
 9700   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9701 
 9702   ins_cost(200);
 9703   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9704   ins_encode %{
 9705     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9706   %}
 9707   ins_pipe(pipe_cmov_reg);
 9708 %}
 9709 
 9710 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9711 %{
 9712   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9713   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9714 
 9715   ins_cost(100); // XXX
 9716   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9717   ins_encode %{
 9718     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9719     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9720   %}
 9721   ins_pipe(ialu_reg);
 9722 %}
 9723 
 9724 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9725   predicate(!UseAPX);
 9726   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9727   ins_cost(200);
 9728   expand %{
 9729     cmovL_regU(cop, cr, dst, src);
 9730   %}
 9731 %}
 9732 
 9733 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
 9734 %{
 9735   predicate(UseAPX);
 9736   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9737   ins_cost(200);
 9738   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9739   ins_encode %{
 9740     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9741   %}
 9742   ins_pipe(pipe_cmov_reg);
 9743 %}
 9744 
 9745 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9746   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9747   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9748 
 9749   ins_cost(200); // XXX
 9750   format %{ "cmovpq  $dst, $src\n\t"
 9751             "cmovneq $dst, $src" %}
 9752   ins_encode %{
 9753     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9754     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9755   %}
 9756   ins_pipe(pipe_cmov_reg);
 9757 %}
 9758 
 9759 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9760   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9761   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9762   effect(TEMP dst);
 9763 
 9764   ins_cost(200);
 9765   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9766             "cmovneq  $dst, $src2" %}
 9767   ins_encode %{
 9768     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9769     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9770   %}
 9771   ins_pipe(pipe_cmov_reg);
 9772 %}
 9773 
 9774 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9775 // inputs of the CMove
 9776 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9777   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9778   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9779 
 9780   ins_cost(200); // XXX
 9781   format %{ "cmovpq  $dst, $src\n\t"
 9782             "cmovneq $dst, $src" %}
 9783   ins_encode %{
 9784     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9785     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9786   %}
 9787   ins_pipe(pipe_cmov_reg);
 9788 %}
 9789 
 9790 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9791   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9792   match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
 9793   effect(TEMP dst);
 9794 
 9795   ins_cost(200);
 9796   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9797             "cmovneq $dst, $src2" %}
 9798   ins_encode %{
 9799     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9800     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9801   %}
 9802   ins_pipe(pipe_cmov_reg);
 9803 %}
 9804 
 9805 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9806 %{
 9807   predicate(!UseAPX);
 9808   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9809 
 9810   ins_cost(200); // XXX
 9811   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9812   ins_encode %{
 9813     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9814   %}
 9815   ins_pipe(pipe_cmov_mem); // XXX
 9816 %}
 9817 
 9818 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9819   predicate(!UseAPX);
 9820   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9821   ins_cost(200);
 9822   expand %{
 9823     cmovL_memU(cop, cr, dst, src);
 9824   %}
 9825 %}
 9826 
 9827 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9828 %{
 9829   predicate(UseAPX);
 9830   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9831 
 9832   ins_cost(200);
 9833   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9834   ins_encode %{
 9835     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9836   %}
 9837   ins_pipe(pipe_cmov_mem);
 9838 %}
 9839 
 9840 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
 9841 %{
 9842   predicate(UseAPX);
 9843   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9844   ins_cost(200);
 9845   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9846   ins_encode %{
 9847     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9848   %}
 9849   ins_pipe(pipe_cmov_mem);
 9850 %}
 9851 
 9852 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9853 %{
 9854   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9855 
 9856   ins_cost(200); // XXX
 9857   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9858             "movss     $dst, $src\n"
 9859     "skip:" %}
 9860   ins_encode %{
 9861     Label Lskip;
 9862     // Invert sense of branch from sense of CMOV
 9863     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9864     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9865     __ bind(Lskip);
 9866   %}
 9867   ins_pipe(pipe_slow);
 9868 %}
 9869 
 9870 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9871 %{
 9872   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9873 
 9874   ins_cost(200); // XXX
 9875   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9876             "movss     $dst, $src\n"
 9877     "skip:" %}
 9878   ins_encode %{
 9879     Label Lskip;
 9880     // Invert sense of branch from sense of CMOV
 9881     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9882     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9883     __ bind(Lskip);
 9884   %}
 9885   ins_pipe(pipe_slow);
 9886 %}
 9887 
 9888 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9889   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9890   ins_cost(200);
 9891   expand %{
 9892     cmovF_regU(cop, cr, dst, src);
 9893   %}
 9894 %}
 9895 
 9896 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9897 %{
 9898   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9899 
 9900   ins_cost(200); // XXX
 9901   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9902             "movsd     $dst, $src\n"
 9903     "skip:" %}
 9904   ins_encode %{
 9905     Label Lskip;
 9906     // Invert sense of branch from sense of CMOV
 9907     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9908     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9909     __ bind(Lskip);
 9910   %}
 9911   ins_pipe(pipe_slow);
 9912 %}
 9913 
 9914 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9915 %{
 9916   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9917 
 9918   ins_cost(200); // XXX
 9919   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9920             "movsd     $dst, $src\n"
 9921     "skip:" %}
 9922   ins_encode %{
 9923     Label Lskip;
 9924     // Invert sense of branch from sense of CMOV
 9925     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9926     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9927     __ bind(Lskip);
 9928   %}
 9929   ins_pipe(pipe_slow);
 9930 %}
 9931 
 9932 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9933   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9934   ins_cost(200);
 9935   expand %{
 9936     cmovD_regU(cop, cr, dst, src);
 9937   %}
 9938 %}
 9939 
 9940 //----------Arithmetic Instructions--------------------------------------------
 9941 //----------Addition Instructions----------------------------------------------
 9942 
 9943 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9944 %{
 9945   predicate(!UseAPX);
 9946   match(Set dst (AddI dst src));
 9947   effect(KILL cr);
 9948   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9949   format %{ "addl    $dst, $src\t# int" %}
 9950   ins_encode %{
 9951     __ addl($dst$$Register, $src$$Register);
 9952   %}
 9953   ins_pipe(ialu_reg_reg);
 9954 %}
 9955 
 9956 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9957 %{
 9958   predicate(UseAPX);
 9959   match(Set dst (AddI src1 src2));
 9960   effect(KILL cr);
 9961   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
 9962 
 9963   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9964   ins_encode %{
 9965     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9966   %}
 9967   ins_pipe(ialu_reg_reg);
 9968 %}
 9969 
 9970 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9971 %{
 9972   predicate(!UseAPX);
 9973   match(Set dst (AddI dst src));
 9974   effect(KILL cr);
 9975   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9976 
 9977   format %{ "addl    $dst, $src\t# int" %}
 9978   ins_encode %{
 9979     __ addl($dst$$Register, $src$$constant);
 9980   %}
  ins_pipe(ialu_reg);
 9982 %}
 9983 
 9984 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9985 %{
 9986   predicate(UseAPX);
 9987   match(Set dst (AddI src1 src2));
 9988   effect(KILL cr);
 9989   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
 9990 
 9991   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9992   ins_encode %{
 9993     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9994   %}
  ins_pipe(ialu_reg);
 9996 %}
 9997 
 9998 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
 9999 %{
10000   predicate(UseAPX);
10001   match(Set dst (AddI (LoadI src1) src2));
10002   effect(KILL cr);
10003   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10004 
10005   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10006   ins_encode %{
10007     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
10008   %}
  ins_pipe(ialu_reg);
10010 %}
10011 
10012 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10013 %{
10014   predicate(!UseAPX);
10015   match(Set dst (AddI dst (LoadI src)));
10016   effect(KILL cr);
10017   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10018 
10019   ins_cost(150); // XXX
10020   format %{ "addl    $dst, $src\t# int" %}
10021   ins_encode %{
10022     __ addl($dst$$Register, $src$$Address);
10023   %}
10024   ins_pipe(ialu_reg_mem);
10025 %}
10026 
10027 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10028 %{
10029   predicate(UseAPX);
10030   match(Set dst (AddI src1 (LoadI src2)));
10031   effect(KILL cr);
10032   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
10033 
10034   ins_cost(150);
10035   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10036   ins_encode %{
10037     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10038   %}
10039   ins_pipe(ialu_reg_mem);
10040 %}
10041 
10042 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10043 %{
10044   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10045   effect(KILL cr);
10046   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10047 
10048   ins_cost(150); // XXX
10049   format %{ "addl    $dst, $src\t# int" %}
10050   ins_encode %{
10051     __ addl($dst$$Address, $src$$Register);
10052   %}
10053   ins_pipe(ialu_mem_reg);
10054 %}
10055 
10056 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10057 %{
10058   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10059   effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

10063   ins_cost(125); // XXX
10064   format %{ "addl    $dst, $src\t# int" %}
10065   ins_encode %{
10066     __ addl($dst$$Address, $src$$constant);
10067   %}
10068   ins_pipe(ialu_mem_imm);
10069 %}
10070 
10071 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10072 %{
10073   predicate(!UseAPX && UseIncDec);
10074   match(Set dst (AddI dst src));
10075   effect(KILL cr);
10076 
10077   format %{ "incl    $dst\t# int" %}
10078   ins_encode %{
10079     __ incrementl($dst$$Register);
10080   %}
10081   ins_pipe(ialu_reg);
10082 %}
10083 
10084 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10085 %{
10086   predicate(UseAPX && UseIncDec);
10087   match(Set dst (AddI src val));
10088   effect(KILL cr);
10089   flag(PD::Flag_ndd_demotable);
10090 
10091   format %{ "eincl    $dst, $src\t# int ndd" %}
10092   ins_encode %{
10093     __ eincl($dst$$Register, $src$$Register, false);
10094   %}
10095   ins_pipe(ialu_reg);
10096 %}
10097 
10098 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10099 %{
10100   predicate(UseAPX && UseIncDec);
10101   match(Set dst (AddI (LoadI src) val));
10102   effect(KILL cr);
10103 
10104   format %{ "eincl    $dst, $src\t# int ndd" %}
10105   ins_encode %{
10106     __ eincl($dst$$Register, $src$$Address, false);
10107   %}
10108   ins_pipe(ialu_reg);
10109 %}
10110 
10111 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10112 %{
10113   predicate(UseIncDec);
10114   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10115   effect(KILL cr);
10116 
10117   ins_cost(125); // XXX
10118   format %{ "incl    $dst\t# int" %}
10119   ins_encode %{
10120     __ incrementl($dst$$Address);
10121   %}
10122   ins_pipe(ialu_mem_imm);
10123 %}
10124 
10125 // XXX why does that use AddI
10126 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10127 %{
10128   predicate(!UseAPX && UseIncDec);
10129   match(Set dst (AddI dst src));
10130   effect(KILL cr);
10131 
10132   format %{ "decl    $dst\t# int" %}
10133   ins_encode %{
10134     __ decrementl($dst$$Register);
10135   %}
10136   ins_pipe(ialu_reg);
10137 %}
10138 
10139 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10140 %{
10141   predicate(UseAPX && UseIncDec);
10142   match(Set dst (AddI src val));
10143   effect(KILL cr);
10144   flag(PD::Flag_ndd_demotable);
10145 
10146   format %{ "edecl    $dst, $src\t# int ndd" %}
10147   ins_encode %{
10148     __ edecl($dst$$Register, $src$$Register, false);
10149   %}
10150   ins_pipe(ialu_reg);
10151 %}
10152 
10153 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10154 %{
10155   predicate(UseAPX && UseIncDec);
10156   match(Set dst (AddI (LoadI src) val));
10157   effect(KILL cr);
10158 
10159   format %{ "edecl    $dst, $src\t# int ndd" %}
10160   ins_encode %{
10161     __ edecl($dst$$Register, $src$$Address, false);
10162   %}
10163   ins_pipe(ialu_reg);
10164 %}
10165 
10166 // XXX why does that use AddI
10167 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10168 %{
10169   predicate(UseIncDec);
10170   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10171   effect(KILL cr);
10172 
10173   ins_cost(125); // XXX
10174   format %{ "decl    $dst\t# int" %}
10175   ins_encode %{
10176     __ decrementl($dst$$Address);
10177   %}
10178   ins_pipe(ialu_mem_imm);
10179 %}
10180 
10181 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10182 %{
10183   predicate(VM_Version::supports_fast_2op_lea());
10184   match(Set dst (AddI (LShiftI index scale) disp));
10185 
10186   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10187   ins_encode %{
10188     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10189     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10190   %}
10191   ins_pipe(ialu_reg_reg);
10192 %}
10193 
10194 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10195 %{
10196   predicate(VM_Version::supports_fast_3op_lea());
10197   match(Set dst (AddI (AddI base index) disp));
10198 
10199   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10200   ins_encode %{
10201     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10202   %}
10203   ins_pipe(ialu_reg_reg);
10204 %}
10205 
10206 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10207 %{
10208   predicate(VM_Version::supports_fast_2op_lea());
10209   match(Set dst (AddI base (LShiftI index scale)));
10210 
10211   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10212   ins_encode %{
10213     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10214     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10215   %}
10216   ins_pipe(ialu_reg_reg);
10217 %}
10218 
10219 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10220 %{
10221   predicate(VM_Version::supports_fast_3op_lea());
10222   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10223 
10224   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10225   ins_encode %{
10226     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10227     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10228   %}
10229   ins_pipe(ialu_reg_reg);
10230 %}
10231 
10232 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10233 %{
10234   predicate(!UseAPX);
10235   match(Set dst (AddL dst src));
10236   effect(KILL cr);
10237   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10238 
10239   format %{ "addq    $dst, $src\t# long" %}
10240   ins_encode %{
10241     __ addq($dst$$Register, $src$$Register);
10242   %}
10243   ins_pipe(ialu_reg_reg);
10244 %}
10245 
10246 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10247 %{
10248   predicate(UseAPX);
10249   match(Set dst (AddL src1 src2));
10250   effect(KILL cr);
10251   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
10252 
10253   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10254   ins_encode %{
10255     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10256   %}
10257   ins_pipe(ialu_reg_reg);
10258 %}
10259 
10260 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10261 %{
10262   predicate(!UseAPX);
10263   match(Set dst (AddL dst src));
10264   effect(KILL cr);
10265   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10266 
10267   format %{ "addq    $dst, $src\t# long" %}
10268   ins_encode %{
10269     __ addq($dst$$Register, $src$$constant);
10270   %}
  ins_pipe(ialu_reg);
10272 %}
10273 
10274 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10275 %{
10276   predicate(UseAPX);
10277   match(Set dst (AddL src1 src2));
10278   effect(KILL cr);
10279   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
10280 
10281   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10282   ins_encode %{
10283     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10284   %}
  ins_pipe(ialu_reg);
10286 %}
10287 
10288 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10289 %{
10290   predicate(UseAPX);
10291   match(Set dst (AddL (LoadL src1) src2));
10292   effect(KILL cr);
10293   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10294 
10295   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10296   ins_encode %{
10297     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10298   %}
  ins_pipe(ialu_reg);
10300 %}
10301 
10302 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10303 %{
10304   predicate(!UseAPX);
10305   match(Set dst (AddL dst (LoadL src)));
10306   effect(KILL cr);
10307   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10308 
10309   ins_cost(150); // XXX
10310   format %{ "addq    $dst, $src\t# long" %}
10311   ins_encode %{
10312     __ addq($dst$$Register, $src$$Address);
10313   %}
10314   ins_pipe(ialu_reg_mem);
10315 %}
10316 
10317 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10318 %{
10319   predicate(UseAPX);
10320   match(Set dst (AddL src1 (LoadL src2)));
10321   effect(KILL cr);
10322   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
10323 
10324   ins_cost(150);
10325   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10326   ins_encode %{
10327     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10328   %}
10329   ins_pipe(ialu_reg_mem);
10330 %}
10331 
10332 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10333 %{
10334   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10335   effect(KILL cr);
10336   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10337 
10338   ins_cost(150); // XXX
10339   format %{ "addq    $dst, $src\t# long" %}
10340   ins_encode %{
10341     __ addq($dst$$Address, $src$$Register);
10342   %}
10343   ins_pipe(ialu_mem_reg);
10344 %}
10345 
10346 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10347 %{
10348   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10349   effect(KILL cr);
10350   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10351 
10352   ins_cost(125); // XXX
10353   format %{ "addq    $dst, $src\t# long" %}
10354   ins_encode %{
10355     __ addq($dst$$Address, $src$$constant);
10356   %}
10357   ins_pipe(ialu_mem_imm);
10358 %}
10359 
10360 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10361 %{
10362   predicate(!UseAPX && UseIncDec);
10363   match(Set dst (AddL dst src));
10364   effect(KILL cr);
10365 
10366   format %{ "incq    $dst\t# long" %}
10367   ins_encode %{
10368     __ incrementq($dst$$Register);
10369   %}
10370   ins_pipe(ialu_reg);
10371 %}
10372 
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10374 %{
10375   predicate(UseAPX && UseIncDec);
10376   match(Set dst (AddL src val));
10377   effect(KILL cr);
10378   flag(PD::Flag_ndd_demotable);
10379 
10380   format %{ "eincq    $dst, $src\t# long ndd" %}
10381   ins_encode %{
10382     __ eincq($dst$$Register, $src$$Register, false);
10383   %}
10384   ins_pipe(ialu_reg);
10385 %}
10386 
10387 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10388 %{
10389   predicate(UseAPX && UseIncDec);
10390   match(Set dst (AddL (LoadL src) val));
10391   effect(KILL cr);
10392 
10393   format %{ "eincq    $dst, $src\t# long ndd" %}
10394   ins_encode %{
10395     __ eincq($dst$$Register, $src$$Address, false);
10396   %}
10397   ins_pipe(ialu_reg);
10398 %}
10399 
10400 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10401 %{
10402   predicate(UseIncDec);
10403   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10404   effect(KILL cr);
10405 
10406   ins_cost(125); // XXX
10407   format %{ "incq    $dst\t# long" %}
10408   ins_encode %{
10409     __ incrementq($dst$$Address);
10410   %}
10411   ins_pipe(ialu_mem_imm);
10412 %}
10413 
10414 // XXX why does that use AddL
10415 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10416 %{
10417   predicate(!UseAPX && UseIncDec);
10418   match(Set dst (AddL dst src));
10419   effect(KILL cr);
10420 
10421   format %{ "decq    $dst\t# long" %}
10422   ins_encode %{
10423     __ decrementq($dst$$Register);
10424   %}
10425   ins_pipe(ialu_reg);
10426 %}
10427 
10428 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10429 %{
10430   predicate(UseAPX && UseIncDec);
10431   match(Set dst (AddL src val));
10432   effect(KILL cr);
10433   flag(PD::Flag_ndd_demotable);
10434 
10435   format %{ "edecq    $dst, $src\t# long ndd" %}
10436   ins_encode %{
10437     __ edecq($dst$$Register, $src$$Register, false);
10438   %}
10439   ins_pipe(ialu_reg);
10440 %}
10441 
10442 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10443 %{
10444   predicate(UseAPX && UseIncDec);
10445   match(Set dst (AddL (LoadL src) val));
10446   effect(KILL cr);
10447 
10448   format %{ "edecq    $dst, $src\t# long ndd" %}
10449   ins_encode %{
10450     __ edecq($dst$$Register, $src$$Address, false);
10451   %}
10452   ins_pipe(ialu_reg);
10453 %}
10454 
10455 // XXX why does that use AddL
10456 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10457 %{
10458   predicate(UseIncDec);
10459   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10460   effect(KILL cr);
10461 
10462   ins_cost(125); // XXX
10463   format %{ "decq    $dst\t# long" %}
10464   ins_encode %{
10465     __ decrementq($dst$$Address);
10466   %}
10467   ins_pipe(ialu_mem_imm);
10468 %}
10469 
10470 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10471 %{
10472   predicate(VM_Version::supports_fast_2op_lea());
10473   match(Set dst (AddL (LShiftL index scale) disp));
10474 
10475   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10476   ins_encode %{
10477     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10478     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10479   %}
10480   ins_pipe(ialu_reg_reg);
10481 %}
10482 
10483 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10484 %{
10485   predicate(VM_Version::supports_fast_3op_lea());
10486   match(Set dst (AddL (AddL base index) disp));
10487 
10488   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10489   ins_encode %{
10490     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10491   %}
10492   ins_pipe(ialu_reg_reg);
10493 %}
10494 
10495 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10496 %{
10497   predicate(VM_Version::supports_fast_2op_lea());
10498   match(Set dst (AddL base (LShiftL index scale)));
10499 
10500   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10501   ins_encode %{
10502     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10503     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10504   %}
10505   ins_pipe(ialu_reg_reg);
10506 %}
10507 
10508 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10509 %{
10510   predicate(VM_Version::supports_fast_3op_lea());
10511   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10512 
10513   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10514   ins_encode %{
10515     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10516     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10517   %}
10518   ins_pipe(ialu_reg_reg);
10519 %}
10520 
10521 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10522 %{
10523   match(Set dst (AddP dst src));
10524   effect(KILL cr);
10525   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10526 
10527   format %{ "addq    $dst, $src\t# ptr" %}
10528   ins_encode %{
10529     __ addq($dst$$Register, $src$$Register);
10530   %}
10531   ins_pipe(ialu_reg_reg);
10532 %}
10533 
10534 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10535 %{
10536   match(Set dst (AddP dst src));
10537   effect(KILL cr);
10538   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10539 
10540   format %{ "addq    $dst, $src\t# ptr" %}
10541   ins_encode %{
10542     __ addq($dst$$Register, $src$$constant);
10543   %}
10544   ins_pipe( ialu_reg );
10545 %}
10546 
10547 // XXX addP mem ops ????
10548 
10549 instruct checkCastPP(rRegP dst)
10550 %{
10551   match(Set dst (CheckCastPP dst));
10552 
10553   size(0);
10554   format %{ "# checkcastPP of $dst" %}
10555   ins_encode(/* empty encoding */);
10556   ins_pipe(empty);
10557 %}
10558 
10559 instruct castPP(rRegP dst)
10560 %{
10561   match(Set dst (CastPP dst));
10562 
10563   size(0);
10564   format %{ "# castPP of $dst" %}
10565   ins_encode(/* empty encoding */);
10566   ins_pipe(empty);
10567 %}
10568 
10569 instruct castII(rRegI dst)
10570 %{
10571   predicate(VerifyConstraintCasts == 0);
10572   match(Set dst (CastII dst));
10573 
10574   size(0);
10575   format %{ "# castII of $dst" %}
10576   ins_encode(/* empty encoding */);
10577   ins_cost(0);
10578   ins_pipe(empty);
10579 %}
10580 
10581 instruct castII_checked(rRegI dst, rFlagsReg cr)
10582 %{
10583   predicate(VerifyConstraintCasts > 0);
10584   match(Set dst (CastII dst));
10585 
10586   effect(KILL cr);
10587   format %{ "# cast_checked_II $dst" %}
10588   ins_encode %{
10589     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10590   %}
10591   ins_pipe(pipe_slow);
10592 %}
10593 
10594 instruct castLL(rRegL dst)
10595 %{
10596   predicate(VerifyConstraintCasts == 0);
10597   match(Set dst (CastLL dst));
10598 
10599   size(0);
10600   format %{ "# castLL of $dst" %}
10601   ins_encode(/* empty encoding */);
10602   ins_cost(0);
10603   ins_pipe(empty);
10604 %}
10605 
10606 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10607 %{
10608   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10609   match(Set dst (CastLL dst));
10610 
10611   effect(KILL cr);
10612   format %{ "# cast_checked_LL $dst" %}
10613   ins_encode %{
10614     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10615   %}
10616   ins_pipe(pipe_slow);
10617 %}
10618 
10619 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10620 %{
10621   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10622   match(Set dst (CastLL dst));
10623 
10624   effect(KILL cr, TEMP tmp);
10625   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10626   ins_encode %{
10627     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10628   %}
10629   ins_pipe(pipe_slow);
10630 %}
10631 
10632 instruct castFF(regF dst)
10633 %{
10634   match(Set dst (CastFF dst));
10635 
10636   size(0);
10637   format %{ "# castFF of $dst" %}
10638   ins_encode(/* empty encoding */);
10639   ins_cost(0);
10640   ins_pipe(empty);
10641 %}
10642 
10643 instruct castHH(regF dst)
10644 %{
10645   match(Set dst (CastHH dst));
10646 
10647   size(0);
10648   format %{ "# castHH of $dst" %}
10649   ins_encode(/* empty encoding */);
10650   ins_cost(0);
10651   ins_pipe(empty);
10652 %}
10653 
10654 instruct castDD(regD dst)
10655 %{
10656   match(Set dst (CastDD dst));
10657 
10658   size(0);
10659   format %{ "# castDD of $dst" %}
10660   ins_encode(/* empty encoding */);
10661   ins_cost(0);
10662   ins_pipe(empty);
10663 %}
10664 
10665 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
10666 instruct compareAndSwapP(rRegI res,
10667                          memory mem_ptr,
10668                          rax_RegP oldval, rRegP newval,
10669                          rFlagsReg cr)
10670 %{
10671   predicate(n->as_LoadStore()->barrier_data() == 0);
10672   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10673   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10674   effect(KILL cr, KILL oldval);
10675 
10676   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10677             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10678             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10679   ins_encode %{
10680     __ lock();
10681     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10682     __ setcc(Assembler::equal, $res$$Register);
10683   %}
10684   ins_pipe( pipe_cmpxchg );
10685 %}
10686 
10687 instruct compareAndSwapL(rRegI res,
10688                          memory mem_ptr,
10689                          rax_RegL oldval, rRegL newval,
10690                          rFlagsReg cr)
10691 %{
10692   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10693   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10694   effect(KILL cr, KILL oldval);
10695 
10696   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10697             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10698             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10699   ins_encode %{
10700     __ lock();
10701     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10702     __ setcc(Assembler::equal, $res$$Register);
10703   %}
10704   ins_pipe( pipe_cmpxchg );
10705 %}
10706 
10707 instruct compareAndSwapI(rRegI res,
10708                          memory mem_ptr,
10709                          rax_RegI oldval, rRegI newval,
10710                          rFlagsReg cr)
10711 %{
10712   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10713   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10714   effect(KILL cr, KILL oldval);
10715 
10716   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10717             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10718             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10719   ins_encode %{
10720     __ lock();
10721     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10722     __ setcc(Assembler::equal, $res$$Register);
10723   %}
10724   ins_pipe( pipe_cmpxchg );
10725 %}
10726 
10727 instruct compareAndSwapB(rRegI res,
10728                          memory mem_ptr,
10729                          rax_RegI oldval, rRegI newval,
10730                          rFlagsReg cr)
10731 %{
10732   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10733   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10734   effect(KILL cr, KILL oldval);
10735 
10736   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10737             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10738             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10739   ins_encode %{
10740     __ lock();
10741     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10742     __ setcc(Assembler::equal, $res$$Register);
10743   %}
10744   ins_pipe( pipe_cmpxchg );
10745 %}
10746 
10747 instruct compareAndSwapS(rRegI res,
10748                          memory mem_ptr,
10749                          rax_RegI oldval, rRegI newval,
10750                          rFlagsReg cr)
10751 %{
10752   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10753   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10754   effect(KILL cr, KILL oldval);
10755 
10756   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10757             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10758             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10759   ins_encode %{
10760     __ lock();
10761     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10762     __ setcc(Assembler::equal, $res$$Register);
10763   %}
10764   ins_pipe( pipe_cmpxchg );
10765 %}
10766 
10767 instruct compareAndSwapN(rRegI res,
10768                           memory mem_ptr,
10769                           rax_RegN oldval, rRegN newval,
10770                           rFlagsReg cr) %{
10771   predicate(n->as_LoadStore()->barrier_data() == 0);
10772   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10773   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10774   effect(KILL cr, KILL oldval);
10775 
10776   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10777             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10778             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10779   ins_encode %{
10780     __ lock();
10781     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10782     __ setcc(Assembler::equal, $res$$Register);
10783   %}
10784   ins_pipe( pipe_cmpxchg );
10785 %}
10786 
10787 instruct compareAndExchangeB(
10788                          memory mem_ptr,
10789                          rax_RegI oldval, rRegI newval,
10790                          rFlagsReg cr)
10791 %{
10792   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10793   effect(KILL cr);
10794 
10795   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10796             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10797   ins_encode %{
10798     __ lock();
10799     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10800   %}
10801   ins_pipe( pipe_cmpxchg );
10802 %}
10803 
10804 instruct compareAndExchangeS(
10805                          memory mem_ptr,
10806                          rax_RegI oldval, rRegI newval,
10807                          rFlagsReg cr)
10808 %{
10809   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10810   effect(KILL cr);
10811 
10812   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10813             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10814   ins_encode %{
10815     __ lock();
10816     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10817   %}
10818   ins_pipe( pipe_cmpxchg );
10819 %}
10820 
10821 instruct compareAndExchangeI(
10822                          memory mem_ptr,
10823                          rax_RegI oldval, rRegI newval,
10824                          rFlagsReg cr)
10825 %{
10826   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10827   effect(KILL cr);
10828 
10829   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10830             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10831   ins_encode %{
10832     __ lock();
10833     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10834   %}
10835   ins_pipe( pipe_cmpxchg );
10836 %}
10837 
10838 instruct compareAndExchangeL(
10839                          memory mem_ptr,
10840                          rax_RegL oldval, rRegL newval,
10841                          rFlagsReg cr)
10842 %{
10843   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10844   effect(KILL cr);
10845 
10846   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10847             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10848   ins_encode %{
10849     __ lock();
10850     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10851   %}
10852   ins_pipe( pipe_cmpxchg );
10853 %}
10854 
10855 instruct compareAndExchangeN(
10856                           memory mem_ptr,
10857                           rax_RegN oldval, rRegN newval,
10858                           rFlagsReg cr) %{
10859   predicate(n->as_LoadStore()->barrier_data() == 0);
10860   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10861   effect(KILL cr);
10862 
10863   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10864             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10865   ins_encode %{
10866     __ lock();
10867     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10868   %}
10869   ins_pipe( pipe_cmpxchg );
10870 %}
10871 
10872 instruct compareAndExchangeP(
10873                          memory mem_ptr,
10874                          rax_RegP oldval, rRegP newval,
10875                          rFlagsReg cr)
10876 %{
10877   predicate(n->as_LoadStore()->barrier_data() == 0);
10878   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10879   effect(KILL cr);
10880 
10881   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10882             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10883   ins_encode %{
10884     __ lock();
10885     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10886   %}
10887   ins_pipe( pipe_cmpxchg );
10888 %}
10889 
10890 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10891   predicate(n->as_LoadStore()->result_not_used());
10892   match(Set dummy (GetAndAddB mem add));
10893   effect(KILL cr);
10894   format %{ "addb_lock   $mem, $add" %}
10895   ins_encode %{
10896     __ lock();
10897     __ addb($mem$$Address, $add$$Register);
10898   %}
10899   ins_pipe(pipe_cmpxchg);
10900 %}
10901 
10902 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10903   predicate(n->as_LoadStore()->result_not_used());
10904   match(Set dummy (GetAndAddB mem add));
10905   effect(KILL cr);
10906   format %{ "addb_lock   $mem, $add" %}
10907   ins_encode %{
10908     __ lock();
10909     __ addb($mem$$Address, $add$$constant);
10910   %}
10911   ins_pipe(pipe_cmpxchg);
10912 %}
10913 
10914 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10915   predicate(!n->as_LoadStore()->result_not_used());
10916   match(Set newval (GetAndAddB mem newval));
10917   effect(KILL cr);
10918   format %{ "xaddb_lock  $mem, $newval" %}
10919   ins_encode %{
10920     __ lock();
10921     __ xaddb($mem$$Address, $newval$$Register);
10922   %}
10923   ins_pipe(pipe_cmpxchg);
10924 %}
10925 
10926 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10927   predicate(n->as_LoadStore()->result_not_used());
10928   match(Set dummy (GetAndAddS mem add));
10929   effect(KILL cr);
10930   format %{ "addw_lock   $mem, $add" %}
10931   ins_encode %{
10932     __ lock();
10933     __ addw($mem$$Address, $add$$Register);
10934   %}
10935   ins_pipe(pipe_cmpxchg);
10936 %}
10937 
10938 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10939   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10940   match(Set dummy (GetAndAddS mem add));
10941   effect(KILL cr);
10942   format %{ "addw_lock   $mem, $add" %}
10943   ins_encode %{
10944     __ lock();
10945     __ addw($mem$$Address, $add$$constant);
10946   %}
10947   ins_pipe(pipe_cmpxchg);
10948 %}
10949 
10950 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10951   predicate(!n->as_LoadStore()->result_not_used());
10952   match(Set newval (GetAndAddS mem newval));
10953   effect(KILL cr);
10954   format %{ "xaddw_lock  $mem, $newval" %}
10955   ins_encode %{
10956     __ lock();
10957     __ xaddw($mem$$Address, $newval$$Register);
10958   %}
10959   ins_pipe(pipe_cmpxchg);
10960 %}
10961 
10962 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10963   predicate(n->as_LoadStore()->result_not_used());
10964   match(Set dummy (GetAndAddI mem add));
10965   effect(KILL cr);
10966   format %{ "addl_lock   $mem, $add" %}
10967   ins_encode %{
10968     __ lock();
10969     __ addl($mem$$Address, $add$$Register);
10970   %}
10971   ins_pipe(pipe_cmpxchg);
10972 %}
10973 
10974 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10975   predicate(n->as_LoadStore()->result_not_used());
10976   match(Set dummy (GetAndAddI mem add));
10977   effect(KILL cr);
10978   format %{ "addl_lock   $mem, $add" %}
10979   ins_encode %{
10980     __ lock();
10981     __ addl($mem$$Address, $add$$constant);
10982   %}
10983   ins_pipe(pipe_cmpxchg);
10984 %}
10985 
10986 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10987   predicate(!n->as_LoadStore()->result_not_used());
10988   match(Set newval (GetAndAddI mem newval));
10989   effect(KILL cr);
10990   format %{ "xaddl_lock  $mem, $newval" %}
10991   ins_encode %{
10992     __ lock();
10993     __ xaddl($mem$$Address, $newval$$Register);
10994   %}
10995   ins_pipe(pipe_cmpxchg);
10996 %}
10997 
10998 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10999   predicate(n->as_LoadStore()->result_not_used());
11000   match(Set dummy (GetAndAddL mem add));
11001   effect(KILL cr);
11002   format %{ "addq_lock   $mem, $add" %}
11003   ins_encode %{
11004     __ lock();
11005     __ addq($mem$$Address, $add$$Register);
11006   %}
11007   ins_pipe(pipe_cmpxchg);
11008 %}
11009 
11010 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
11011   predicate(n->as_LoadStore()->result_not_used());
11012   match(Set dummy (GetAndAddL mem add));
11013   effect(KILL cr);
11014   format %{ "addq_lock   $mem, $add" %}
11015   ins_encode %{
11016     __ lock();
11017     __ addq($mem$$Address, $add$$constant);
11018   %}
11019   ins_pipe(pipe_cmpxchg);
11020 %}
11021 
11022 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11023   predicate(!n->as_LoadStore()->result_not_used());
11024   match(Set newval (GetAndAddL mem newval));
11025   effect(KILL cr);
11026   format %{ "xaddq_lock  $mem, $newval" %}
11027   ins_encode %{
11028     __ lock();
11029     __ xaddq($mem$$Address, $newval$$Register);
11030   %}
11031   ins_pipe(pipe_cmpxchg);
11032 %}
11033 
11034 instruct xchgB( memory mem, rRegI newval) %{
11035   match(Set newval (GetAndSetB mem newval));
11036   format %{ "XCHGB  $newval,[$mem]" %}
11037   ins_encode %{
11038     __ xchgb($newval$$Register, $mem$$Address);
11039   %}
11040   ins_pipe( pipe_cmpxchg );
11041 %}
11042 
11043 instruct xchgS( memory mem, rRegI newval) %{
11044   match(Set newval (GetAndSetS mem newval));
11045   format %{ "XCHGW  $newval,[$mem]" %}
11046   ins_encode %{
11047     __ xchgw($newval$$Register, $mem$$Address);
11048   %}
11049   ins_pipe( pipe_cmpxchg );
11050 %}
11051 
11052 instruct xchgI( memory mem, rRegI newval) %{
11053   match(Set newval (GetAndSetI mem newval));
11054   format %{ "XCHGL  $newval,[$mem]" %}
11055   ins_encode %{
11056     __ xchgl($newval$$Register, $mem$$Address);
11057   %}
11058   ins_pipe( pipe_cmpxchg );
11059 %}
11060 
11061 instruct xchgL( memory mem, rRegL newval) %{
11062   match(Set newval (GetAndSetL mem newval));
11063   format %{ "XCHGL  $newval,[$mem]" %}
11064   ins_encode %{
11065     __ xchgq($newval$$Register, $mem$$Address);
11066   %}
11067   ins_pipe( pipe_cmpxchg );
11068 %}
11069 
11070 instruct xchgP( memory mem, rRegP newval) %{
11071   match(Set newval (GetAndSetP mem newval));
11072   predicate(n->as_LoadStore()->barrier_data() == 0);
11073   format %{ "XCHGQ  $newval,[$mem]" %}
11074   ins_encode %{
11075     __ xchgq($newval$$Register, $mem$$Address);
11076   %}
11077   ins_pipe( pipe_cmpxchg );
11078 %}
11079 
11080 instruct xchgN( memory mem, rRegN newval) %{
11081   predicate(n->as_LoadStore()->barrier_data() == 0);
11082   match(Set newval (GetAndSetN mem newval));
11083   format %{ "XCHGL  $newval,$mem]" %}
11084   ins_encode %{
11085     __ xchgl($newval$$Register, $mem$$Address);
11086   %}
11087   ins_pipe( pipe_cmpxchg );
11088 %}
11089 
11090 //----------Abs Instructions-------------------------------------------
11091 
11092 // Integer Absolute Instructions
11093 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11094 %{
11095   match(Set dst (AbsI src));
11096   effect(TEMP dst, KILL cr);
11097   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11098             "subl    $dst, $src\n\t"
11099             "cmovll  $dst, $src" %}
11100   ins_encode %{
11101     __ xorl($dst$$Register, $dst$$Register);
11102     __ subl($dst$$Register, $src$$Register);
11103     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11104   %}
11105 
11106   ins_pipe(ialu_reg_reg);
11107 %}
11108 
11109 // Long Absolute Instructions
11110 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11111 %{
11112   match(Set dst (AbsL src));
11113   effect(TEMP dst, KILL cr);
11114   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11115             "subq    $dst, $src\n\t"
11116             "cmovlq  $dst, $src" %}
11117   ins_encode %{
11118     __ xorl($dst$$Register, $dst$$Register);
11119     __ subq($dst$$Register, $src$$Register);
11120     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11121   %}
11122 
11123   ins_pipe(ialu_reg_reg);
11124 %}
11125 
11126 //----------Subtraction Instructions-------------------------------------------
11127 
11128 // Integer Subtraction Instructions
11129 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11130 %{
11131   predicate(!UseAPX);
11132   match(Set dst (SubI dst src));
11133   effect(KILL cr);
11134   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11135 
11136   format %{ "subl    $dst, $src\t# int" %}
11137   ins_encode %{
11138     __ subl($dst$$Register, $src$$Register);
11139   %}
11140   ins_pipe(ialu_reg_reg);
11141 %}
11142 
11143 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11144 %{
11145   predicate(UseAPX);
11146   match(Set dst (SubI src1 src2));
11147   effect(KILL cr);
11148   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11149 
11150   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11151   ins_encode %{
11152     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11153   %}
11154   ins_pipe(ialu_reg_reg);
11155 %}
11156 
11157 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11158 %{
11159   predicate(UseAPX);
11160   match(Set dst (SubI src1 src2));
11161   effect(KILL cr);
11162   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11163 
11164   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11165   ins_encode %{
11166     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11167   %}
11168   ins_pipe(ialu_reg_reg);
11169 %}
11170 
11171 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11172 %{
11173   predicate(UseAPX);
11174   match(Set dst (SubI (LoadI src1) src2));
11175   effect(KILL cr);
11176   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11177 
11178   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11179   ins_encode %{
11180     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11181   %}
11182   ins_pipe(ialu_reg_reg);
11183 %}
11184 
11185 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11186 %{
11187   predicate(!UseAPX);
11188   match(Set dst (SubI dst (LoadI src)));
11189   effect(KILL cr);
11190   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11191 
11192   ins_cost(150);
11193   format %{ "subl    $dst, $src\t# int" %}
11194   ins_encode %{
11195     __ subl($dst$$Register, $src$$Address);
11196   %}
11197   ins_pipe(ialu_reg_mem);
11198 %}
11199 
11200 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11201 %{
11202   predicate(UseAPX);
11203   match(Set dst (SubI src1 (LoadI src2)));
11204   effect(KILL cr);
11205   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11206 
11207   ins_cost(150);
11208   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11209   ins_encode %{
11210     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11211   %}
11212   ins_pipe(ialu_reg_mem);
11213 %}
11214 
11215 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11216 %{
11217   predicate(UseAPX);
11218   match(Set dst (SubI (LoadI src1) src2));
11219   effect(KILL cr);
11220   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11221 
11222   ins_cost(150);
11223   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11224   ins_encode %{
11225     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11226   %}
11227   ins_pipe(ialu_reg_mem);
11228 %}
11229 
11230 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11231 %{
11232   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11233   effect(KILL cr);
11234   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11235 
11236   ins_cost(150);
11237   format %{ "subl    $dst, $src\t# int" %}
11238   ins_encode %{
11239     __ subl($dst$$Address, $src$$Register);
11240   %}
11241   ins_pipe(ialu_mem_reg);
11242 %}
11243 
11244 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11245 %{
11246   predicate(!UseAPX);
11247   match(Set dst (SubL dst src));
11248   effect(KILL cr);
11249   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11250 
11251   format %{ "subq    $dst, $src\t# long" %}
11252   ins_encode %{
11253     __ subq($dst$$Register, $src$$Register);
11254   %}
11255   ins_pipe(ialu_reg_reg);
11256 %}
11257 
11258 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11259 %{
11260   predicate(UseAPX);
11261   match(Set dst (SubL src1 src2));
11262   effect(KILL cr);
11263   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11264 
11265   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11266   ins_encode %{
11267     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11268   %}
11269   ins_pipe(ialu_reg_reg);
11270 %}
11271 
11272 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11273 %{
11274   predicate(UseAPX);
11275   match(Set dst (SubL src1 src2));
11276   effect(KILL cr);
11277   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11278 
11279   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11280   ins_encode %{
11281     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11282   %}
11283   ins_pipe(ialu_reg_reg);
11284 %}
11285 
11286 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11287 %{
11288   predicate(UseAPX);
11289   match(Set dst (SubL (LoadL src1) src2));
11290   effect(KILL cr);
11291   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11292 
11293   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11294   ins_encode %{
11295     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11296   %}
11297   ins_pipe(ialu_reg_reg);
11298 %}
11299 
11300 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11301 %{
11302   predicate(!UseAPX);
11303   match(Set dst (SubL dst (LoadL src)));
11304   effect(KILL cr);
11305   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11306 
11307   ins_cost(150);
11308   format %{ "subq    $dst, $src\t# long" %}
11309   ins_encode %{
11310     __ subq($dst$$Register, $src$$Address);
11311   %}
11312   ins_pipe(ialu_reg_mem);
11313 %}
11314 
11315 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11316 %{
11317   predicate(UseAPX);
11318   match(Set dst (SubL src1 (LoadL src2)));
11319   effect(KILL cr);
11320   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11321 
11322   ins_cost(150);
11323   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11324   ins_encode %{
11325     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11326   %}
11327   ins_pipe(ialu_reg_mem);
11328 %}
11329 
11330 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11331 %{
11332   predicate(UseAPX);
11333   match(Set dst (SubL (LoadL src1) src2));
11334   effect(KILL cr);
11335   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11336 
11337   ins_cost(150);
11338   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11339   ins_encode %{
11340     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11341   %}
11342   ins_pipe(ialu_reg_mem);
11343 %}
11344 
11345 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11346 %{
11347   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11348   effect(KILL cr);
11349   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11350 
11351   ins_cost(150);
11352   format %{ "subq    $dst, $src\t# long" %}
11353   ins_encode %{
11354     __ subq($dst$$Address, $src$$Register);
11355   %}
11356   ins_pipe(ialu_mem_reg);
11357 %}
11358 
11359 // Subtract from a pointer
11360 // XXX hmpf???
11361 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11362 %{
11363   match(Set dst (AddP dst (SubI zero src)));
11364   effect(KILL cr);
11365 
11366   format %{ "subq    $dst, $src\t# ptr - int" %}
11367   ins_encode %{
11368     __ subq($dst$$Register, $src$$Register);
11369   %}
11370   ins_pipe(ialu_reg_reg);
11371 %}
11372 
11373 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11374 %{
11375   predicate(!UseAPX);
11376   match(Set dst (SubI zero dst));
11377   effect(KILL cr);
11378   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11379 
11380   format %{ "negl    $dst\t# int" %}
11381   ins_encode %{
11382     __ negl($dst$$Register);
11383   %}
11384   ins_pipe(ialu_reg);
11385 %}
11386 
11387 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11388 %{
11389   predicate(UseAPX);
11390   match(Set dst (SubI zero src));
11391   effect(KILL cr);
11392   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11393 
11394   format %{ "enegl    $dst, $src\t# int ndd" %}
11395   ins_encode %{
11396     __ enegl($dst$$Register, $src$$Register, false);
11397   %}
11398   ins_pipe(ialu_reg);
11399 %}
11400 
11401 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11402 %{
11403   predicate(!UseAPX);
11404   match(Set dst (NegI dst));
11405   effect(KILL cr);
11406   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11407 
11408   format %{ "negl    $dst\t# int" %}
11409   ins_encode %{
11410     __ negl($dst$$Register);
11411   %}
11412   ins_pipe(ialu_reg);
11413 %}
11414 
11415 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11416 %{
11417   predicate(UseAPX);
11418   match(Set dst (NegI src));
11419   effect(KILL cr);
11420   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11421 
11422   format %{ "enegl    $dst, $src\t# int ndd" %}
11423   ins_encode %{
11424     __ enegl($dst$$Register, $src$$Register, false);
11425   %}
11426   ins_pipe(ialu_reg);
11427 %}
11428 
11429 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11430 %{
11431   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11432   effect(KILL cr);
11433   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11434 
11435   format %{ "negl    $dst\t# int" %}
11436   ins_encode %{
11437     __ negl($dst$$Address);
11438   %}
11439   ins_pipe(ialu_reg);
11440 %}
11441 
11442 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11443 %{
11444   predicate(!UseAPX);
11445   match(Set dst (SubL zero dst));
11446   effect(KILL cr);
11447   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11448 
11449   format %{ "negq    $dst\t# long" %}
11450   ins_encode %{
11451     __ negq($dst$$Register);
11452   %}
11453   ins_pipe(ialu_reg);
11454 %}
11455 
11456 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11457 %{
11458   predicate(UseAPX);
11459   match(Set dst (SubL zero src));
11460   effect(KILL cr);
11461   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11462 
11463   format %{ "enegq    $dst, $src\t# long ndd" %}
11464   ins_encode %{
11465     __ enegq($dst$$Register, $src$$Register, false);
11466   %}
11467   ins_pipe(ialu_reg);
11468 %}
11469 
11470 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11471 %{
11472   predicate(!UseAPX);
11473   match(Set dst (NegL dst));
11474   effect(KILL cr);
11475   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11476 
11477   format %{ "negq    $dst\t# int" %}
11478   ins_encode %{
11479     __ negq($dst$$Register);
11480   %}
11481   ins_pipe(ialu_reg);
11482 %}
11483 
11484 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11485 %{
11486   predicate(UseAPX);
11487   match(Set dst (NegL src));
11488   effect(KILL cr);
11489   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11490 
11491   format %{ "enegq    $dst, $src\t# long ndd" %}
11492   ins_encode %{
11493     __ enegq($dst$$Register, $src$$Register, false);
11494   %}
11495   ins_pipe(ialu_reg);
11496 %}
11497 
11498 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11499 %{
11500   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11501   effect(KILL cr);
11502   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11503 
11504   format %{ "negq    $dst\t# long" %}
11505   ins_encode %{
11506     __ negq($dst$$Address);
11507   %}
11508   ins_pipe(ialu_reg);
11509 %}
11510 
11511 //----------Multiplication/Division Instructions-------------------------------
11512 // Integer Multiplication Instructions
11513 // Multiply Register
11514 
11515 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11516 %{
11517   predicate(!UseAPX);
11518   match(Set dst (MulI dst src));
11519   effect(KILL cr);
11520 
11521   ins_cost(300);
11522   format %{ "imull   $dst, $src\t# int" %}
11523   ins_encode %{
11524     __ imull($dst$$Register, $src$$Register);
11525   %}
11526   ins_pipe(ialu_reg_reg_alu0);
11527 %}
11528 
11529 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11530 %{
11531   predicate(UseAPX);
11532   match(Set dst (MulI src1 src2));
11533   effect(KILL cr);
11534   flag(PD::Flag_ndd_demotable_commutative);
11535 
11536   ins_cost(300);
11537   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11538   ins_encode %{
11539     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11540   %}
11541   ins_pipe(ialu_reg_reg_alu0);
11542 %}
11543 
11544 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11545 %{
11546   match(Set dst (MulI src imm));
11547   effect(KILL cr);
11548 
11549   ins_cost(300);
11550   format %{ "imull   $dst, $src, $imm\t# int" %}
11551   ins_encode %{
11552     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11553   %}
11554   ins_pipe(ialu_reg_reg_alu0);
11555 %}
11556 
11557 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11558 %{
11559   predicate(!UseAPX);
11560   match(Set dst (MulI dst (LoadI src)));
11561   effect(KILL cr);
11562 
11563   ins_cost(350);
11564   format %{ "imull   $dst, $src\t# int" %}
11565   ins_encode %{
11566     __ imull($dst$$Register, $src$$Address);
11567   %}
11568   ins_pipe(ialu_reg_mem_alu0);
11569 %}
11570 
11571 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11572 %{
11573   predicate(UseAPX);
11574   match(Set dst (MulI src1 (LoadI src2)));
11575   effect(KILL cr);
11576   flag(PD::Flag_ndd_demotable);
11577 
11578   ins_cost(350);
11579   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11580   ins_encode %{
11581     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11582   %}
11583   ins_pipe(ialu_reg_mem_alu0);
11584 %}
11585 
11586 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11587 %{
11588   match(Set dst (MulI (LoadI src) imm));
11589   effect(KILL cr);
11590 
11591   ins_cost(300);
11592   format %{ "imull   $dst, $src, $imm\t# int" %}
11593   ins_encode %{
11594     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11595   %}
11596   ins_pipe(ialu_reg_mem_alu0);
11597 %}
11598 
11599 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11600 %{
11601   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11602   effect(KILL cr, KILL src2);
11603 
11604   expand %{ mulI_rReg(dst, src1, cr);
11605            mulI_rReg(src2, src3, cr);
11606            addI_rReg(dst, src2, cr); %}
11607 %}
11608 
11609 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11610 %{
11611   predicate(!UseAPX);
11612   match(Set dst (MulL dst src));
11613   effect(KILL cr);
11614 
11615   ins_cost(300);
11616   format %{ "imulq   $dst, $src\t# long" %}
11617   ins_encode %{
11618     __ imulq($dst$$Register, $src$$Register);
11619   %}
11620   ins_pipe(ialu_reg_reg_alu0);
11621 %}
11622 
11623 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11624 %{
11625   predicate(UseAPX);
11626   match(Set dst (MulL src1 src2));
11627   effect(KILL cr);
11628   flag(PD::Flag_ndd_demotable_commutative);
11629 
11630   ins_cost(300);
11631   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11632   ins_encode %{
11633     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11634   %}
11635   ins_pipe(ialu_reg_reg_alu0);
11636 %}
11637 
11638 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11639 %{
11640   match(Set dst (MulL src imm));
11641   effect(KILL cr);
11642 
11643   ins_cost(300);
11644   format %{ "imulq   $dst, $src, $imm\t# long" %}
11645   ins_encode %{
11646     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11647   %}
11648   ins_pipe(ialu_reg_reg_alu0);
11649 %}
11650 
11651 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11652 %{
11653   predicate(!UseAPX);
11654   match(Set dst (MulL dst (LoadL src)));
11655   effect(KILL cr);
11656 
11657   ins_cost(350);
11658   format %{ "imulq   $dst, $src\t# long" %}
11659   ins_encode %{
11660     __ imulq($dst$$Register, $src$$Address);
11661   %}
11662   ins_pipe(ialu_reg_mem_alu0);
11663 %}
11664 
11665 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11666 %{
11667   predicate(UseAPX);
11668   match(Set dst (MulL src1 (LoadL src2)));
11669   effect(KILL cr);
11670   flag(PD::Flag_ndd_demotable_commutative);
11671 
11672   ins_cost(350);
11673   format %{ "eimulq   $dst, $src1, $src2 \t# long" %}
11674   ins_encode %{
11675     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11676   %}
11677   ins_pipe(ialu_reg_mem_alu0);
11678 %}
11679 
11680 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11681 %{
11682   match(Set dst (MulL (LoadL src) imm));
11683   effect(KILL cr);
11684 
11685   ins_cost(300);
11686   format %{ "imulq   $dst, $src, $imm\t# long" %}
11687   ins_encode %{
11688     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11689   %}
11690   ins_pipe(ialu_reg_mem_alu0);
11691 %}
11692 
11693 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11694 %{
11695   match(Set dst (MulHiL src rax));
11696   effect(USE_KILL rax, KILL cr);
11697 
11698   ins_cost(300);
11699   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11700   ins_encode %{
11701     __ imulq($src$$Register);
11702   %}
11703   ins_pipe(ialu_reg_reg_alu0);
11704 %}
11705 
11706 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11707 %{
11708   match(Set dst (UMulHiL src rax));
11709   effect(USE_KILL rax, KILL cr);
11710 
11711   ins_cost(300);
11712   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11713   ins_encode %{
11714     __ mulq($src$$Register);
11715   %}
11716   ins_pipe(ialu_reg_reg_alu0);
11717 %}
11718 
11719 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11720                    rFlagsReg cr)
11721 %{
11722   match(Set rax (DivI rax div));
11723   effect(KILL rdx, KILL cr);
11724 
11725   ins_cost(30*100+10*100); // XXX
11726   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11727             "jne,s   normal\n\t"
11728             "xorl    rdx, rdx\n\t"
11729             "cmpl    $div, -1\n\t"
11730             "je,s    done\n"
11731     "normal: cdql\n\t"
11732             "idivl   $div\n"
11733     "done:"        %}
11734   ins_encode(cdql_enc(div));
11735   ins_pipe(ialu_reg_reg_alu0);
11736 %}
11737 
11738 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11739                    rFlagsReg cr)
11740 %{
11741   match(Set rax (DivL rax div));
11742   effect(KILL rdx, KILL cr);
11743 
11744   ins_cost(30*100+10*100); // XXX
11745   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11746             "cmpq    rax, rdx\n\t"
11747             "jne,s   normal\n\t"
11748             "xorl    rdx, rdx\n\t"
11749             "cmpq    $div, -1\n\t"
11750             "je,s    done\n"
11751     "normal: cdqq\n\t"
11752             "idivq   $div\n"
11753     "done:"        %}
11754   ins_encode(cdqq_enc(div));
11755   ins_pipe(ialu_reg_reg_alu0);
11756 %}
11757 
11758 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11759 %{
11760   match(Set rax (UDivI rax div));
11761   effect(KILL rdx, KILL cr);
11762 
11763   ins_cost(300);
11764   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11765   ins_encode %{
11766     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11767   %}
11768   ins_pipe(ialu_reg_reg_alu0);
11769 %}
11770 
11771 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11772 %{
11773   match(Set rax (UDivL rax div));
11774   effect(KILL rdx, KILL cr);
11775 
11776   ins_cost(300);
11777   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11778   ins_encode %{
11779      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11780   %}
11781   ins_pipe(ialu_reg_reg_alu0);
11782 %}
11783 
11784 // Integer DIVMOD with Register, both quotient and mod results
11785 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11786                              rFlagsReg cr)
11787 %{
11788   match(DivModI rax div);
11789   effect(KILL cr);
11790 
11791   ins_cost(30*100+10*100); // XXX
11792   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11793             "jne,s   normal\n\t"
11794             "xorl    rdx, rdx\n\t"
11795             "cmpl    $div, -1\n\t"
11796             "je,s    done\n"
11797     "normal: cdql\n\t"
11798             "idivl   $div\n"
11799     "done:"        %}
11800   ins_encode(cdql_enc(div));
11801   ins_pipe(pipe_slow);
11802 %}
11803 
11804 // Long DIVMOD with Register, both quotient and mod results
11805 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11806                              rFlagsReg cr)
11807 %{
11808   match(DivModL rax div);
11809   effect(KILL cr);
11810 
11811   ins_cost(30*100+10*100); // XXX
11812   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11813             "cmpq    rax, rdx\n\t"
11814             "jne,s   normal\n\t"
11815             "xorl    rdx, rdx\n\t"
11816             "cmpq    $div, -1\n\t"
11817             "je,s    done\n"
11818     "normal: cdqq\n\t"
11819             "idivq   $div\n"
11820     "done:"        %}
11821   ins_encode(cdqq_enc(div));
11822   ins_pipe(pipe_slow);
11823 %}
11824 
11825 // Unsigned integer DIVMOD with Register, both quotient and mod results
11826 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11827                               no_rax_rdx_RegI div, rFlagsReg cr)
11828 %{
11829   match(UDivModI rax div);
11830   effect(TEMP tmp, KILL cr);
11831 
11832   ins_cost(300);
11833   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11834             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11835           %}
11836   ins_encode %{
11837     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11838   %}
11839   ins_pipe(pipe_slow);
11840 %}
11841 
11842 // Unsigned long DIVMOD with Register, both quotient and mod results
11843 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11844                               no_rax_rdx_RegL div, rFlagsReg cr)
11845 %{
11846   match(UDivModL rax div);
11847   effect(TEMP tmp, KILL cr);
11848 
11849   ins_cost(300);
11850   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11851             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11852           %}
11853   ins_encode %{
11854     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11855   %}
11856   ins_pipe(pipe_slow);
11857 %}
11858 
11859 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11860                    rFlagsReg cr)
11861 %{
11862   match(Set rdx (ModI rax div));
11863   effect(KILL rax, KILL cr);
11864 
11865   ins_cost(300); // XXX
11866   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11867             "jne,s   normal\n\t"
11868             "xorl    rdx, rdx\n\t"
11869             "cmpl    $div, -1\n\t"
11870             "je,s    done\n"
11871     "normal: cdql\n\t"
11872             "idivl   $div\n"
11873     "done:"        %}
11874   ins_encode(cdql_enc(div));
11875   ins_pipe(ialu_reg_reg_alu0);
11876 %}
11877 
11878 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11879                    rFlagsReg cr)
11880 %{
11881   match(Set rdx (ModL rax div));
11882   effect(KILL rax, KILL cr);
11883 
11884   ins_cost(300); // XXX
11885   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11886             "cmpq    rax, rdx\n\t"
11887             "jne,s   normal\n\t"
11888             "xorl    rdx, rdx\n\t"
11889             "cmpq    $div, -1\n\t"
11890             "je,s    done\n"
11891     "normal: cdqq\n\t"
11892             "idivq   $div\n"
11893     "done:"        %}
11894   ins_encode(cdqq_enc(div));
11895   ins_pipe(ialu_reg_reg_alu0);
11896 %}
11897 
11898 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11899 %{
11900   match(Set rdx (UModI rax div));
11901   effect(KILL rax, KILL cr);
11902 
11903   ins_cost(300);
11904   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11905   ins_encode %{
11906     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11907   %}
11908   ins_pipe(ialu_reg_reg_alu0);
11909 %}
11910 
11911 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11912 %{
11913   match(Set rdx (UModL rax div));
11914   effect(KILL rax, KILL cr);
11915 
11916   ins_cost(300);
11917   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11918   ins_encode %{
11919     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11920   %}
11921   ins_pipe(ialu_reg_reg_alu0);
11922 %}
11923 
11924 // Integer Shift Instructions
11925 // Shift Left by one, two, three
11926 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11927 %{
11928   predicate(!UseAPX);
11929   match(Set dst (LShiftI dst shift));
11930   effect(KILL cr);
11931 
11932   format %{ "sall    $dst, $shift" %}
11933   ins_encode %{
11934     __ sall($dst$$Register, $shift$$constant);
11935   %}
11936   ins_pipe(ialu_reg);
11937 %}
11938 
11939 // Shift Left by one, two, three
11940 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11941 %{
11942   predicate(UseAPX);
11943   match(Set dst (LShiftI src shift));
11944   effect(KILL cr);
11945   flag(PD::Flag_ndd_demotable);
11946 
11947   format %{ "esall    $dst, $src, $shift\t# int(ndd)" %}
11948   ins_encode %{
11949     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11950   %}
11951   ins_pipe(ialu_reg);
11952 %}
11953 
11954 // Shift Left by 8-bit immediate
11955 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11956 %{
11957   predicate(!UseAPX);
11958   match(Set dst (LShiftI dst shift));
11959   effect(KILL cr);
11960 
11961   format %{ "sall    $dst, $shift" %}
11962   ins_encode %{
11963     __ sall($dst$$Register, $shift$$constant);
11964   %}
11965   ins_pipe(ialu_reg);
11966 %}
11967 
11968 // Shift Left by 8-bit immediate
11969 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11970 %{
11971   predicate(UseAPX);
11972   match(Set dst (LShiftI src shift));
11973   effect(KILL cr);
11974   flag(PD::Flag_ndd_demotable);
11975 
11976   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11977   ins_encode %{
11978     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11979   %}
11980   ins_pipe(ialu_reg);
11981 %}
11982 
11983 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11984 %{
11985   predicate(UseAPX);
11986   match(Set dst (LShiftI (LoadI src) shift));
11987   effect(KILL cr);
11988 
11989   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11990   ins_encode %{
11991     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11992   %}
11993   ins_pipe(ialu_reg);
11994 %}
11995 
11996 // Shift Left by 8-bit immediate
11997 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11998 %{
11999   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12000   effect(KILL cr);
12001 
12002   format %{ "sall    $dst, $shift" %}
12003   ins_encode %{
12004     __ sall($dst$$Address, $shift$$constant);
12005   %}
12006   ins_pipe(ialu_mem_imm);
12007 %}
12008 
12009 // Shift Left by variable
12010 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12011 %{
12012   predicate(!VM_Version::supports_bmi2());
12013   match(Set dst (LShiftI dst shift));
12014   effect(KILL cr);
12015 
12016   format %{ "sall    $dst, $shift" %}
12017   ins_encode %{
12018     __ sall($dst$$Register);
12019   %}
12020   ins_pipe(ialu_reg_reg);
12021 %}
12022 
12023 // Shift Left by variable
12024 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12025 %{
12026   predicate(!VM_Version::supports_bmi2());
12027   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12028   effect(KILL cr);
12029 
12030   format %{ "sall    $dst, $shift" %}
12031   ins_encode %{
12032     __ sall($dst$$Address);
12033   %}
12034   ins_pipe(ialu_mem_reg);
12035 %}
12036 
12037 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12038 %{
12039   predicate(VM_Version::supports_bmi2());
12040   match(Set dst (LShiftI src shift));
12041 
12042   format %{ "shlxl   $dst, $src, $shift" %}
12043   ins_encode %{
12044     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12045   %}
12046   ins_pipe(ialu_reg_reg);
12047 %}
12048 
12049 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12050 %{
12051   predicate(VM_Version::supports_bmi2());
12052   match(Set dst (LShiftI (LoadI src) shift));
12053   ins_cost(175);
12054   format %{ "shlxl   $dst, $src, $shift" %}
12055   ins_encode %{
12056     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12057   %}
12058   ins_pipe(ialu_reg_mem);
12059 %}
12060 
12061 // Arithmetic Shift Right by 8-bit immediate
12062 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12063 %{
12064   predicate(!UseAPX);
12065   match(Set dst (RShiftI dst shift));
12066   effect(KILL cr);
12067 
12068   format %{ "sarl    $dst, $shift" %}
12069   ins_encode %{
12070     __ sarl($dst$$Register, $shift$$constant);
12071   %}
12072   ins_pipe(ialu_mem_imm);
12073 %}
12074 
12075 // Arithmetic Shift Right by 8-bit immediate
12076 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12077 %{
12078   predicate(UseAPX);
12079   match(Set dst (RShiftI src shift));
12080   effect(KILL cr);
12081   flag(PD::Flag_ndd_demotable);
12082 
12083   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12084   ins_encode %{
12085     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12086   %}
12087   ins_pipe(ialu_mem_imm);
12088 %}
12089 
12090 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12091 %{
12092   predicate(UseAPX);
12093   match(Set dst (RShiftI (LoadI src) shift));
12094   effect(KILL cr);
12095 
12096   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12097   ins_encode %{
12098     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12099   %}
12100   ins_pipe(ialu_mem_imm);
12101 %}
12102 
12103 // Arithmetic Shift Right by 8-bit immediate
12104 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12105 %{
12106   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12107   effect(KILL cr);
12108 
12109   format %{ "sarl    $dst, $shift" %}
12110   ins_encode %{
12111     __ sarl($dst$$Address, $shift$$constant);
12112   %}
12113   ins_pipe(ialu_mem_imm);
12114 %}
12115 
12116 // Arithmetic Shift Right by variable
12117 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12118 %{
12119   predicate(!VM_Version::supports_bmi2());
12120   match(Set dst (RShiftI dst shift));
12121   effect(KILL cr);
12122 
12123   format %{ "sarl    $dst, $shift" %}
12124   ins_encode %{
12125     __ sarl($dst$$Register);
12126   %}
12127   ins_pipe(ialu_reg_reg);
12128 %}
12129 
12130 // Arithmetic Shift Right by variable
12131 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12132 %{
12133   predicate(!VM_Version::supports_bmi2());
12134   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12135   effect(KILL cr);
12136 
12137   format %{ "sarl    $dst, $shift" %}
12138   ins_encode %{
12139     __ sarl($dst$$Address);
12140   %}
12141   ins_pipe(ialu_mem_reg);
12142 %}
12143 
12144 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12145 %{
12146   predicate(VM_Version::supports_bmi2());
12147   match(Set dst (RShiftI src shift));
12148 
12149   format %{ "sarxl   $dst, $src, $shift" %}
12150   ins_encode %{
12151     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12152   %}
12153   ins_pipe(ialu_reg_reg);
12154 %}
12155 
12156 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12157 %{
12158   predicate(VM_Version::supports_bmi2());
12159   match(Set dst (RShiftI (LoadI src) shift));
12160   ins_cost(175);
12161   format %{ "sarxl   $dst, $src, $shift" %}
12162   ins_encode %{
12163     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12164   %}
12165   ins_pipe(ialu_reg_mem);
12166 %}
12167 
12168 // Logical Shift Right by 8-bit immediate
12169 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12170 %{
12171   predicate(!UseAPX);
12172   match(Set dst (URShiftI dst shift));
12173   effect(KILL cr);
12174 
12175   format %{ "shrl    $dst, $shift" %}
12176   ins_encode %{
12177     __ shrl($dst$$Register, $shift$$constant);
12178   %}
12179   ins_pipe(ialu_reg);
12180 %}
12181 
12182 // Logical Shift Right by 8-bit immediate
12183 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12184 %{
12185   predicate(UseAPX);
12186   match(Set dst (URShiftI src shift));
12187   effect(KILL cr);
12188   flag(PD::Flag_ndd_demotable);
12189 
12190   format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12191   ins_encode %{
12192     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12193   %}
12194   ins_pipe(ialu_reg);
12195 %}
12196 
12197 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12198 %{
12199   predicate(UseAPX);
12200   match(Set dst (URShiftI (LoadI src) shift));
12201   effect(KILL cr);
12202 
12203   format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12204   ins_encode %{
12205     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12206   %}
12207   ins_pipe(ialu_reg);
12208 %}
12209 
12210 // Logical Shift Right by 8-bit immediate
12211 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12212 %{
12213   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12214   effect(KILL cr);
12215 
12216   format %{ "shrl    $dst, $shift" %}
12217   ins_encode %{
12218     __ shrl($dst$$Address, $shift$$constant);
12219   %}
12220   ins_pipe(ialu_mem_imm);
12221 %}
12222 
12223 // Logical Shift Right by variable
12224 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12225 %{
12226   predicate(!VM_Version::supports_bmi2());
12227   match(Set dst (URShiftI dst shift));
12228   effect(KILL cr);
12229 
12230   format %{ "shrl    $dst, $shift" %}
12231   ins_encode %{
12232     __ shrl($dst$$Register);
12233   %}
12234   ins_pipe(ialu_reg_reg);
12235 %}
12236 
12237 // Logical Shift Right by variable
12238 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12239 %{
12240   predicate(!VM_Version::supports_bmi2());
12241   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12242   effect(KILL cr);
12243 
12244   format %{ "shrl    $dst, $shift" %}
12245   ins_encode %{
12246     __ shrl($dst$$Address);
12247   %}
12248   ins_pipe(ialu_mem_reg);
12249 %}
12250 
12251 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12252 %{
12253   predicate(VM_Version::supports_bmi2());
12254   match(Set dst (URShiftI src shift));
12255 
12256   format %{ "shrxl   $dst, $src, $shift" %}
12257   ins_encode %{
12258     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12259   %}
12260   ins_pipe(ialu_reg_reg);
12261 %}
12262 
12263 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12264 %{
12265   predicate(VM_Version::supports_bmi2());
12266   match(Set dst (URShiftI (LoadI src) shift));
12267   ins_cost(175);
12268   format %{ "shrxl   $dst, $src, $shift" %}
12269   ins_encode %{
12270     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12271   %}
12272   ins_pipe(ialu_reg_mem);
12273 %}
12274 
12275 // Long Shift Instructions
12276 // Shift Left by one, two, three
12277 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12278 %{
12279   predicate(!UseAPX);
12280   match(Set dst (LShiftL dst shift));
12281   effect(KILL cr);
12282 
12283   format %{ "salq    $dst, $shift" %}
12284   ins_encode %{
12285     __ salq($dst$$Register, $shift$$constant);
12286   %}
12287   ins_pipe(ialu_reg);
12288 %}
12289 
12290 // Shift Left by one, two, three
12291 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12292 %{
12293   predicate(UseAPX);
12294   match(Set dst (LShiftL src shift));
12295   effect(KILL cr);
12296   flag(PD::Flag_ndd_demotable);
12297 
12298   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12299   ins_encode %{
12300     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12301   %}
12302   ins_pipe(ialu_reg);
12303 %}
12304 
12305 // Shift Left by 8-bit immediate
12306 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12307 %{
12308   predicate(!UseAPX);
12309   match(Set dst (LShiftL dst shift));
12310   effect(KILL cr);
12311 
12312   format %{ "salq    $dst, $shift" %}
12313   ins_encode %{
12314     __ salq($dst$$Register, $shift$$constant);
12315   %}
12316   ins_pipe(ialu_reg);
12317 %}
12318 
12319 // Shift Left by 8-bit immediate
12320 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12321 %{
12322   predicate(UseAPX);
12323   match(Set dst (LShiftL src shift));
12324   effect(KILL cr);
12325   flag(PD::Flag_ndd_demotable);
12326 
12327   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12328   ins_encode %{
12329     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12330   %}
12331   ins_pipe(ialu_reg);
12332 %}
12333 
12334 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12335 %{
12336   predicate(UseAPX);
12337   match(Set dst (LShiftL (LoadL src) shift));
12338   effect(KILL cr);
12339 
12340   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12341   ins_encode %{
12342     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12343   %}
12344   ins_pipe(ialu_reg);
12345 %}
12346 
12347 // Shift Left by 8-bit immediate
12348 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12349 %{
12350   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12351   effect(KILL cr);
12352 
12353   format %{ "salq    $dst, $shift" %}
12354   ins_encode %{
12355     __ salq($dst$$Address, $shift$$constant);
12356   %}
12357   ins_pipe(ialu_mem_imm);
12358 %}
12359 
12360 // Shift Left by variable
12361 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12362 %{
12363   predicate(!VM_Version::supports_bmi2());
12364   match(Set dst (LShiftL dst shift));
12365   effect(KILL cr);
12366 
12367   format %{ "salq    $dst, $shift" %}
12368   ins_encode %{
12369     __ salq($dst$$Register);
12370   %}
12371   ins_pipe(ialu_reg_reg);
12372 %}
12373 
12374 // Shift Left by variable
12375 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12376 %{
12377   predicate(!VM_Version::supports_bmi2());
12378   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12379   effect(KILL cr);
12380 
12381   format %{ "salq    $dst, $shift" %}
12382   ins_encode %{
12383     __ salq($dst$$Address);
12384   %}
12385   ins_pipe(ialu_mem_reg);
12386 %}
12387 
12388 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12389 %{
12390   predicate(VM_Version::supports_bmi2());
12391   match(Set dst (LShiftL src shift));
12392 
12393   format %{ "shlxq   $dst, $src, $shift" %}
12394   ins_encode %{
12395     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12396   %}
12397   ins_pipe(ialu_reg_reg);
12398 %}
12399 
12400 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12401 %{
12402   predicate(VM_Version::supports_bmi2());
12403   match(Set dst (LShiftL (LoadL src) shift));
12404   ins_cost(175);
12405   format %{ "shlxq   $dst, $src, $shift" %}
12406   ins_encode %{
12407     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12408   %}
12409   ins_pipe(ialu_reg_mem);
12410 %}
12411 
12412 // Arithmetic Shift Right by immediate (count masked to 6 bits)
12413 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12414 %{
12415   predicate(!UseAPX);
12416   match(Set dst (RShiftL dst shift));
12417   effect(KILL cr);
12418 
12419   format %{ "sarq    $dst, $shift" %}
12420   ins_encode %{
12421     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12422   %}
12423   ins_pipe(ialu_mem_imm);
12424 %}
12425 
12426 // Arithmetic Shift Right by immediate (count masked to 6 bits)
12427 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12428 %{
12429   predicate(UseAPX);
12430   match(Set dst (RShiftL src shift));
12431   effect(KILL cr);
12432   flag(PD::Flag_ndd_demotable);
12433 
12434   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12435   ins_encode %{
12436     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12437   %}
12438   ins_pipe(ialu_mem_imm);
12439 %}
12440 
12441 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12442 %{
12443   predicate(UseAPX);
12444   match(Set dst (RShiftL (LoadL src) shift));
12445   effect(KILL cr);
12446 
12447   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12448   ins_encode %{
12449     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12450   %}
12451   ins_pipe(ialu_mem_imm);
12452 %}
12453 
12454 // Arithmetic Shift Right by immediate (count masked to 6 bits)
12455 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12456 %{
12457   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12458   effect(KILL cr);
12459 
12460   format %{ "sarq    $dst, $shift" %}
12461   ins_encode %{
12462     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12463   %}
12464   ins_pipe(ialu_mem_imm);
12465 %}
12466 
12467 // Arithmetic Shift Right by variable
12468 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12469 %{
12470   predicate(!VM_Version::supports_bmi2());
12471   match(Set dst (RShiftL dst shift));
12472   effect(KILL cr);
12473 
12474   format %{ "sarq    $dst, $shift" %}
12475   ins_encode %{
12476     __ sarq($dst$$Register);
12477   %}
12478   ins_pipe(ialu_reg_reg);
12479 %}
12480 
12481 // Arithmetic Shift Right by variable
12482 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12483 %{
12484   predicate(!VM_Version::supports_bmi2());
12485   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12486   effect(KILL cr);
12487 
12488   format %{ "sarq    $dst, $shift" %}
12489   ins_encode %{
12490     __ sarq($dst$$Address);
12491   %}
12492   ins_pipe(ialu_mem_reg);
12493 %}
12494 
12495 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12496 %{
12497   predicate(VM_Version::supports_bmi2());
12498   match(Set dst (RShiftL src shift));
12499 
12500   format %{ "sarxq   $dst, $src, $shift" %}
12501   ins_encode %{
12502     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12503   %}
12504   ins_pipe(ialu_reg_reg);
12505 %}
12506 
12507 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12508 %{
12509   predicate(VM_Version::supports_bmi2());
12510   match(Set dst (RShiftL (LoadL src) shift));
12511   ins_cost(175);
12512   format %{ "sarxq   $dst, $src, $shift" %}
12513   ins_encode %{
12514     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12515   %}
12516   ins_pipe(ialu_reg_mem);
12517 %}
12518 
12519 // Logical Shift Right by 8-bit immediate
12520 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12521 %{
12522   predicate(!UseAPX);
12523   match(Set dst (URShiftL dst shift));
12524   effect(KILL cr);
12525 
12526   format %{ "shrq    $dst, $shift" %}
12527   ins_encode %{
12528     __ shrq($dst$$Register, $shift$$constant);
12529   %}
12530   ins_pipe(ialu_reg);
12531 %}
12532 
12533 // Logical Shift Right by 8-bit immediate
12534 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12535 %{
12536   predicate(UseAPX);
12537   match(Set dst (URShiftL src shift));
12538   effect(KILL cr);
12539   flag(PD::Flag_ndd_demotable);
12540 
12541   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12542   ins_encode %{
12543     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12544   %}
12545   ins_pipe(ialu_reg);
12546 %}
12547 
12548 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12549 %{
12550   predicate(UseAPX);
12551   match(Set dst (URShiftL (LoadL src) shift));
12552   effect(KILL cr);
12553 
12554   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12555   ins_encode %{
12556     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12557   %}
12558   ins_pipe(ialu_reg);
12559 %}
12560 
12561 // Logical Shift Right by 8-bit immediate
12562 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12563 %{
12564   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12565   effect(KILL cr);
12566 
12567   format %{ "shrq    $dst, $shift" %}
12568   ins_encode %{
12569     __ shrq($dst$$Address, $shift$$constant);
12570   %}
12571   ins_pipe(ialu_mem_imm);
12572 %}
12573 
12574 // Logical Shift Right by variable
12575 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12576 %{
12577   predicate(!VM_Version::supports_bmi2());
12578   match(Set dst (URShiftL dst shift));
12579   effect(KILL cr);
12580 
12581   format %{ "shrq    $dst, $shift" %}
12582   ins_encode %{
12583     __ shrq($dst$$Register);
12584   %}
12585   ins_pipe(ialu_reg_reg);
12586 %}
12587 
12588 // Logical Shift Right by variable
12589 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12590 %{
12591   predicate(!VM_Version::supports_bmi2());
12592   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12593   effect(KILL cr);
12594 
12595   format %{ "shrq    $dst, $shift" %}
12596   ins_encode %{
12597     __ shrq($dst$$Address);
12598   %}
12599   ins_pipe(ialu_mem_reg);
12600 %}
12601 
12602 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12603 %{
12604   predicate(VM_Version::supports_bmi2());
12605   match(Set dst (URShiftL src shift));
12606 
12607   format %{ "shrxq   $dst, $src, $shift" %}
12608   ins_encode %{
12609     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12610   %}
12611   ins_pipe(ialu_reg_reg);
12612 %}
12613 
12614 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12615 %{
12616   predicate(VM_Version::supports_bmi2());
12617   match(Set dst (URShiftL (LoadL src) shift));
12618   ins_cost(175);
12619   format %{ "shrxq   $dst, $src, $shift" %}
12620   ins_encode %{
12621     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12622   %}
12623   ins_pipe(ialu_reg_mem);
12624 %}
12625 
12626 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
12627 // This idiom is used by the compiler for the i2b bytecode.
12628 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12629 %{
12630   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12631 
12632   format %{ "movsbl  $dst, $src\t# i2b" %}
12633   ins_encode %{
12634     __ movsbl($dst$$Register, $src$$Register);
12635   %}
12636   ins_pipe(ialu_reg_reg);
12637 %}
12638 
12639 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
12640 // This idiom is used by the compiler for the i2s bytecode.
12641 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12642 %{
12643   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12644 
12645   format %{ "movswl  $dst, $src\t# i2s" %}
12646   ins_encode %{
12647     __ movswl($dst$$Register, $src$$Register);
12648   %}
12649   ins_pipe(ialu_reg_reg);
12650 %}
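
// Both idioms originate from Java narrowing casts; a hypothetical snippet
// that produces them:
//
//   byte  b = (byte)  x;   // ideal graph (x << 24) >> 24, matched by i2b
//   short s = (short) x;   // ideal graph (x << 16) >> 16, matched by i2s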
12651 
12652 // ROL/ROR instructions
12653 
12654 // Rotate left by constant.
12655 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12656 %{
12657   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12658   match(Set dst (RotateLeft dst shift));
12659   effect(KILL cr);
12660   format %{ "roll    $dst, $shift" %}
12661   ins_encode %{
12662     __ roll($dst$$Register, $shift$$constant);
12663   %}
12664   ins_pipe(ialu_reg);
12665 %}
12666 
12667 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12668 %{
12669   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12670   match(Set dst (RotateLeft src shift));
12671   format %{ "rolxl   $dst, $src, $shift" %}
12672   ins_encode %{
12673     int shift = 32 - ($shift$$constant & 31);
12674     __ rorxl($dst$$Register, $src$$Register, shift);
12675   %}
12676   ins_pipe(ialu_reg_reg);
12677 %}
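
// The rule above exploits the rotate identity rol(x, s) == ror(x, (32 - s) & 31):
// BMI2 has no rolx encoding, so a constant rotate-left is emitted as a
// flag-preserving rorxl with the complemented count.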
12678 
12679 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12680 %{
12681   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12682   match(Set dst (RotateLeft (LoadI src) shift));
12683   ins_cost(175);
12684   format %{ "rolxl   $dst, $src, $shift" %}
12685   ins_encode %{
12686     int shift = 32 - ($shift$$constant & 31);
12687     __ rorxl($dst$$Register, $src$$Address, shift);
12688   %}
12689   ins_pipe(ialu_reg_mem);
12690 %}
12691 
12692 // Rotate Left by variable
12693 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12694 %{
12695   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12696   match(Set dst (RotateLeft dst shift));
12697   effect(KILL cr);
12698   format %{ "roll    $dst, $shift" %}
12699   ins_encode %{
12700     __ roll($dst$$Register);
12701   %}
12702   ins_pipe(ialu_reg_reg);
12703 %}
12704 
12705 // Rotate Left by variable
12706 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12707 %{
12708   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12709   match(Set dst (RotateLeft src shift));
12710   effect(KILL cr);
12711   flag(PD::Flag_ndd_demotable);
12712 
12713   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12714   ins_encode %{
12715     __ eroll($dst$$Register, $src$$Register, false);
12716   %}
12717   ins_pipe(ialu_reg_reg);
12718 %}
12719 
12720 // Rotate Right by constant.
12721 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12722 %{
12723   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12724   match(Set dst (RotateRight dst shift));
12725   effect(KILL cr);
12726   format %{ "rorl    $dst, $shift" %}
12727   ins_encode %{
12728     __ rorl($dst$$Register, $shift$$constant);
12729   %}
12730   ins_pipe(ialu_reg);
12731 %}
12732 
12733 // Rotate Right by constant.
12734 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12735 %{
12736   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12737   match(Set dst (RotateRight src shift));
12738   format %{ "rorxl   $dst, $src, $shift" %}
12739   ins_encode %{
12740     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12741   %}
12742   ins_pipe(ialu_reg_reg);
12743 %}
12744 
12745 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12746 %{
12747   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12748   match(Set dst (RotateRight (LoadI src) shift));
12749   ins_cost(175);
12750   format %{ "rorxl   $dst, $src, $shift" %}
12751   ins_encode %{
12752     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12753   %}
12754   ins_pipe(ialu_reg_mem);
12755 %}
12756 
12757 // Rotate Right by variable
12758 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12759 %{
12760   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12761   match(Set dst (RotateRight dst shift));
12762   effect(KILL cr);
12763   format %{ "rorl    $dst, $shift" %}
12764   ins_encode %{
12765     __ rorl($dst$$Register);
12766   %}
12767   ins_pipe(ialu_reg_reg);
12768 %}
12769 
12770 // Rotate Right by variable
12771 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12772 %{
12773   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12774   match(Set dst (RotateRight src shift));
12775   effect(KILL cr);
12776   flag(PD::Flag_ndd_demotable);
12777 
12778   format %{ "erorl    $dst, $src, $shift\t# rotate right (int ndd)" %}
12779   ins_encode %{
12780     __ erorl($dst$$Register, $src$$Register, false);
12781   %}
12782   ins_pipe(ialu_reg_reg);
12783 %}
12784 
12785 // Rotate Left by constant.
12786 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12787 %{
12788   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12789   match(Set dst (RotateLeft dst shift));
12790   effect(KILL cr);
12791   format %{ "rolq    $dst, $shift" %}
12792   ins_encode %{
12793     __ rolq($dst$$Register, $shift$$constant);
12794   %}
12795   ins_pipe(ialu_reg);
12796 %}
12797 
12798 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12799 %{
12800   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12801   match(Set dst (RotateLeft src shift));
12802   format %{ "rolxq   $dst, $src, $shift" %}
12803   ins_encode %{
12804     int shift = 64 - ($shift$$constant & 63);
12805     __ rorxq($dst$$Register, $src$$Register, shift);
12806   %}
12807   ins_pipe(ialu_reg_reg);
12808 %}
12809 
12810 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12811 %{
12812   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12813   match(Set dst (RotateLeft (LoadL src) shift));
12814   ins_cost(175);
12815   format %{ "rolxq   $dst, $src, $shift" %}
12816   ins_encode %{
12817     int shift = 64 - ($shift$$constant & 63);
12818     __ rorxq($dst$$Register, $src$$Address, shift);
12819   %}
12820   ins_pipe(ialu_reg_mem);
12821 %}
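
// Same complement trick as the 32-bit rotates: rol(x, s) == ror(x, (64 - s) & 63),
// again emitted as rorxq because BMI2 lacks a rolx encoding.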
12822 
12823 // Rotate Left by variable
12824 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12825 %{
12826   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12827   match(Set dst (RotateLeft dst shift));
12828   effect(KILL cr);
12829 
12830   format %{ "rolq    $dst, $shift" %}
12831   ins_encode %{
12832     __ rolq($dst$$Register);
12833   %}
12834   ins_pipe(ialu_reg_reg);
12835 %}
12836 
12837 // Rotate Left by variable
12838 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12839 %{
12840   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12841   match(Set dst (RotateLeft src shift));
12842   effect(KILL cr);
12843   flag(PD::Flag_ndd_demotable);
12844 
12845   format %{ "erolq    $dst, $src, $shift\t# rotate left (long ndd)" %}
12846   ins_encode %{
12847     __ erolq($dst$$Register, $src$$Register, false);
12848   %}
12849   ins_pipe(ialu_reg_reg);
12850 %}
12851 
12852 // Rotate Right by constant.
12853 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12854 %{
12855   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12856   match(Set dst (RotateRight dst shift));
12857   effect(KILL cr);
12858   format %{ "rorq    $dst, $shift" %}
12859   ins_encode %{
12860     __ rorq($dst$$Register, $shift$$constant);
12861   %}
12862   ins_pipe(ialu_reg);
12863 %}
12864 
12865 // Rotate Right by constant
12866 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12867 %{
12868   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12869   match(Set dst (RotateRight src shift));
12870   format %{ "rorxq   $dst, $src, $shift" %}
12871   ins_encode %{
12872     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12873   %}
12874   ins_pipe(ialu_reg_reg);
12875 %}
12876 
12877 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12878 %{
12879   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12880   match(Set dst (RotateRight (LoadL src) shift));
12881   ins_cost(175);
12882   format %{ "rorxq   $dst, $src, $shift" %}
12883   ins_encode %{
12884     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12885   %}
12886   ins_pipe(ialu_reg_mem);
12887 %}
12888 
12889 // Rotate Right by variable
12890 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12891 %{
12892   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12893   match(Set dst (RotateRight dst shift));
12894   effect(KILL cr);
12895   format %{ "rorq    $dst, $shift" %}
12896   ins_encode %{
12897     __ rorq($dst$$Register);
12898   %}
12899   ins_pipe(ialu_reg_reg);
12900 %}
12901 
12902 // Rotate Right by variable
12903 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12904 %{
12905   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12906   match(Set dst (RotateRight src shift));
12907   effect(KILL cr);
12908   flag(PD::Flag_ndd_demotable);
12909 
12910   format %{ "erorq    $dst, $src, $shift\t# rotate right (long ndd)" %}
12911   ins_encode %{
12912     __ erorq($dst$$Register, $src$$Register, false);
12913   %}
12914   ins_pipe(ialu_reg_reg);
12915 %}
12916 
12917 //----------------------------- CompressBits/ExpandBits ------------------------
12918 
12919 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12920   predicate(n->bottom_type()->isa_long());
12921   match(Set dst (CompressBits src mask));
12922   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12923   ins_encode %{
12924     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12925   %}
12926   ins_pipe( pipe_slow );
12927 %}
12928 
12929 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12930   predicate(n->bottom_type()->isa_long());
12931   match(Set dst (ExpandBits src mask));
12932   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12933   ins_encode %{
12934     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12935   %}
12936   ins_pipe( pipe_slow );
12937 %}
12938 
12939 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12940   predicate(n->bottom_type()->isa_long());
12941   match(Set dst (CompressBits src (LoadL mask)));
12942   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12943   ins_encode %{
12944     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12945   %}
12946   ins_pipe( pipe_slow );
12947 %}
12948 
12949 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12950   predicate(n->bottom_type()->isa_long());
12951   match(Set dst (ExpandBits src (LoadL mask)));
12952   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12953   ins_encode %{
12954     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12955   %}
12956   ins_pipe( pipe_slow );
12957 %}
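
// Illustrative values for the two BMI2 primitives:
//   pext(src = 0b10110010, mask = 0b11110000) gathers the mask-selected bits
//   of src down to the low end, producing 0b1011;
//   pdep(src = 0b1011,     mask = 0b11110000) scatters the low bits of src
//   into the mask positions, producing 0b10110000.
// These rules back the Long.compress/Long.expand intrinsics.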
12958 
12959 
12960 // Logical Instructions
12961 
12962 // Integer Logical Instructions
12963 
12964 // And Instructions
12965 // And Register with Register
12966 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12967 %{
12968   predicate(!UseAPX);
12969   match(Set dst (AndI dst src));
12970   effect(KILL cr);
12971   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12972 
12973   format %{ "andl    $dst, $src\t# int" %}
12974   ins_encode %{
12975     __ andl($dst$$Register, $src$$Register);
12976   %}
12977   ins_pipe(ialu_reg_reg);
12978 %}
12979 
12980 // And Register with Register using New Data Destination (NDD)
12981 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12982 %{
12983   predicate(UseAPX);
12984   match(Set dst (AndI src1 src2));
12985   effect(KILL cr);
12986   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
12987 
12988   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
12989   ins_encode %{
12990     __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
12992   %}
12993   ins_pipe(ialu_reg_reg);
12994 %}
12995 
12996 // And Register with Immediate 255
12997 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12998 %{
12999   match(Set dst (AndI src mask));
13000 
13001   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
13002   ins_encode %{
13003     __ movzbl($dst$$Register, $src$$Register);
13004   %}
13005   ins_pipe(ialu_reg);
13006 %}
13007 
13008 // And Register with Immediate 255 and promote to long
13009 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
13010 %{
13011   match(Set dst (ConvI2L (AndI src mask)));
13012 
13013   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
13014   ins_encode %{
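    // movzbl also zeroes bits 63:32, so the ConvI2L comes for free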
13015     __ movzbl($dst$$Register, $src$$Register);
13016   %}
13017   ins_pipe(ialu_reg);
13018 %}
13019 
13020 // And Register with Immediate 65535
13021 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13022 %{
13023   match(Set dst (AndI src mask));
13024 
13025   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
13026   ins_encode %{
13027     __ movzwl($dst$$Register, $src$$Register);
13028   %}
13029   ins_pipe(ialu_reg);
13030 %}
13031 
13032 // And Register with Immediate 65535 and promote to long
13033 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13034 %{
13035   match(Set dst (ConvI2L (AndI src mask)));
13036 
13037   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
13038   ins_encode %{
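    // movzwl also zeroes bits 63:32, so the ConvI2L comes for free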
13039     __ movzwl($dst$$Register, $src$$Register);
13040   %}
13041   ins_pipe(ialu_reg);
13042 %}
13043 
13044 // The int-to-long conversion can be skipped after an AND with a small (2^n - 1) bitmask
13045 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13046 %{
13047   predicate(VM_Version::supports_bmi2());
13048   ins_cost(125);
13049   effect(TEMP tmp, KILL cr);
13050   match(Set dst (ConvI2L (AndI src mask)));
13051   format %{ "bzhiq $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13052   ins_encode %{
13053     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13054     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13055   %}
13056   ins_pipe(ialu_reg_reg);
13057 %}
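
// bzhi clears all bits at positions >= the count held in $tmp, so an AND
// with a (2^n - 1) mask plus the implicit zero-extension collapses into a
// single instruction. Example: mask = 0xFF gives exact_log2(0x100) = 8,
// and bzhiq keeps only bits 7:0 of $src.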
13058 
13059 // And Register with Immediate
13060 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13061 %{
13062   predicate(!UseAPX);
13063   match(Set dst (AndI dst src));
13064   effect(KILL cr);
13065   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13066 
13067   format %{ "andl    $dst, $src\t# int" %}
13068   ins_encode %{
13069     __ andl($dst$$Register, $src$$constant);
13070   %}
13071   ins_pipe(ialu_reg);
13072 %}
13073 
13074 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13075 %{
13076   predicate(UseAPX);
13077   match(Set dst (AndI src1 src2));
13078   effect(KILL cr);
13079   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13080 
13081   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13082   ins_encode %{
13083     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13084   %}
13085   ins_pipe(ialu_reg);
13086 %}
13087 
13088 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13089 %{
13090   predicate(UseAPX);
13091   match(Set dst (AndI (LoadI src1) src2));
13092   effect(KILL cr);
13093   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13094 
13095   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13096   ins_encode %{
13097     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13098   %}
13099   ins_pipe(ialu_reg);
13100 %}
13101 
13102 // And Register with Memory
13103 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13104 %{
13105   predicate(!UseAPX);
13106   match(Set dst (AndI dst (LoadI src)));
13107   effect(KILL cr);
13108   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13109 
13110   ins_cost(150);
13111   format %{ "andl    $dst, $src\t# int" %}
13112   ins_encode %{
13113     __ andl($dst$$Register, $src$$Address);
13114   %}
13115   ins_pipe(ialu_reg_mem);
13116 %}
13117 
13118 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13119 %{
13120   predicate(UseAPX);
13121   match(Set dst (AndI src1 (LoadI src2)));
13122   effect(KILL cr);
13123   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13124 
13125   ins_cost(150);
13126   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13127   ins_encode %{
13128     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13129   %}
13130   ins_pipe(ialu_reg_mem);
13131 %}
13132 
13133 // And Memory with Register
13134 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13135 %{
13136   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13137   effect(KILL cr);
13138   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13139 
13140   ins_cost(150);
13141   format %{ "andb    $dst, $src\t# byte" %}
13142   ins_encode %{
13143     __ andb($dst$$Address, $src$$Register);
13144   %}
13145   ins_pipe(ialu_mem_reg);
13146 %}
13147 
13148 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13149 %{
13150   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13151   effect(KILL cr);
13152   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13153 
13154   ins_cost(150);
13155   format %{ "andl    $dst, $src\t# int" %}
13156   ins_encode %{
13157     __ andl($dst$$Address, $src$$Register);
13158   %}
13159   ins_pipe(ialu_mem_reg);
13160 %}
13161 
13162 // And Memory with Immediate
13163 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13164 %{
13165   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13166   effect(KILL cr);
13167   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13168 
13169   ins_cost(125);
13170   format %{ "andl    $dst, $src\t# int" %}
13171   ins_encode %{
13172     __ andl($dst$$Address, $src$$constant);
13173   %}
13174   ins_pipe(ialu_mem_imm);
13175 %}
13176 
13177 // BMI1 instructions
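// Each rule below matches the ideal-graph shape of a classic bit trick:
//   andn(a, b)  = ~a & b        -- (AndI (XorI a -1) b)
//   blsi(x)     = x & -x        -- (AndI (SubI 0 x) x)
//   blsmsk(x)   = x ^ (x - 1)   -- (XorI (AddI x -1) x)
//   blsr(x)     = x & (x - 1)   -- (AndI (AddI x -1) x)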
13178 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13179   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13180   predicate(UseBMI1Instructions);
13181   effect(KILL cr);
13182   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13183 
13184   ins_cost(125);
13185   format %{ "andnl  $dst, $src1, $src2" %}
13186 
13187   ins_encode %{
13188     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13189   %}
13190   ins_pipe(ialu_reg_mem);
13191 %}
13192 
13193 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13194   match(Set dst (AndI (XorI src1 minus_1) src2));
13195   predicate(UseBMI1Instructions);
13196   effect(KILL cr);
13197   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13198 
13199   format %{ "andnl  $dst, $src1, $src2" %}
13200 
13201   ins_encode %{
13202     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13203   %}
13204   ins_pipe(ialu_reg);
13205 %}
13206 
13207 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13208   match(Set dst (AndI (SubI imm_zero src) src));
13209   predicate(UseBMI1Instructions);
13210   effect(KILL cr);
13211   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13212 
13213   format %{ "blsil  $dst, $src" %}
13214 
13215   ins_encode %{
13216     __ blsil($dst$$Register, $src$$Register);
13217   %}
13218   ins_pipe(ialu_reg);
13219 %}
13220 
13221 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13222   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13223   predicate(UseBMI1Instructions);
13224   effect(KILL cr);
13225   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13226 
13227   ins_cost(125);
13228   format %{ "blsil  $dst, $src" %}
13229 
13230   ins_encode %{
13231     __ blsil($dst$$Register, $src$$Address);
13232   %}
13233   ins_pipe(ialu_reg_mem);
13234 %}
13235 
13236 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13237 %{
13238   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13239   predicate(UseBMI1Instructions);
13240   effect(KILL cr);
13241   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13242 
13243   ins_cost(125);
13244   format %{ "blsmskl $dst, $src" %}
13245 
13246   ins_encode %{
13247     __ blsmskl($dst$$Register, $src$$Address);
13248   %}
13249   ins_pipe(ialu_reg_mem);
13250 %}
13251 
13252 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13253 %{
13254   match(Set dst (XorI (AddI src minus_1) src));
13255   predicate(UseBMI1Instructions);
13256   effect(KILL cr);
13257   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13258 
13259   format %{ "blsmskl $dst, $src" %}
13260 
13261   ins_encode %{
13262     __ blsmskl($dst$$Register, $src$$Register);
13263   %}
13264 
13265   ins_pipe(ialu_reg);
13266 %}
13267 
13268 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13269 %{
13270   match(Set dst (AndI (AddI src minus_1) src) );
13271   predicate(UseBMI1Instructions);
13272   effect(KILL cr);
13273   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13274 
13275   format %{ "blsrl  $dst, $src" %}
13276 
13277   ins_encode %{
13278     __ blsrl($dst$$Register, $src$$Register);
13279   %}
13280 
13281   ins_pipe(ialu_reg);
13282 %}
13283 
13284 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13285 %{
13286   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13287   predicate(UseBMI1Instructions);
13288   effect(KILL cr);
13289   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13290 
13291   ins_cost(125);
13292   format %{ "blsrl  $dst, $src" %}
13293 
13294   ins_encode %{
13295     __ blsrl($dst$$Register, $src$$Address);
13296   %}
13297 
13298   ins_pipe(ialu_reg_mem);
13299 %}
13300 
13301 // Or Instructions
13302 // Or Register with Register
13303 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13304 %{
13305   predicate(!UseAPX);
13306   match(Set dst (OrI dst src));
13307   effect(KILL cr);
13308   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13309 
13310   format %{ "orl     $dst, $src\t# int" %}
13311   ins_encode %{
13312     __ orl($dst$$Register, $src$$Register);
13313   %}
13314   ins_pipe(ialu_reg_reg);
13315 %}
13316 
13317 // Or Register with Register using New Data Destination (NDD)
13318 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13319 %{
13320   predicate(UseAPX);
13321   match(Set dst (OrI src1 src2));
13322   effect(KILL cr);
13323   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13324 
13325   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13326   ins_encode %{
13327     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13328   %}
13329   ins_pipe(ialu_reg_reg);
13330 %}
13331 
13332 // Or Register with Immediate
13333 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13334 %{
13335   predicate(!UseAPX);
13336   match(Set dst (OrI dst src));
13337   effect(KILL cr);
13338   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13339 
13340   format %{ "orl     $dst, $src\t# int" %}
13341   ins_encode %{
13342     __ orl($dst$$Register, $src$$constant);
13343   %}
13344   ins_pipe(ialu_reg);
13345 %}
13346 
13347 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13348 %{
13349   predicate(UseAPX);
13350   match(Set dst (OrI src1 src2));
13351   effect(KILL cr);
13352   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13353 
13354   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13355   ins_encode %{
13356     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13357   %}
13358   ins_pipe(ialu_reg);
13359 %}
13360 
13361 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13362 %{
13363   predicate(UseAPX);
13364   match(Set dst (OrI src1 src2));
13365   effect(KILL cr);
13366   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13367 
13368   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13369   ins_encode %{
13370     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13371   %}
13372   ins_pipe(ialu_reg);
13373 %}
13374 
13375 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13376 %{
13377   predicate(UseAPX);
13378   match(Set dst (OrI (LoadI src1) src2));
13379   effect(KILL cr);
13380   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13381 
13382   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13383   ins_encode %{
13384     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13385   %}
13386   ins_pipe(ialu_reg);
13387 %}
13388 
13389 // Or Register with Memory
13390 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13391 %{
13392   predicate(!UseAPX);
13393   match(Set dst (OrI dst (LoadI src)));
13394   effect(KILL cr);
13395   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13396 
13397   ins_cost(150);
13398   format %{ "orl     $dst, $src\t# int" %}
13399   ins_encode %{
13400     __ orl($dst$$Register, $src$$Address);
13401   %}
13402   ins_pipe(ialu_reg_mem);
13403 %}
13404 
13405 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13406 %{
13407   predicate(UseAPX);
13408   match(Set dst (OrI src1 (LoadI src2)));
13409   effect(KILL cr);
13410   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13411 
13412   ins_cost(150);
13413   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13414   ins_encode %{
13415     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13416   %}
13417   ins_pipe(ialu_reg_mem);
13418 %}
13419 
13420 // Or Memory with Register
13421 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13422 %{
13423   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13424   effect(KILL cr);
13425   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13426 
13427   ins_cost(150);
13428   format %{ "orb    $dst, $src\t# byte" %}
13429   ins_encode %{
13430     __ orb($dst$$Address, $src$$Register);
13431   %}
13432   ins_pipe(ialu_mem_reg);
13433 %}
13434 
13435 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13436 %{
13437   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13438   effect(KILL cr);
13439   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13440 
13441   ins_cost(150);
13442   format %{ "orl     $dst, $src\t# int" %}
13443   ins_encode %{
13444     __ orl($dst$$Address, $src$$Register);
13445   %}
13446   ins_pipe(ialu_mem_reg);
13447 %}
13448 
13449 // Or Memory with Immediate
13450 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13451 %{
13452   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13453   effect(KILL cr);
13454   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13455 
13456   ins_cost(125);
13457   format %{ "orl     $dst, $src\t# int" %}
13458   ins_encode %{
13459     __ orl($dst$$Address, $src$$constant);
13460   %}
13461   ins_pipe(ialu_mem_imm);
13462 %}
13463 
13464 // Xor Instructions
13465 // Xor Register with Register
13466 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13467 %{
13468   predicate(!UseAPX);
13469   match(Set dst (XorI dst src));
13470   effect(KILL cr);
13471   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13472 
13473   format %{ "xorl    $dst, $src\t# int" %}
13474   ins_encode %{
13475     __ xorl($dst$$Register, $src$$Register);
13476   %}
13477   ins_pipe(ialu_reg_reg);
13478 %}
13479 
13480 // Xor Register with Register using New Data Destination (NDD)
13481 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13482 %{
13483   predicate(UseAPX);
13484   match(Set dst (XorI src1 src2));
13485   effect(KILL cr);
13486   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13487 
13488   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13489   ins_encode %{
13490     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13491   %}
13492   ins_pipe(ialu_reg_reg);
13493 %}
13494 
13495 // Xor Register with Immediate -1
13496 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13497 %{
13498   predicate(!UseAPX);
13499   match(Set dst (XorI dst imm));
13500 
13501   format %{ "notl    $dst" %}
13502   ins_encode %{
13503      __ notl($dst$$Register);
13504   %}
13505   ins_pipe(ialu_reg);
13506 %}
13507 
13508 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13509 %{
13510   match(Set dst (XorI src imm));
13511   predicate(UseAPX);
13512   flag(PD::Flag_ndd_demotable);
13513 
13514   format %{ "enotl    $dst, $src" %}
13515   ins_encode %{
13516      __ enotl($dst$$Register, $src$$Register);
13517   %}
13518   ins_pipe(ialu_reg);
13519 %}
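
// x ^ -1 is simply ~x, so the two rules above strength-reduce the XOR to
// notl/enotl, which needs no immediate byte and leaves the flags untouched.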
13520 
13521 // Xor Register with Immediate
13522 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13523 %{
13524   // Strict predicate check to make selection of xorI_rReg_im1 cost-agnostic when immI src is -1.
13525   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13526   match(Set dst (XorI dst src));
13527   effect(KILL cr);
13528   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13529 
13530   format %{ "xorl    $dst, $src\t# int" %}
13531   ins_encode %{
13532     __ xorl($dst$$Register, $src$$constant);
13533   %}
13534   ins_pipe(ialu_reg);
13535 %}
13536 
13537 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13538 %{
13539   // Strict predicate check to make selection of xorI_rReg_im1_ndd cost-agnostic when immI src2 is -1.
13540   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13541   match(Set dst (XorI src1 src2));
13542   effect(KILL cr);
13543   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13544 
13545   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13546   ins_encode %{
13547     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13548   %}
13549   ins_pipe(ialu_reg);
13550 %}
13551 
13552 // Xor Memory with Immediate into Register (NDD)
13553 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13554 %{
13555   predicate(UseAPX);
13556   match(Set dst (XorI (LoadI src1) src2));
13557   effect(KILL cr);
13558   ins_cost(150);
13559   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13560 
13561   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13562   ins_encode %{
13563     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13564   %}
13565   ins_pipe(ialu_reg);
13566 %}
13567 
13568 // Xor Register with Memory
13569 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13570 %{
13571   predicate(!UseAPX);
13572   match(Set dst (XorI dst (LoadI src)));
13573   effect(KILL cr);
13574   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13575 
13576   ins_cost(150);
13577   format %{ "xorl    $dst, $src\t# int" %}
13578   ins_encode %{
13579     __ xorl($dst$$Register, $src$$Address);
13580   %}
13581   ins_pipe(ialu_reg_mem);
13582 %}
13583 
13584 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13585 %{
13586   predicate(UseAPX);
13587   match(Set dst (XorI src1 (LoadI src2)));
13588   effect(KILL cr);
13589   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13590 
13591   ins_cost(150);
13592   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13593   ins_encode %{
13594     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13595   %}
13596   ins_pipe(ialu_reg_mem);
13597 %}
13598 
13599 // Xor Memory with Register
13600 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13601 %{
13602   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13603   effect(KILL cr);
13604   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13605 
13606   ins_cost(150);
13607   format %{ "xorb    $dst, $src\t# byte" %}
13608   ins_encode %{
13609     __ xorb($dst$$Address, $src$$Register);
13610   %}
13611   ins_pipe(ialu_mem_reg);
13612 %}
13613 
13614 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13615 %{
13616   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13617   effect(KILL cr);
13618   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13619 
13620   ins_cost(150);
13621   format %{ "xorl    $dst, $src\t# int" %}
13622   ins_encode %{
13623     __ xorl($dst$$Address, $src$$Register);
13624   %}
13625   ins_pipe(ialu_mem_reg);
13626 %}
13627 
13628 // Xor Memory with Immediate
13629 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13630 %{
13631   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13632   effect(KILL cr);
13633   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13634 
13635   ins_cost(125);
13636   format %{ "xorl    $dst, $src\t# int" %}
13637   ins_encode %{
13638     __ xorl($dst$$Address, $src$$constant);
13639   %}
13640   ins_pipe(ialu_mem_imm);
13641 %}
13642 
13643 
13644 // Long Logical Instructions
13645 
13646 // And Instructions
13647 // And Register with Register
13648 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13649 %{
13650   predicate(!UseAPX);
13651   match(Set dst (AndL dst src));
13652   effect(KILL cr);
13653   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13654 
13655   format %{ "andq    $dst, $src\t# long" %}
13656   ins_encode %{
13657     __ andq($dst$$Register, $src$$Register);
13658   %}
13659   ins_pipe(ialu_reg_reg);
13660 %}
13661 
13662 // And Register with Register using New Data Destination (NDD)
13663 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13664 %{
13665   predicate(UseAPX);
13666   match(Set dst (AndL src1 src2));
13667   effect(KILL cr);
13668   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13669 
13670   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13671   ins_encode %{
13672     __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13674   %}
13675   ins_pipe(ialu_reg_reg);
13676 %}
13677 
13678 // And Register with Immediate 255
13679 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13680 %{
13681   match(Set dst (AndL src mask));
13682 
13683   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13684   ins_encode %{
13685     // movzbl zeroes out the upper 32-bit and does not need REX.W
13686     __ movzbl($dst$$Register, $src$$Register);
13687   %}
13688   ins_pipe(ialu_reg);
13689 %}
13690 
13691 // And Register with Immediate 65535
13692 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13693 %{
13694   match(Set dst (AndL src mask));
13695 
13696   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13697   ins_encode %{
13698     // movzwl zeroes out the upper 32-bit and does not need REX.W
13699     __ movzwl($dst$$Register, $src$$Register);
13700   %}
13701   ins_pipe(ialu_reg);
13702 %}
13703 
13704 // And Register with Immediate
13705 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13706 %{
13707   predicate(!UseAPX);
13708   match(Set dst (AndL dst src));
13709   effect(KILL cr);
13710   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13711 
13712   format %{ "andq    $dst, $src\t# long" %}
13713   ins_encode %{
13714     __ andq($dst$$Register, $src$$constant);
13715   %}
13716   ins_pipe(ialu_reg);
13717 %}
13718 
13719 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13720 %{
13721   predicate(UseAPX);
13722   match(Set dst (AndL src1 src2));
13723   effect(KILL cr);
13724   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13725 
13726   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13727   ins_encode %{
13728     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13729   %}
13730   ins_pipe(ialu_reg);
13731 %}
13732 
13733 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13734 %{
13735   predicate(UseAPX);
13736   match(Set dst (AndL (LoadL src1) src2));
13737   effect(KILL cr);
13738   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13739 
13740   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13741   ins_encode %{
13742     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13743   %}
13744   ins_pipe(ialu_reg);
13745 %}
13746 
13747 // And Register with Memory
13748 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13749 %{
13750   predicate(!UseAPX);
13751   match(Set dst (AndL dst (LoadL src)));
13752   effect(KILL cr);
13753   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13754 
13755   ins_cost(150);
13756   format %{ "andq    $dst, $src\t# long" %}
13757   ins_encode %{
13758     __ andq($dst$$Register, $src$$Address);
13759   %}
13760   ins_pipe(ialu_reg_mem);
13761 %}
13762 
13763 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13764 %{
13765   predicate(UseAPX);
13766   match(Set dst (AndL src1 (LoadL src2)));
13767   effect(KILL cr);
13768   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13769 
13770   ins_cost(150);
13771   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13772   ins_encode %{
13773     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13774   %}
13775   ins_pipe(ialu_reg_mem);
13776 %}
13777 
13778 // And Memory with Register
13779 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13780 %{
13781   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13782   effect(KILL cr);
13783   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13784 
13785   ins_cost(150);
13786   format %{ "andq    $dst, $src\t# long" %}
13787   ins_encode %{
13788     __ andq($dst$$Address, $src$$Register);
13789   %}
13790   ins_pipe(ialu_mem_reg);
13791 %}
13792 
13793 // And Memory with Immediate
13794 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13795 %{
13796   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13797   effect(KILL cr);
13798   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13799 
13800   ins_cost(125);
13801   format %{ "andq    $dst, $src\t# long" %}
13802   ins_encode %{
13803     __ andq($dst$$Address, $src$$constant);
13804   %}
13805   ins_pipe(ialu_mem_imm);
13806 %}
13807 
13808 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13809 %{
  // con should be a pure 64-bit immediate, given that not(con) is a power of 2
  // above bit 30; plain AND/OR works well enough for 8/32-bit values.
13812   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13813 
13814   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13815   effect(KILL cr);
13816 
13817   ins_cost(125);
13818   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13819   ins_encode %{
13820     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13821   %}
13822   ins_pipe(ialu_mem_imm);
13823 %}
13824 
13825 // BMI1 instructions
13826 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13827   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13828   predicate(UseBMI1Instructions);
13829   effect(KILL cr);
13830   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13831 
13832   ins_cost(125);
13833   format %{ "andnq  $dst, $src1, $src2" %}
13834 
13835   ins_encode %{
13836     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13837   %}
13838   ins_pipe(ialu_reg_mem);
13839 %}
13840 
13841 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13842   match(Set dst (AndL (XorL src1 minus_1) src2));
13843   predicate(UseBMI1Instructions);
13844   effect(KILL cr);
13845   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13846 
13847   format %{ "andnq  $dst, $src1, $src2" %}
13848 
13849   ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13851   %}
  ins_pipe(ialu_reg_reg);
13853 %}
13854 
13855 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13856   match(Set dst (AndL (SubL imm_zero src) src));
13857   predicate(UseBMI1Instructions);
13858   effect(KILL cr);
13859   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13860 
13861   format %{ "blsiq  $dst, $src" %}
13862 
13863   ins_encode %{
13864     __ blsiq($dst$$Register, $src$$Register);
13865   %}
13866   ins_pipe(ialu_reg);
13867 %}
13868 
13869 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13870   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13871   predicate(UseBMI1Instructions);
13872   effect(KILL cr);
13873   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13874 
13875   ins_cost(125);
13876   format %{ "blsiq  $dst, $src" %}
13877 
13878   ins_encode %{
13879     __ blsiq($dst$$Register, $src$$Address);
13880   %}
13881   ins_pipe(ialu_reg_mem);
13882 %}
13883 
13884 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13885 %{
13886   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13887   predicate(UseBMI1Instructions);
13888   effect(KILL cr);
13889   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13890 
13891   ins_cost(125);
13892   format %{ "blsmskq $dst, $src" %}
13893 
13894   ins_encode %{
13895     __ blsmskq($dst$$Register, $src$$Address);
13896   %}
13897   ins_pipe(ialu_reg_mem);
13898 %}
13899 
13900 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13901 %{
13902   match(Set dst (XorL (AddL src minus_1) src));
13903   predicate(UseBMI1Instructions);
13904   effect(KILL cr);
13905   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13906 
13907   format %{ "blsmskq $dst, $src" %}
13908 
13909   ins_encode %{
13910     __ blsmskq($dst$$Register, $src$$Register);
13911   %}
13912 
13913   ins_pipe(ialu_reg);
13914 %}
13915 
13916 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13917 %{
13918   match(Set dst (AndL (AddL src minus_1) src) );
13919   predicate(UseBMI1Instructions);
13920   effect(KILL cr);
13921   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13922 
13923   format %{ "blsrq  $dst, $src" %}
13924 
13925   ins_encode %{
13926     __ blsrq($dst$$Register, $src$$Register);
13927   %}
13928 
13929   ins_pipe(ialu_reg);
13930 %}
13931 
13932 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13933 %{
13934   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13935   predicate(UseBMI1Instructions);
13936   effect(KILL cr);
13937   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13938 
13939   ins_cost(125);
13940   format %{ "blsrq  $dst, $src" %}
13941 
13942   ins_encode %{
13943     __ blsrq($dst$$Register, $src$$Address);
13944   %}
13945 
  ins_pipe(ialu_reg_mem);
13947 %}
13948 
13949 // Or Instructions
13950 // Or Register with Register
13951 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13952 %{
13953   predicate(!UseAPX);
13954   match(Set dst (OrL dst src));
13955   effect(KILL cr);
13956   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13957 
13958   format %{ "orq     $dst, $src\t# long" %}
13959   ins_encode %{
13960     __ orq($dst$$Register, $src$$Register);
13961   %}
13962   ins_pipe(ialu_reg_reg);
13963 %}
13964 
13965 // Or Register with Register using New Data Destination (NDD)
13966 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13967 %{
13968   predicate(UseAPX);
13969   match(Set dst (OrL src1 src2));
13970   effect(KILL cr);
13971   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13972 
13973   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13978   ins_pipe(ialu_reg_reg);
13979 %}
13980 
13981 // Use any_RegP to match R15 (TLS register) without spilling.
13982 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
13983   match(Set dst (OrL dst (CastP2X src)));
13984   effect(KILL cr);
13985   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13986 
13987   format %{ "orq     $dst, $src\t# long" %}
13988   ins_encode %{
13989     __ orq($dst$$Register, $src$$Register);
13990   %}
13991   ins_pipe(ialu_reg_reg);
13992 %}
13993 
13994 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
13996   effect(KILL cr);
13997   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13998 
13999   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14000   ins_encode %{
14001     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14002   %}
14003   ins_pipe(ialu_reg_reg);
14004 %}
14005 
14006 // Or Register with Immediate
14007 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14008 %{
14009   predicate(!UseAPX);
14010   match(Set dst (OrL dst src));
14011   effect(KILL cr);
14012   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14013 
14014   format %{ "orq     $dst, $src\t# long" %}
14015   ins_encode %{
14016     __ orq($dst$$Register, $src$$constant);
14017   %}
14018   ins_pipe(ialu_reg);
14019 %}
14020 
14021 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14022 %{
14023   predicate(UseAPX);
14024   match(Set dst (OrL src1 src2));
14025   effect(KILL cr);
14026   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
14027 
14028   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14029   ins_encode %{
14030     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14031   %}
14032   ins_pipe(ialu_reg);
14033 %}
14034 
14035 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14036 %{
14037   predicate(UseAPX);
14038   match(Set dst (OrL src1 src2));
14039   effect(KILL cr);
14040   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
14041 
14042   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
14043   ins_encode %{
14044     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14045   %}
14046   ins_pipe(ialu_reg);
14047 %}
14048 
// Or Memory with Immediate into Register (NDD)
14050 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14051 %{
14052   predicate(UseAPX);
14053   match(Set dst (OrL (LoadL src1) src2));
14054   effect(KILL cr);
14055   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14056 
14057   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14058   ins_encode %{
14059     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14060   %}
14061   ins_pipe(ialu_reg);
14062 %}
14063 
14064 // Or Register with Memory
14065 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14066 %{
14067   predicate(!UseAPX);
14068   match(Set dst (OrL dst (LoadL src)));
14069   effect(KILL cr);
14070   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14071 
14072   ins_cost(150);
14073   format %{ "orq     $dst, $src\t# long" %}
14074   ins_encode %{
14075     __ orq($dst$$Register, $src$$Address);
14076   %}
14077   ins_pipe(ialu_reg_mem);
14078 %}
14079 
14080 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14081 %{
14082   predicate(UseAPX);
14083   match(Set dst (OrL src1 (LoadL src2)));
14084   effect(KILL cr);
14085   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
14086 
14087   ins_cost(150);
14088   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14089   ins_encode %{
14090     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14091   %}
14092   ins_pipe(ialu_reg_mem);
14093 %}
14094 
14095 // Or Memory with Register
14096 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14097 %{
14098   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14099   effect(KILL cr);
14100   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14101 
14102   ins_cost(150);
14103   format %{ "orq     $dst, $src\t# long" %}
14104   ins_encode %{
14105     __ orq($dst$$Address, $src$$Register);
14106   %}
14107   ins_pipe(ialu_mem_reg);
14108 %}
14109 
14110 // Or Memory with Immediate
14111 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14112 %{
14113   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14114   effect(KILL cr);
14115   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14116 
14117   ins_cost(125);
14118   format %{ "orq     $dst, $src\t# long" %}
14119   ins_encode %{
14120     __ orq($dst$$Address, $src$$constant);
14121   %}
14122   ins_pipe(ialu_mem_imm);
14123 %}
14124 
14125 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14126 %{
  // con should be a pure 64-bit power-of-2 immediate above bit 31;
  // plain AND/OR works well enough for 8/32-bit values.
14129   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14130 
14131   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14132   effect(KILL cr);
14133 
14134   ins_cost(125);
14135   format %{ "btsq    $dst, log2($con)\t# long" %}
14136   ins_encode %{
14137     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14138   %}
14139   ins_pipe(ialu_mem_imm);
14140 %}
14141 
14142 // Xor Instructions
14143 // Xor Register with Register
14144 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14145 %{
14146   predicate(!UseAPX);
14147   match(Set dst (XorL dst src));
14148   effect(KILL cr);
14149   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14150 
14151   format %{ "xorq    $dst, $src\t# long" %}
14152   ins_encode %{
14153     __ xorq($dst$$Register, $src$$Register);
14154   %}
14155   ins_pipe(ialu_reg_reg);
14156 %}
14157 
14158 // Xor Register with Register using New Data Destination (NDD)
14159 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14160 %{
14161   predicate(UseAPX);
14162   match(Set dst (XorL src1 src2));
14163   effect(KILL cr);
14164   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
14165 
14166   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14167   ins_encode %{
14168     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14169   %}
14170   ins_pipe(ialu_reg_reg);
14171 %}
14172 
14173 // Xor Register with Immediate -1
14174 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14175 %{
14176   predicate(!UseAPX);
14177   match(Set dst (XorL dst imm));
14178 
14179   format %{ "notq   $dst" %}
14180   ins_encode %{
14181      __ notq($dst$$Register);
14182   %}
14183   ins_pipe(ialu_reg);
14184 %}
14185 
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14187 %{
14188   predicate(UseAPX);
14189   match(Set dst (XorL src imm));
14190   flag(PD::Flag_ndd_demotable);
14191 
14192   format %{ "enotq   $dst, $src" %}
14193   ins_encode %{
14194     __ enotq($dst$$Register, $src$$Register);
14195   %}
14196   ins_pipe(ialu_reg);
14197 %}
14198 
14199 // Xor Register with Immediate
14200 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14201 %{
  // Strict predicate check so that xorL_rReg_im1 is selected when immL32 src is -1, regardless of cost.
14203   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14204   match(Set dst (XorL dst src));
14205   effect(KILL cr);
14206   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14207 
14208   format %{ "xorq    $dst, $src\t# long" %}
14209   ins_encode %{
14210     __ xorq($dst$$Register, $src$$constant);
14211   %}
14212   ins_pipe(ialu_reg);
14213 %}
14214 
14215 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14216 %{
  // Strict predicate check so that xorL_rReg_im1_ndd is selected when immL32 src2 is -1, regardless of cost.
14218   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14219   match(Set dst (XorL src1 src2));
14220   effect(KILL cr);
14221   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
14222 
14223   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14224   ins_encode %{
14225     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14226   %}
14227   ins_pipe(ialu_reg);
14228 %}
14229 
// Xor Memory with Immediate into Register (NDD)
14231 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14232 %{
14233   predicate(UseAPX);
14234   match(Set dst (XorL (LoadL src1) src2));
14235   effect(KILL cr);
14236   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14237   ins_cost(150);
14238 
14239   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14240   ins_encode %{
14241     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14242   %}
14243   ins_pipe(ialu_reg);
14244 %}
14245 
14246 // Xor Register with Memory
14247 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14248 %{
14249   predicate(!UseAPX);
14250   match(Set dst (XorL dst (LoadL src)));
14251   effect(KILL cr);
14252   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14253 
14254   ins_cost(150);
14255   format %{ "xorq    $dst, $src\t# long" %}
14256   ins_encode %{
14257     __ xorq($dst$$Register, $src$$Address);
14258   %}
14259   ins_pipe(ialu_reg_mem);
14260 %}
14261 
14262 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14263 %{
14264   predicate(UseAPX);
14265   match(Set dst (XorL src1 (LoadL src2)));
14266   effect(KILL cr);
14267   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
14268 
14269   ins_cost(150);
14270   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14271   ins_encode %{
14272     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14273   %}
14274   ins_pipe(ialu_reg_mem);
14275 %}
14276 
14277 // Xor Memory with Register
14278 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14279 %{
14280   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14281   effect(KILL cr);
14282   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14283 
14284   ins_cost(150);
14285   format %{ "xorq    $dst, $src\t# long" %}
14286   ins_encode %{
14287     __ xorq($dst$$Address, $src$$Register);
14288   %}
14289   ins_pipe(ialu_mem_reg);
14290 %}
14291 
14292 // Xor Memory with Immediate
14293 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14294 %{
14295   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14296   effect(KILL cr);
14297   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14298 
14299   ins_cost(125);
14300   format %{ "xorq    $dst, $src\t# long" %}
14301   ins_encode %{
14302     __ xorq($dst$$Address, $src$$constant);
14303   %}
14304   ins_pipe(ialu_mem_imm);
14305 %}
14306 
14307 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14308 %{
14309   match(Set dst (CmpLTMask p q));
14310   effect(KILL cr);
14311 
14312   ins_cost(400);
14313   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
14314             "setcc   $dst \t# emits setlt + movzbl or setzul for APX"
14315             "negl    $dst" %}
14316   ins_encode %{
14317     __ cmpl($p$$Register, $q$$Register);
14318     __ setcc(Assembler::less, $dst$$Register);
14319     __ negl($dst$$Register);
14320   %}
14321   ins_pipe(pipe_slow);
14322 %}
14323 
14324 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14325 %{
14326   match(Set dst (CmpLTMask dst zero));
14327   effect(KILL cr);
14328 
14329   ins_cost(100);
14330   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14331   ins_encode %{
14332     __ sarl($dst$$Register, 31);
14333   %}
14334   ins_pipe(ialu_reg);
14335 %}
14336 
14337 /* Better to save a register than avoid a branch */
14338 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14339 %{
14340   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14341   effect(KILL cr);
14342   ins_cost(300);
14343   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14344             "jge     done\n\t"
14345             "addl    $p,$y\n"
14346             "done:   " %}
14347   ins_encode %{
14348     Register Rp = $p$$Register;
14349     Register Rq = $q$$Register;
14350     Register Ry = $y$$Register;
14351     Label done;
14352     __ subl(Rp, Rq);
14353     __ jccb(Assembler::greaterEqual, done);
14354     __ addl(Rp, Ry);
14355     __ bind(done);
14356   %}
14357   ins_pipe(pipe_cmplt);
14358 %}
14359 
14360 /* Better to save a register than avoid a branch */
14361 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14362 %{
14363   match(Set y (AndI (CmpLTMask p q) y));
14364   effect(KILL cr);
14365 
14366   ins_cost(300);
14367 
14368   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14369             "jlt     done\n\t"
14370             "xorl    $y, $y\n"
14371             "done:   " %}
14372   ins_encode %{
14373     Register Rp = $p$$Register;
14374     Register Rq = $q$$Register;
14375     Register Ry = $y$$Register;
14376     Label done;
14377     __ cmpl(Rp, Rq);
14378     __ jccb(Assembler::less, done);
14379     __ xorl(Ry, Ry);
14380     __ bind(done);
14381   %}
14382   ins_pipe(pipe_cmplt);
14383 %}
14384 
14385 
14386 //---------- FP Instructions------------------------------------------------
14387 
14388 // Really expensive, avoid
14389 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14390 %{
14391   match(Set cr (CmpF src1 src2));
14392 
14393   ins_cost(500);
14394   format %{ "ucomiss $src1, $src2\n\t"
14395             "jnp,s   exit\n\t"
14396             "pushfq\t# saw NaN, set CF\n\t"
14397             "andq    [rsp], #0xffffff2b\n\t"
14398             "popfq\n"
14399     "exit:" %}
14400   ins_encode %{
14401     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14402     emit_cmpfp_fixup(masm);
14403   %}
14404   ins_pipe(pipe_slow);
14405 %}
14406 
14407 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14408   match(Set cr (CmpF src1 src2));
14409 
14410   ins_cost(100);
14411   format %{ "ucomiss $src1, $src2" %}
14412   ins_encode %{
14413     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14414   %}
14415   ins_pipe(pipe_slow);
14416 %}
14417 
14418 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14419   match(Set cr (CmpF src1 (LoadF src2)));
14420 
14421   ins_cost(100);
14422   format %{ "ucomiss $src1, $src2" %}
14423   ins_encode %{
14424     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14425   %}
14426   ins_pipe(pipe_slow);
14427 %}
14428 
14429 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14430   match(Set cr (CmpF src con));
14431   ins_cost(100);
14432   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14433   ins_encode %{
14434     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14435   %}
14436   ins_pipe(pipe_slow);
14437 %}
14438 
14439 // Really expensive, avoid
14440 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14441 %{
14442   match(Set cr (CmpD src1 src2));
14443 
14444   ins_cost(500);
14445   format %{ "ucomisd $src1, $src2\n\t"
14446             "jnp,s   exit\n\t"
14447             "pushfq\t# saw NaN, set CF\n\t"
14448             "andq    [rsp], #0xffffff2b\n\t"
14449             "popfq\n"
14450     "exit:" %}
14451   ins_encode %{
14452     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14453     emit_cmpfp_fixup(masm);
14454   %}
14455   ins_pipe(pipe_slow);
14456 %}
14457 
14458 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14459   match(Set cr (CmpD src1 src2));
14460 
14461   ins_cost(100);
14462   format %{ "ucomisd $src1, $src2 test" %}
14463   ins_encode %{
14464     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14465   %}
14466   ins_pipe(pipe_slow);
14467 %}
14468 
14469 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14470   match(Set cr (CmpD src1 (LoadD src2)));
14471 
14472   ins_cost(100);
14473   format %{ "ucomisd $src1, $src2" %}
14474   ins_encode %{
14475     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14476   %}
14477   ins_pipe(pipe_slow);
14478 %}
14479 
14480 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14481   match(Set cr (CmpD src con));
14482   ins_cost(100);
14483   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14484   ins_encode %{
14485     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14486   %}
14487   ins_pipe(pipe_slow);
14488 %}
14489 
14490 // Compare into -1,0,1
14491 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14492 %{
14493   match(Set dst (CmpF3 src1 src2));
14494   effect(KILL cr);
14495 
14496   ins_cost(275);
14497   format %{ "ucomiss $src1, $src2\n\t"
14498             "movl    $dst, #-1\n\t"
14499             "jp,s    done\n\t"
14500             "jb,s    done\n\t"
14501             "setne   $dst\n\t"
14502             "movzbl  $dst, $dst\n"
14503     "done:" %}
14504   ins_encode %{
14505     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14506     emit_cmpfp3(masm, $dst$$Register);
14507   %}
14508   ins_pipe(pipe_slow);
14509 %}
14510 
14511 // Compare into -1,0,1
14512 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14513 %{
14514   match(Set dst (CmpF3 src1 (LoadF src2)));
14515   effect(KILL cr);
14516 
14517   ins_cost(275);
14518   format %{ "ucomiss $src1, $src2\n\t"
14519             "movl    $dst, #-1\n\t"
14520             "jp,s    done\n\t"
14521             "jb,s    done\n\t"
14522             "setne   $dst\n\t"
14523             "movzbl  $dst, $dst\n"
14524     "done:" %}
14525   ins_encode %{
14526     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14527     emit_cmpfp3(masm, $dst$$Register);
14528   %}
14529   ins_pipe(pipe_slow);
14530 %}
14531 
14532 // Compare into -1,0,1
14533 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14534   match(Set dst (CmpF3 src con));
14535   effect(KILL cr);
14536 
14537   ins_cost(275);
14538   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14539             "movl    $dst, #-1\n\t"
14540             "jp,s    done\n\t"
14541             "jb,s    done\n\t"
14542             "setne   $dst\n\t"
14543             "movzbl  $dst, $dst\n"
14544     "done:" %}
14545   ins_encode %{
14546     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14547     emit_cmpfp3(masm, $dst$$Register);
14548   %}
14549   ins_pipe(pipe_slow);
14550 %}
14551 
14552 // Compare into -1,0,1
14553 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14554 %{
14555   match(Set dst (CmpD3 src1 src2));
14556   effect(KILL cr);
14557 
14558   ins_cost(275);
14559   format %{ "ucomisd $src1, $src2\n\t"
14560             "movl    $dst, #-1\n\t"
14561             "jp,s    done\n\t"
14562             "jb,s    done\n\t"
14563             "setne   $dst\n\t"
14564             "movzbl  $dst, $dst\n"
14565     "done:" %}
14566   ins_encode %{
14567     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14568     emit_cmpfp3(masm, $dst$$Register);
14569   %}
14570   ins_pipe(pipe_slow);
14571 %}
14572 
14573 // Compare into -1,0,1
14574 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14575 %{
14576   match(Set dst (CmpD3 src1 (LoadD src2)));
14577   effect(KILL cr);
14578 
14579   ins_cost(275);
14580   format %{ "ucomisd $src1, $src2\n\t"
14581             "movl    $dst, #-1\n\t"
14582             "jp,s    done\n\t"
14583             "jb,s    done\n\t"
14584             "setne   $dst\n\t"
14585             "movzbl  $dst, $dst\n"
14586     "done:" %}
14587   ins_encode %{
14588     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14589     emit_cmpfp3(masm, $dst$$Register);
14590   %}
14591   ins_pipe(pipe_slow);
14592 %}
14593 
14594 // Compare into -1,0,1
14595 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14596   match(Set dst (CmpD3 src con));
14597   effect(KILL cr);
14598 
14599   ins_cost(275);
14600   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14601             "movl    $dst, #-1\n\t"
14602             "jp,s    done\n\t"
14603             "jb,s    done\n\t"
14604             "setne   $dst\n\t"
14605             "movzbl  $dst, $dst\n"
14606     "done:" %}
14607   ins_encode %{
14608     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14609     emit_cmpfp3(masm, $dst$$Register);
14610   %}
14611   ins_pipe(pipe_slow);
14612 %}
14613 
14614 //----------Arithmetic Conversion Instructions---------------------------------
14615 
14616 instruct convF2D_reg_reg(regD dst, regF src)
14617 %{
14618   match(Set dst (ConvF2D src));
14619 
14620   format %{ "cvtss2sd $dst, $src" %}
14621   ins_encode %{
14622     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14623   %}
14624   ins_pipe(pipe_slow); // XXX
14625 %}
14626 
14627 instruct convF2D_reg_mem(regD dst, memory src)
14628 %{
14629   predicate(UseAVX == 0);
14630   match(Set dst (ConvF2D (LoadF src)));
14631 
14632   format %{ "cvtss2sd $dst, $src" %}
14633   ins_encode %{
14634     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14635   %}
14636   ins_pipe(pipe_slow); // XXX
14637 %}
14638 
14639 instruct convD2F_reg_reg(regF dst, regD src)
14640 %{
14641   match(Set dst (ConvD2F src));
14642 
14643   format %{ "cvtsd2ss $dst, $src" %}
14644   ins_encode %{
14645     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14646   %}
14647   ins_pipe(pipe_slow); // XXX
14648 %}
14649 
14650 instruct convD2F_reg_mem(regF dst, memory src)
14651 %{
14652   predicate(UseAVX == 0);
14653   match(Set dst (ConvD2F (LoadD src)));
14654 
14655   format %{ "cvtsd2ss $dst, $src" %}
14656   ins_encode %{
14657     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14658   %}
14659   ins_pipe(pipe_slow); // XXX
14660 %}
14661 
14662 // XXX do mem variants
14663 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14664 %{
14665   predicate(!VM_Version::supports_avx10_2());
14666   match(Set dst (ConvF2I src));
14667   effect(KILL cr);
14668   format %{ "convert_f2i $dst, $src" %}
14669   ins_encode %{
14670     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14671   %}
14672   ins_pipe(pipe_slow);
14673 %}
14674 
14675 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14676 %{
14677   predicate(VM_Version::supports_avx10_2());
14678   match(Set dst (ConvF2I src));
14679   format %{ "evcvttss2sisl $dst, $src" %}
14680   ins_encode %{
14681     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14682   %}
14683   ins_pipe(pipe_slow);
14684 %}
14685 
14686 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14687 %{
14688   predicate(VM_Version::supports_avx10_2());
14689   match(Set dst (ConvF2I (LoadF src)));
14690   format %{ "evcvttss2sisl $dst, $src" %}
14691   ins_encode %{
14692     __ evcvttss2sisl($dst$$Register, $src$$Address);
14693   %}
14694   ins_pipe(pipe_slow);
14695 %}
14696 
14697 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14698 %{
14699   predicate(!VM_Version::supports_avx10_2());
14700   match(Set dst (ConvF2L src));
14701   effect(KILL cr);
14702   format %{ "convert_f2l $dst, $src"%}
14703   ins_encode %{
14704     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14705   %}
14706   ins_pipe(pipe_slow);
14707 %}
14708 
14709 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14710 %{
14711   predicate(VM_Version::supports_avx10_2());
14712   match(Set dst (ConvF2L src));
14713   format %{ "evcvttss2sisq $dst, $src" %}
14714   ins_encode %{
14715     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14716   %}
14717   ins_pipe(pipe_slow);
14718 %}
14719 
14720 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14721 %{
14722   predicate(VM_Version::supports_avx10_2());
14723   match(Set dst (ConvF2L (LoadF src)));
14724   format %{ "evcvttss2sisq $dst, $src" %}
14725   ins_encode %{
14726     __ evcvttss2sisq($dst$$Register, $src$$Address);
14727   %}
14728   ins_pipe(pipe_slow);
14729 %}
14730 
14731 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14732 %{
14733   predicate(!VM_Version::supports_avx10_2());
14734   match(Set dst (ConvD2I src));
14735   effect(KILL cr);
14736   format %{ "convert_d2i $dst, $src"%}
14737   ins_encode %{
14738     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14739   %}
14740   ins_pipe(pipe_slow);
14741 %}
14742 
14743 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14744 %{
14745   predicate(VM_Version::supports_avx10_2());
14746   match(Set dst (ConvD2I src));
14747   format %{ "evcvttsd2sisl $dst, $src" %}
14748   ins_encode %{
14749     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14750   %}
14751   ins_pipe(pipe_slow);
14752 %}
14753 
14754 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14755 %{
14756   predicate(VM_Version::supports_avx10_2());
14757   match(Set dst (ConvD2I (LoadD src)));
14758   format %{ "evcvttsd2sisl $dst, $src" %}
14759   ins_encode %{
14760     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14761   %}
14762   ins_pipe(pipe_slow);
14763 %}
14764 
14765 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14766 %{
14767   predicate(!VM_Version::supports_avx10_2());
14768   match(Set dst (ConvD2L src));
14769   effect(KILL cr);
14770   format %{ "convert_d2l $dst, $src"%}
14771   ins_encode %{
14772     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14773   %}
14774   ins_pipe(pipe_slow);
14775 %}
14776 
14777 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14778 %{
14779   predicate(VM_Version::supports_avx10_2());
14780   match(Set dst (ConvD2L src));
14781   format %{ "evcvttsd2sisq $dst, $src" %}
14782   ins_encode %{
14783     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14784   %}
14785   ins_pipe(pipe_slow);
14786 %}
14787 
14788 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14789 %{
14790   predicate(VM_Version::supports_avx10_2());
14791   match(Set dst (ConvD2L (LoadD src)));
14792   format %{ "evcvttsd2sisq $dst, $src" %}
14793   ins_encode %{
14794     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14795   %}
14796   ins_pipe(pipe_slow);
14797 %}
14798 
14799 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14800 %{
14801   match(Set dst (RoundD src));
14802   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14803   format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14804   ins_encode %{
14805     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14806   %}
14807   ins_pipe(pipe_slow);
14808 %}
14809 
14810 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14811 %{
14812   match(Set dst (RoundF src));
14813   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14814   format %{ "round_float $dst,$src" %}
14815   ins_encode %{
14816     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14817   %}
14818   ins_pipe(pipe_slow);
14819 %}
14820 
14821 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14822 %{
14823   predicate(!UseXmmI2F);
14824   match(Set dst (ConvI2F src));
14825 
14826   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14827   ins_encode %{
14828     if (UseAVX > 0) {
14829       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14830     }
14831     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14832   %}
14833   ins_pipe(pipe_slow); // XXX
14834 %}
14835 
14836 instruct convI2F_reg_mem(regF dst, memory src)
14837 %{
14838   predicate(UseAVX == 0);
14839   match(Set dst (ConvI2F (LoadI src)));
14840 
14841   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14842   ins_encode %{
14843     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14844   %}
14845   ins_pipe(pipe_slow); // XXX
14846 %}
14847 
14848 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14849 %{
14850   predicate(!UseXmmI2D);
14851   match(Set dst (ConvI2D src));
14852 
14853   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14854   ins_encode %{
14855     if (UseAVX > 0) {
14856       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14857     }
14858     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14859   %}
14860   ins_pipe(pipe_slow); // XXX
14861 %}
14862 
14863 instruct convI2D_reg_mem(regD dst, memory src)
14864 %{
14865   predicate(UseAVX == 0);
14866   match(Set dst (ConvI2D (LoadI src)));
14867 
14868   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14869   ins_encode %{
14870     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14871   %}
14872   ins_pipe(pipe_slow); // XXX
14873 %}
14874 
14875 instruct convXI2F_reg(regF dst, rRegI src)
14876 %{
14877   predicate(UseXmmI2F);
14878   match(Set dst (ConvI2F src));
14879 
14880   format %{ "movdl $dst, $src\n\t"
14881             "cvtdq2psl $dst, $dst\t# i2f" %}
14882   ins_encode %{
14883     __ movdl($dst$$XMMRegister, $src$$Register);
14884     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14885   %}
14886   ins_pipe(pipe_slow); // XXX
14887 %}
14888 
14889 instruct convXI2D_reg(regD dst, rRegI src)
14890 %{
14891   predicate(UseXmmI2D);
14892   match(Set dst (ConvI2D src));
14893 
14894   format %{ "movdl $dst, $src\n\t"
14895             "cvtdq2pdl $dst, $dst\t# i2d" %}
14896   ins_encode %{
14897     __ movdl($dst$$XMMRegister, $src$$Register);
14898     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14899   %}
14900   ins_pipe(pipe_slow); // XXX
14901 %}
14902 
14903 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14904 %{
14905   match(Set dst (ConvL2F src));
14906 
14907   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14908   ins_encode %{
14909     if (UseAVX > 0) {
14910       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14911     }
14912     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14913   %}
14914   ins_pipe(pipe_slow); // XXX
14915 %}
14916 
14917 instruct convL2F_reg_mem(regF dst, memory src)
14918 %{
14919   predicate(UseAVX == 0);
14920   match(Set dst (ConvL2F (LoadL src)));
14921 
14922   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14923   ins_encode %{
14924     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14925   %}
14926   ins_pipe(pipe_slow); // XXX
14927 %}
14928 
14929 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14930 %{
14931   match(Set dst (ConvL2D src));
14932 
14933   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14934   ins_encode %{
14935     if (UseAVX > 0) {
14936       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14937     }
14938     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14939   %}
14940   ins_pipe(pipe_slow); // XXX
14941 %}
14942 
14943 instruct convL2D_reg_mem(regD dst, memory src)
14944 %{
14945   predicate(UseAVX == 0);
14946   match(Set dst (ConvL2D (LoadL src)));
14947 
14948   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14949   ins_encode %{
14950     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14951   %}
14952   ins_pipe(pipe_slow); // XXX
14953 %}
14954 
14955 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14956 %{
14957   match(Set dst (ConvI2L src));
14958 
14959   ins_cost(125);
14960   format %{ "movslq  $dst, $src\t# i2l" %}
14961   ins_encode %{
14962     __ movslq($dst$$Register, $src$$Register);
14963   %}
14964   ins_pipe(ialu_reg_reg);
14965 %}
14966 
14967 // Zero-extend convert int to long
14968 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14969 %{
14970   match(Set dst (AndL (ConvI2L src) mask));
14971 
14972   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
14973   ins_encode %{
14974     if ($dst$$reg != $src$$reg) {
14975       __ movl($dst$$Register, $src$$Register);
14976     }
14977   %}
14978   ins_pipe(ialu_reg_reg);
14979 %}
14980 
14981 // Zero-extend convert int to long
14982 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14983 %{
14984   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14985 
14986   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
14987   ins_encode %{
14988     __ movl($dst$$Register, $src$$Address);
14989   %}
14990   ins_pipe(ialu_reg_mem);
14991 %}
14992 
14993 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14994 %{
14995   match(Set dst (AndL src mask));
14996 
14997   format %{ "movl    $dst, $src\t# zero-extend long" %}
14998   ins_encode %{
14999     __ movl($dst$$Register, $src$$Register);
15000   %}
15001   ins_pipe(ialu_reg_reg);
15002 %}
15003 
15004 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15005 %{
15006   match(Set dst (ConvL2I src));
15007 
15008   format %{ "movl    $dst, $src\t# l2i" %}
15009   ins_encode %{
15010     __ movl($dst$$Register, $src$$Register);
15011   %}
15012   ins_pipe(ialu_reg_reg);
15013 %}
15014 
15015 
15016 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15017   match(Set dst (MoveF2I src));
15018   effect(DEF dst, USE src);
15019 
15020   ins_cost(125);
15021   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
15022   ins_encode %{
15023     __ movl($dst$$Register, Address(rsp, $src$$disp));
15024   %}
15025   ins_pipe(ialu_reg_mem);
15026 %}
15027 
15028 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15029   match(Set dst (MoveI2F src));
15030   effect(DEF dst, USE src);
15031 
15032   ins_cost(125);
15033   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
15034   ins_encode %{
15035     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15036   %}
15037   ins_pipe(pipe_slow);
15038 %}
15039 
15040 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15041   match(Set dst (MoveD2L src));
15042   effect(DEF dst, USE src);
15043 
15044   ins_cost(125);
15045   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
15046   ins_encode %{
15047     __ movq($dst$$Register, Address(rsp, $src$$disp));
15048   %}
15049   ins_pipe(ialu_reg_mem);
15050 %}
15051 
15052 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15053   predicate(!UseXmmLoadAndClearUpper);
15054   match(Set dst (MoveL2D src));
15055   effect(DEF dst, USE src);
15056 
15057   ins_cost(125);
15058   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
15059   ins_encode %{
15060     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15061   %}
15062   ins_pipe(pipe_slow);
15063 %}
15064 
15065 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15066   predicate(UseXmmLoadAndClearUpper);
15067   match(Set dst (MoveL2D src));
15068   effect(DEF dst, USE src);
15069 
15070   ins_cost(125);
15071   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
15072   ins_encode %{
15073     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15074   %}
15075   ins_pipe(pipe_slow);
15076 %}
15077 
15078 
15079 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15080   match(Set dst (MoveF2I src));
15081   effect(DEF dst, USE src);
15082 
15083   ins_cost(95); // XXX
15084   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
15085   ins_encode %{
15086     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15087   %}
15088   ins_pipe(pipe_slow);
15089 %}
15090 
15091 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15092   match(Set dst (MoveI2F src));
15093   effect(DEF dst, USE src);
15094 
15095   ins_cost(100);
15096   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
15097   ins_encode %{
15098     __ movl(Address(rsp, $dst$$disp), $src$$Register);
15099   %}
15100   ins_pipe( ialu_mem_reg );
15101 %}
15102 
15103 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15104   match(Set dst (MoveD2L src));
15105   effect(DEF dst, USE src);
15106 
15107   ins_cost(95); // XXX
15108   format %{ "movsd   $dst, $src\t# MoveL2D_reg_stack" %}
15109   ins_encode %{
15110     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15111   %}
15112   ins_pipe(pipe_slow);
15113 %}
15114 
15115 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15116   match(Set dst (MoveL2D src));
15117   effect(DEF dst, USE src);
15118 
15119   ins_cost(100);
15120   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
15121   ins_encode %{
15122     __ movq(Address(rsp, $dst$$disp), $src$$Register);
15123   %}
15124   ins_pipe(ialu_mem_reg);
15125 %}
15126 
15127 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15128   match(Set dst (MoveF2I src));
15129   effect(DEF dst, USE src);
15130   ins_cost(85);
15131   format %{ "movd    $dst,$src\t# MoveF2I" %}
15132   ins_encode %{
15133     __ movdl($dst$$Register, $src$$XMMRegister);
15134   %}
15135   ins_pipe( pipe_slow );
15136 %}
15137 
15138 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15139   match(Set dst (MoveD2L src));
15140   effect(DEF dst, USE src);
15141   ins_cost(85);
15142   format %{ "movd    $dst,$src\t# MoveD2L" %}
15143   ins_encode %{
15144     __ movdq($dst$$Register, $src$$XMMRegister);
15145   %}
15146   ins_pipe( pipe_slow );
15147 %}
15148 
15149 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15150   match(Set dst (MoveI2F src));
15151   effect(DEF dst, USE src);
15152   ins_cost(100);
15153   format %{ "movd    $dst,$src\t# MoveI2F" %}
15154   ins_encode %{
15155     __ movdl($dst$$XMMRegister, $src$$Register);
15156   %}
15157   ins_pipe( pipe_slow );
15158 %}
15159 
15160 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15161   match(Set dst (MoveL2D src));
15162   effect(DEF dst, USE src);
15163   ins_cost(100);
15164   format %{ "movd    $dst,$src\t# MoveL2D" %}
15165   ins_encode %{
15166      __ movdq($dst$$XMMRegister, $src$$Register);
15167   %}
15168   ins_pipe( pipe_slow );
15169 %}
15170 
15171 
15172 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15174 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15175                   Universe dummy, rFlagsReg cr)
15176 %{
15177   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15178   match(Set dummy (ClearArray (Binary cnt base) val));
15179   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15180 
15181   format %{ $$template
15182     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15183     $$emit$$"jg      LARGE\n\t"
15184     $$emit$$"dec     rcx\n\t"
15185     $$emit$$"js      DONE\t# Zero length\n\t"
15186     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15187     $$emit$$"dec     rcx\n\t"
15188     $$emit$$"jge     LOOP\n\t"
15189     $$emit$$"jmp     DONE\n\t"
15190     $$emit$$"# LARGE:\n\t"
15191     if (UseFastStosb) {
15192        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15193        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15194     } else if (UseXMMForObjInit) {
15195        $$emit$$"movdq   $tmp, $val\n\t"
15196        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15197        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15198        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15199        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15200        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15201        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15202        $$emit$$"add     0x40,rax\n\t"
15203        $$emit$$"# L_zero_64_bytes:\n\t"
15204        $$emit$$"sub     0x8,rcx\n\t"
15205        $$emit$$"jge     L_loop\n\t"
15206        $$emit$$"add     0x4,rcx\n\t"
15207        $$emit$$"jl      L_tail\n\t"
15208        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15209        $$emit$$"add     0x20,rax\n\t"
15210        $$emit$$"sub     0x4,rcx\n\t"
15211        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15212        $$emit$$"add     0x4,rcx\n\t"
15213        $$emit$$"jle     L_end\n\t"
15214        $$emit$$"dec     rcx\n\t"
15215        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15216        $$emit$$"vmovq   xmm0,(rax)\n\t"
15217        $$emit$$"add     0x8,rax\n\t"
15218        $$emit$$"dec     rcx\n\t"
15219        $$emit$$"jge     L_sloop\n\t"
15220        $$emit$$"# L_end:\n\t"
15221     } else {
15222        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15223     }
15224     $$emit$$"# DONE"
15225   %}
15226   ins_encode %{
15227     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15228                  $tmp$$XMMRegister, false, false);
15229   %}
15230   ins_pipe(pipe_slow);
15231 %}
15232 
15233 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15234                             Universe dummy, rFlagsReg cr)
15235 %{
15236   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15237   match(Set dummy (ClearArray (Binary cnt base) val));
15238   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15239 
15240   format %{ $$template
15241     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15242     $$emit$$"jg      LARGE\n\t"
15243     $$emit$$"dec     rcx\n\t"
15244     $$emit$$"js      DONE\t# Zero length\n\t"
15245     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15246     $$emit$$"dec     rcx\n\t"
15247     $$emit$$"jge     LOOP\n\t"
15248     $$emit$$"jmp     DONE\n\t"
15249     $$emit$$"# LARGE:\n\t"
15250     if (UseXMMForObjInit) {
15251        $$emit$$"movdq   $tmp, $val\n\t"
15252        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15253        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15254        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15255        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15256        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15257        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15258        $$emit$$"add     0x40,rax\n\t"
15259        $$emit$$"# L_zero_64_bytes:\n\t"
15260        $$emit$$"sub     0x8,rcx\n\t"
15261        $$emit$$"jge     L_loop\n\t"
15262        $$emit$$"add     0x4,rcx\n\t"
15263        $$emit$$"jl      L_tail\n\t"
15264        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15265        $$emit$$"add     0x20,rax\n\t"
15266        $$emit$$"sub     0x4,rcx\n\t"
15267        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15268        $$emit$$"add     0x4,rcx\n\t"
15269        $$emit$$"jle     L_end\n\t"
15270        $$emit$$"dec     rcx\n\t"
15271        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15272        $$emit$$"vmovq   xmm0,(rax)\n\t"
15273        $$emit$$"add     0x8,rax\n\t"
15274        $$emit$$"dec     rcx\n\t"
15275        $$emit$$"jge     L_sloop\n\t"
15276        $$emit$$"# L_end:\n\t"
15277     } else {
15278        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15279     }
15280     $$emit$$"# DONE"
15281   %}
15282   ins_encode %{
15283     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15284                  $tmp$$XMMRegister, false, true);
15285   %}
15286   ins_pipe(pipe_slow);
15287 %}
15288 
15289 // Small non-constant length ClearArray for AVX512 targets.
15290 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15291                        Universe dummy, rFlagsReg cr)
15292 %{
15293   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15294   match(Set dummy (ClearArray (Binary cnt base) val));
15295   ins_cost(125);
15296   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15297 
15298   format %{ $$template
15299     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15300     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15301     $$emit$$"jg      LARGE\n\t"
15302     $$emit$$"dec     rcx\n\t"
15303     $$emit$$"js      DONE\t# Zero length\n\t"
15304     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15305     $$emit$$"dec     rcx\n\t"
15306     $$emit$$"jge     LOOP\n\t"
15307     $$emit$$"jmp     DONE\n\t"
15308     $$emit$$"# LARGE:\n\t"
15309     if (UseFastStosb) {
15310        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15311        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15312     } else if (UseXMMForObjInit) {
15313        $$emit$$"mov     rdi,rax\n\t"
15314        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15315        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15316        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15317        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15318        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15319        $$emit$$"add     0x40,rax\n\t"
15320        $$emit$$"# L_zero_64_bytes:\n\t"
15321        $$emit$$"sub     0x8,rcx\n\t"
15322        $$emit$$"jge     L_loop\n\t"
15323        $$emit$$"add     0x4,rcx\n\t"
15324        $$emit$$"jl      L_tail\n\t"
15325        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15326        $$emit$$"add     0x20,rax\n\t"
15327        $$emit$$"sub     0x4,rcx\n\t"
15328        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15329        $$emit$$"add     0x4,rcx\n\t"
15330        $$emit$$"jle     L_end\n\t"
15331        $$emit$$"dec     rcx\n\t"
15332        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15333        $$emit$$"vmovq   xmm0,(rax)\n\t"
15334        $$emit$$"add     0x8,rax\n\t"
15335        $$emit$$"dec     rcx\n\t"
15336        $$emit$$"jge     L_sloop\n\t"
15337        $$emit$$"# L_end:\n\t"
15338     } else {
15339        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15340     }
15341     $$emit$$"# DONE"
15342   %}
15343   ins_encode %{
15344     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15345                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15346   %}
15347   ins_pipe(pipe_slow);
15348 %}
15349 
15350 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15351                                  Universe dummy, rFlagsReg cr)
15352 %{
15353   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15354   match(Set dummy (ClearArray (Binary cnt base) val));
15355   ins_cost(125);
15356   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15357 
15358   format %{ $$template
15359     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15360     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15361     $$emit$$"jg      LARGE\n\t"
15362     $$emit$$"dec     rcx\n\t"
15363     $$emit$$"js      DONE\t# Zero length\n\t"
15364     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15365     $$emit$$"dec     rcx\n\t"
15366     $$emit$$"jge     LOOP\n\t"
15367     $$emit$$"jmp     DONE\n\t"
15368     $$emit$$"# LARGE:\n\t"
15369     if (UseFastStosb) {
15370        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15371        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15372     } else if (UseXMMForObjInit) {
15373        $$emit$$"mov     rdi,rax\n\t"
15374        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15375        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15376        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15377        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15378        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15379        $$emit$$"add     0x40,rax\n\t"
15380        $$emit$$"# L_zero_64_bytes:\n\t"
15381        $$emit$$"sub     0x8,rcx\n\t"
15382        $$emit$$"jge     L_loop\n\t"
15383        $$emit$$"add     0x4,rcx\n\t"
15384        $$emit$$"jl      L_tail\n\t"
15385        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15386        $$emit$$"add     0x20,rax\n\t"
15387        $$emit$$"sub     0x4,rcx\n\t"
15388        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15389        $$emit$$"add     0x4,rcx\n\t"
15390        $$emit$$"jle     L_end\n\t"
15391        $$emit$$"dec     rcx\n\t"
15392        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15393        $$emit$$"vmovq   xmm0,(rax)\n\t"
15394        $$emit$$"add     0x8,rax\n\t"
15395        $$emit$$"dec     rcx\n\t"
15396        $$emit$$"jge     L_sloop\n\t"
15397        $$emit$$"# L_end:\n\t"
15398     } else {
15399        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15400     }
15401     $$emit$$"# DONE"
15402   %}
15403   ins_encode %{
15404     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15405                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15406   %}
15407   ins_pipe(pipe_slow);
15408 %}
15409 
15410 // Large non-constant length ClearArray for non-AVX512 targets.
15411 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15412                         Universe dummy, rFlagsReg cr)
15413 %{
15414   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15415   match(Set dummy (ClearArray (Binary cnt base) val));
15416   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15417 
15418   format %{ $$template
15419     if (UseFastStosb) {
15420        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15421        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15422     } else if (UseXMMForObjInit) {
15423        $$emit$$"movdq   $tmp, $val\n\t"
15424        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15425        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15426        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15427        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15428        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15429        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15430        $$emit$$"add     0x40,rax\n\t"
15431        $$emit$$"# L_zero_64_bytes:\n\t"
15432        $$emit$$"sub     0x8,rcx\n\t"
15433        $$emit$$"jge     L_loop\n\t"
15434        $$emit$$"add     0x4,rcx\n\t"
15435        $$emit$$"jl      L_tail\n\t"
15436        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15437        $$emit$$"add     0x20,rax\n\t"
15438        $$emit$$"sub     0x4,rcx\n\t"
15439        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15440        $$emit$$"add     0x4,rcx\n\t"
15441        $$emit$$"jle     L_end\n\t"
15442        $$emit$$"dec     rcx\n\t"
15443        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15444        $$emit$$"vmovq   xmm0,(rax)\n\t"
15445        $$emit$$"add     0x8,rax\n\t"
15446        $$emit$$"dec     rcx\n\t"
15447        $$emit$$"jge     L_sloop\n\t"
15448        $$emit$$"# L_end:\n\t"
15449     } else {
15450        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15451     }
15452   %}
15453   ins_encode %{
15454     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15455                  $tmp$$XMMRegister, true, false);
15456   %}
15457   ins_pipe(pipe_slow);
15458 %}
15459 
15460 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15461                                   Universe dummy, rFlagsReg cr)
15462 %{
15463   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15464   match(Set dummy (ClearArray (Binary cnt base) val));
15465   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15466 
15467   format %{ $$template
15468     if (UseXMMForObjInit) {
15469        $$emit$$"movdq   $tmp, $val\n\t"
15470        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15471        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15472        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15473        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15474        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15475        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15476        $$emit$$"add     0x40,rax\n\t"
15477        $$emit$$"# L_zero_64_bytes:\n\t"
15478        $$emit$$"sub     0x8,rcx\n\t"
15479        $$emit$$"jge     L_loop\n\t"
15480        $$emit$$"add     0x4,rcx\n\t"
15481        $$emit$$"jl      L_tail\n\t"
15482        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15483        $$emit$$"add     0x20,rax\n\t"
15484        $$emit$$"sub     0x4,rcx\n\t"
15485        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15486        $$emit$$"add     0x4,rcx\n\t"
15487        $$emit$$"jle     L_end\n\t"
15488        $$emit$$"dec     rcx\n\t"
15489        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15490        $$emit$$"vmovq   xmm0,(rax)\n\t"
15491        $$emit$$"add     0x8,rax\n\t"
15492        $$emit$$"dec     rcx\n\t"
15493        $$emit$$"jge     L_sloop\n\t"
15494        $$emit$$"# L_end:\n\t"
15495     } else {
15496        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15497     }
15498   %}
15499   ins_encode %{
15500     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15501                  $tmp$$XMMRegister, true, true);
15502   %}
15503   ins_pipe(pipe_slow);
15504 %}
15505 
15506 // Large non-constant length ClearArray for AVX512 targets.
15507 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15508                              Universe dummy, rFlagsReg cr)
15509 %{
15510   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15511   match(Set dummy (ClearArray (Binary cnt base) val));
15512   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15513 
15514   format %{ $$template
15515     if (UseFastStosb) {
15516        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15517        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15518        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15519     } else if (UseXMMForObjInit) {
15520        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15521        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15522        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15523        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15524        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15525        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15526        $$emit$$"add     0x40,rax\n\t"
15527        $$emit$$"# L_zero_64_bytes:\n\t"
15528        $$emit$$"sub     0x8,rcx\n\t"
15529        $$emit$$"jge     L_loop\n\t"
15530        $$emit$$"add     0x4,rcx\n\t"
15531        $$emit$$"jl      L_tail\n\t"
15532        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15533        $$emit$$"add     0x20,rax\n\t"
15534        $$emit$$"sub     0x4,rcx\n\t"
15535        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15536        $$emit$$"add     0x4,rcx\n\t"
15537        $$emit$$"jle     L_end\n\t"
15538        $$emit$$"dec     rcx\n\t"
15539        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15540        $$emit$$"vmovq   xmm0,(rax)\n\t"
15541        $$emit$$"add     0x8,rax\n\t"
15542        $$emit$$"dec     rcx\n\t"
15543        $$emit$$"jge     L_sloop\n\t"
15544        $$emit$$"# L_end:\n\t"
15545     } else {
15546        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15547        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15548     }
15549   %}
15550   ins_encode %{
15551     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15552                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15553   %}
15554   ins_pipe(pipe_slow);
15555 %}
15556 
15557 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15558                                        Universe dummy, rFlagsReg cr)
15559 %{
15560   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15561   match(Set dummy (ClearArray (Binary cnt base) val));
15562   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15563 
15564   format %{ $$template
15565     if (UseFastStosb) {
15566        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15567        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15568        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15569     } else if (UseXMMForObjInit) {
15570        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15571        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15572        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15573        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15574        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15575        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15576        $$emit$$"add     0x40,rax\n\t"
15577        $$emit$$"# L_zero_64_bytes:\n\t"
15578        $$emit$$"sub     0x8,rcx\n\t"
15579        $$emit$$"jge     L_loop\n\t"
15580        $$emit$$"add     0x4,rcx\n\t"
15581        $$emit$$"jl      L_tail\n\t"
15582        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15583        $$emit$$"add     0x20,rax\n\t"
15584        $$emit$$"sub     0x4,rcx\n\t"
15585        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15586        $$emit$$"add     0x4,rcx\n\t"
15587        $$emit$$"jle     L_end\n\t"
15588        $$emit$$"dec     rcx\n\t"
15589        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15590        $$emit$$"vmovq   xmm0,(rax)\n\t"
15591        $$emit$$"add     0x8,rax\n\t"
15592        $$emit$$"dec     rcx\n\t"
15593        $$emit$$"jge     L_sloop\n\t"
15594        $$emit$$"# L_end:\n\t"
15595     } else {
15596        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15597        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15598     }
15599   %}
15600   ins_encode %{
15601     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15602                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15603   %}
15604   ins_pipe(pipe_slow);
15605 %}
15606 
15607 // Small constant length ClearArray for AVX512 targets.
15608 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15609 %{
15610   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15611             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15612   match(Set dummy (ClearArray (Binary cnt base) val));
15613   ins_cost(100);
15614   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15615   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15616   ins_encode %{
15617     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15618   %}
15619   ins_pipe(pipe_slow);
15620 %}
15621 
15622 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15623                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15624 %{
15625   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15626   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15627   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15628 
15629   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15630   ins_encode %{
15631     __ string_compare($str1$$Register, $str2$$Register,
15632                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15633                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15634   %}
15635   ins_pipe( pipe_slow );
15636 %}
15637 
15638 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15639                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15640 %{
15641   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15642   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15643   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15644 
15645   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15646   ins_encode %{
15647     __ string_compare($str1$$Register, $str2$$Register,
15648                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15649                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15650   %}
15651   ins_pipe( pipe_slow );
15652 %}
15653 
15654 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15655                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15656 %{
15657   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15658   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15659   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15660 
15661   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15662   ins_encode %{
15663     __ string_compare($str1$$Register, $str2$$Register,
15664                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15665                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15666   %}
15667   ins_pipe( pipe_slow );
15668 %}
15669 
15670 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15671                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15672 %{
15673   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15674   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15675   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15676 
15677   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15678   ins_encode %{
15679     __ string_compare($str1$$Register, $str2$$Register,
15680                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15681                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15682   %}
15683   ins_pipe( pipe_slow );
15684 %}
15685 
15686 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15687                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15688 %{
15689   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15690   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15691   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15692 
15693   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15694   ins_encode %{
15695     __ string_compare($str1$$Register, $str2$$Register,
15696                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15697                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15698   %}
15699   ins_pipe( pipe_slow );
15700 %}
15701 
15702 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15703                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15704 %{
15705   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15706   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15707   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15708 
15709   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15710   ins_encode %{
15711     __ string_compare($str1$$Register, $str2$$Register,
15712                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15713                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15714   %}
15715   ins_pipe( pipe_slow );
15716 %}
15717 
15718 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15719                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15720 %{
15721   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15722   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15723   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15724 
15725   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15726   ins_encode %{
15727     __ string_compare($str2$$Register, $str1$$Register,
15728                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15729                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15730   %}
15731   ins_pipe( pipe_slow );
15732 %}
15733 
15734 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15735                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15736 %{
15737   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15738   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15739   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15740 
15741   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15742   ins_encode %{
15743     __ string_compare($str2$$Register, $str1$$Register,
15744                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15745                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15746   %}
15747   ins_pipe( pipe_slow );
15748 %}
15749 
15750 // fast search of substring with known size.
15751 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15752                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15753 %{
15754   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15755   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15756   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15757 
15758   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15759   ins_encode %{
15760     int icnt2 = (int)$int_cnt2$$constant;
15761     if (icnt2 >= 16) {
      // IndexOf for constant substrings of 16 or more elements,
      // which do not need to be loaded through the stack.
15764       __ string_indexofC8($str1$$Register, $str2$$Register,
15765                           $cnt1$$Register, $cnt2$$Register,
15766                           icnt2, $result$$Register,
15767                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15768     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15770       __ string_indexof($str1$$Register, $str2$$Register,
15771                         $cnt1$$Register, $cnt2$$Register,
15772                         icnt2, $result$$Register,
15773                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15774     }
15775   %}
15776   ins_pipe( pipe_slow );
15777 %}
15778 
15779 // fast search of substring with known size.
15780 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15781                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15782 %{
15783   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15784   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15785   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15786 
15787   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15788   ins_encode %{
15789     int icnt2 = (int)$int_cnt2$$constant;
15790     if (icnt2 >= 8) {
      // IndexOf for constant substrings of 8 or more elements,
      // which do not need to be loaded through the stack.
15793       __ string_indexofC8($str1$$Register, $str2$$Register,
15794                           $cnt1$$Register, $cnt2$$Register,
15795                           icnt2, $result$$Register,
15796                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15797     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15799       __ string_indexof($str1$$Register, $str2$$Register,
15800                         $cnt1$$Register, $cnt2$$Register,
15801                         icnt2, $result$$Register,
15802                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15803     }
15804   %}
15805   ins_pipe( pipe_slow );
15806 %}
15807 
15808 // fast search of substring with known size.
15809 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15810                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15811 %{
15812   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15813   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15814   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15815 
15816   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15817   ins_encode %{
15818     int icnt2 = (int)$int_cnt2$$constant;
15819     if (icnt2 >= 8) {
      // IndexOf for constant substrings of 8 or more elements,
      // which do not need to be loaded through the stack.
15822       __ string_indexofC8($str1$$Register, $str2$$Register,
15823                           $cnt1$$Register, $cnt2$$Register,
15824                           icnt2, $result$$Register,
15825                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15826     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15828       __ string_indexof($str1$$Register, $str2$$Register,
15829                         $cnt1$$Register, $cnt2$$Register,
15830                         icnt2, $result$$Register,
15831                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15832     }
15833   %}
15834   ins_pipe( pipe_slow );
15835 %}
15836 
15837 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15838                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15839 %{
15840   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15841   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15842   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15843 
15844   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15845   ins_encode %{
15846     __ string_indexof($str1$$Register, $str2$$Register,
15847                       $cnt1$$Register, $cnt2$$Register,
15848                       (-1), $result$$Register,
15849                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15850   %}
15851   ins_pipe( pipe_slow );
15852 %}
15853 
15854 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15855                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15856 %{
15857   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15858   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15859   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15860 
15861   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15862   ins_encode %{
15863     __ string_indexof($str1$$Register, $str2$$Register,
15864                       $cnt1$$Register, $cnt2$$Register,
15865                       (-1), $result$$Register,
15866                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15867   %}
15868   ins_pipe( pipe_slow );
15869 %}
15870 
15871 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15872                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15873 %{
15874   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15875   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15876   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15877 
15878   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15879   ins_encode %{
15880     __ string_indexof($str1$$Register, $str2$$Register,
15881                       $cnt1$$Register, $cnt2$$Register,
15882                       (-1), $result$$Register,
15883                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15884   %}
15885   ins_pipe( pipe_slow );
15886 %}
15887 
15888 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15889                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15890 %{
15891   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15892   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15893   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15894   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15895   ins_encode %{
15896     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15897                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15898   %}
15899   ins_pipe( pipe_slow );
15900 %}
15901 
15902 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15903                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15904 %{
15905   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15906   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15907   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15908   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15909   ins_encode %{
15910     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15911                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15912   %}
15913   ins_pipe( pipe_slow );
15914 %}
15915 
15916 // fast string equals
15917 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15918                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15919 %{
15920   predicate(!VM_Version::supports_avx512vlbw());
15921   match(Set result (StrEquals (Binary str1 str2) cnt));
15922   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15923 
15924   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15925   ins_encode %{
15926     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15927                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15928                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15929   %}
15930   ins_pipe( pipe_slow );
15931 %}
15932 
15933 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15934                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15935 %{
15936   predicate(VM_Version::supports_avx512vlbw());
15937   match(Set result (StrEquals (Binary str1 str2) cnt));
15938   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15939 
15940   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15941   ins_encode %{
15942     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15943                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15944                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15945   %}
15946   ins_pipe( pipe_slow );
15947 %}
15948 
15949 // fast array equals
15950 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15951                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15952 %{
15953   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15954   match(Set result (AryEq ary1 ary2));
15955   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15956 
15957   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15958   ins_encode %{
15959     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15960                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15961                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15962   %}
15963   ins_pipe( pipe_slow );
15964 %}
15965 
15966 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15967                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15968 %{
15969   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15970   match(Set result (AryEq ary1 ary2));
15971   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15972 
15973   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15974   ins_encode %{
15975     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15976                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15977                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15978   %}
15979   ins_pipe( pipe_slow );
15980 %}
15981 
15982 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15983                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15984 %{
15985   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15986   match(Set result (AryEq ary1 ary2));
15987   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15988 
15989   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15990   ins_encode %{
15991     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15992                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15993                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15994   %}
15995   ins_pipe( pipe_slow );
15996 %}
15997 
15998 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15999                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16000 %{
16001   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16002   match(Set result (AryEq ary1 ary2));
16003   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16004 
16005   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16006   ins_encode %{
16007     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16008                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16009                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
16010   %}
16011   ins_pipe( pipe_slow );
16012 %}
16013 
16014 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
16015                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
16016                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
16017                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
16018                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
16019 %{
16020   predicate(UseAVX >= 2);
16021   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
16022   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
16023          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
16024          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
16025          USE basic_type, KILL cr);
16026 
16027   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
16028   ins_encode %{
16029     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
16030                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
16031                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
16032                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
16033                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
16034                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
16035                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
16036   %}
16037   ins_pipe( pipe_slow );
16038 %}
16039 
16040 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16042 %{
16043   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16044   match(Set result (CountPositives ary1 len));
16045   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16046 
16047   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
16048   ins_encode %{
16049     __ count_positives($ary1$$Register, $len$$Register,
16050                        $result$$Register, $tmp3$$Register,
16051                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
16052   %}
16053   ins_pipe( pipe_slow );
16054 %}
16055 
16056 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
16058 %{
16059   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16060   match(Set result (CountPositives ary1 len));
16061   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16062 
16063   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
16064   ins_encode %{
16065     __ count_positives($ary1$$Register, $len$$Register,
16066                        $result$$Register, $tmp3$$Register,
16067                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
16068   %}
16069   ins_pipe( pipe_slow );
16070 %}
16071 
16072 // fast char[] to byte[] compression
16073 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16074                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16075   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16076   match(Set result (StrCompressedCopy src (Binary dst len)));
16077   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
16078          USE_KILL len, KILL tmp5, KILL cr);
16079 
16080   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
16081   ins_encode %{
16082     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16083                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16084                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16085                            knoreg, knoreg);
16086   %}
16087   ins_pipe( pipe_slow );
16088 %}
16089 
16090 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16091                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16092   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16093   match(Set result (StrCompressedCopy src (Binary dst len)));
16094   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
16095          USE_KILL len, KILL tmp5, KILL cr);
16096 
16097   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
16098   ins_encode %{
16099     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16100                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16101                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16102                            $ktmp1$$KRegister, $ktmp2$$KRegister);
16103   %}
16104   ins_pipe( pipe_slow );
16105 %}
16106 // fast byte[] to char[] inflation
16107 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16108                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
16109   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16110   match(Set dummy (StrInflatedCopy src (Binary dst len)));
16111   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16112 
16113   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
16114   ins_encode %{
16115     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16116                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
16117   %}
16118   ins_pipe( pipe_slow );
16119 %}
16120 
16121 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16122                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
16123   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16124   match(Set dummy (StrInflatedCopy src (Binary dst len)));
16125   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16126 
16127   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
16128   ins_encode %{
16129     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16130                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
16131   %}
16132   ins_pipe( pipe_slow );
16133 %}
16134 
16135 // encode char[] to byte[] in ISO_8859_1
16136 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16137                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16138                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16139   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
16140   match(Set result (EncodeISOArray src (Binary dst len)));
16141   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16142 
16143   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16144   ins_encode %{
16145     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16146                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16147                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
16148   %}
16149   ins_pipe( pipe_slow );
16150 %}
16151 
16152 // encode char[] to byte[] in ASCII
16153 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16154                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16155                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16156   predicate(((EncodeISOArrayNode*)n)->is_ascii());
16157   match(Set result (EncodeISOArray src (Binary dst len)));
16158   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16159 
16160   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16161   ins_encode %{
16162     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16163                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16164                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16165   %}
16166   ins_pipe( pipe_slow );
16167 %}
16168 
16169 //----------Overflow Math Instructions-----------------------------------------
16170 
16171 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16172 %{
16173   match(Set cr (OverflowAddI op1 op2));
16174   effect(DEF cr, USE_KILL op1, USE op2);
16175 
16176   format %{ "addl    $op1, $op2\t# overflow check int" %}
16177 
16178   ins_encode %{
16179     __ addl($op1$$Register, $op2$$Register);
16180   %}
16181   ins_pipe(ialu_reg_reg);
16182 %}
16183 
16184 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16185 %{
16186   match(Set cr (OverflowAddI op1 op2));
16187   effect(DEF cr, USE_KILL op1, USE op2);
16188 
16189   format %{ "addl    $op1, $op2\t# overflow check int" %}
16190 
16191   ins_encode %{
16192     __ addl($op1$$Register, $op2$$constant);
16193   %}
16194   ins_pipe(ialu_reg_reg);
16195 %}
16196 
16197 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16198 %{
16199   match(Set cr (OverflowAddL op1 op2));
16200   effect(DEF cr, USE_KILL op1, USE op2);
16201 
16202   format %{ "addq    $op1, $op2\t# overflow check long" %}
16203   ins_encode %{
16204     __ addq($op1$$Register, $op2$$Register);
16205   %}
16206   ins_pipe(ialu_reg_reg);
16207 %}
16208 
16209 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16210 %{
16211   match(Set cr (OverflowAddL op1 op2));
16212   effect(DEF cr, USE_KILL op1, USE op2);
16213 
16214   format %{ "addq    $op1, $op2\t# overflow check long" %}
16215   ins_encode %{
16216     __ addq($op1$$Register, $op2$$constant);
16217   %}
16218   ins_pipe(ialu_reg_reg);
16219 %}
16220 
16221 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16222 %{
16223   match(Set cr (OverflowSubI op1 op2));
16224 
16225   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16226   ins_encode %{
16227     __ cmpl($op1$$Register, $op2$$Register);
16228   %}
16229   ins_pipe(ialu_reg_reg);
16230 %}
16231 
16232 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16233 %{
16234   match(Set cr (OverflowSubI op1 op2));
16235 
16236   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16237   ins_encode %{
16238     __ cmpl($op1$$Register, $op2$$constant);
16239   %}
16240   ins_pipe(ialu_reg_reg);
16241 %}
16242 
16243 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16244 %{
16245   match(Set cr (OverflowSubL op1 op2));
16246 
16247   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16248   ins_encode %{
16249     __ cmpq($op1$$Register, $op2$$Register);
16250   %}
16251   ins_pipe(ialu_reg_reg);
16252 %}
16253 
16254 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16255 %{
16256   match(Set cr (OverflowSubL op1 op2));
16257 
16258   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16259   ins_encode %{
16260     __ cmpq($op1$$Register, $op2$$constant);
16261   %}
16262   ins_pipe(ialu_reg_reg);
16263 %}
16264 
16265 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16266 %{
16267   match(Set cr (OverflowSubI zero op2));
16268   effect(DEF cr, USE_KILL op2);
16269 
16270   format %{ "negl    $op2\t# overflow check int" %}
16271   ins_encode %{
16272     __ negl($op2$$Register);
16273   %}
16274   ins_pipe(ialu_reg_reg);
16275 %}
16276 
16277 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16278 %{
16279   match(Set cr (OverflowSubL zero op2));
16280   effect(DEF cr, USE_KILL op2);
16281 
16282   format %{ "negq    $op2\t# overflow check long" %}
16283   ins_encode %{
16284     __ negq($op2$$Register);
16285   %}
16286   ins_pipe(ialu_reg_reg);
16287 %}
16288 
16289 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16290 %{
16291   match(Set cr (OverflowMulI op1 op2));
16292   effect(DEF cr, USE_KILL op1, USE op2);
16293 
16294   format %{ "imull    $op1, $op2\t# overflow check int" %}
16295   ins_encode %{
16296     __ imull($op1$$Register, $op2$$Register);
16297   %}
16298   ins_pipe(ialu_reg_reg_alu0);
16299 %}
16300 
16301 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16302 %{
16303   match(Set cr (OverflowMulI op1 op2));
16304   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16305 
16306   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
16307   ins_encode %{
16308     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16309   %}
16310   ins_pipe(ialu_reg_reg_alu0);
16311 %}
16312 
16313 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16314 %{
16315   match(Set cr (OverflowMulL op1 op2));
16316   effect(DEF cr, USE_KILL op1, USE op2);
16317 
16318   format %{ "imulq    $op1, $op2\t# overflow check long" %}
16319   ins_encode %{
16320     __ imulq($op1$$Register, $op2$$Register);
16321   %}
16322   ins_pipe(ialu_reg_reg_alu0);
16323 %}
16324 
16325 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16326 %{
16327   match(Set cr (OverflowMulL op1 op2));
16328   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16329 
16330   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
16331   ins_encode %{
16332     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16333   %}
16334   ins_pipe(ialu_reg_reg_alu0);
16335 %}
16336 
16337 
16338 //----------Control Flow Instructions------------------------------------------
// Signed compare instructions
16340 
16341 // XXX more variants!!
16342 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16343 %{
16344   match(Set cr (CmpI op1 op2));
16345   effect(DEF cr, USE op1, USE op2);
16346 
16347   format %{ "cmpl    $op1, $op2" %}
16348   ins_encode %{
16349     __ cmpl($op1$$Register, $op2$$Register);
16350   %}
16351   ins_pipe(ialu_cr_reg_reg);
16352 %}
16353 
16354 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16355 %{
16356   match(Set cr (CmpI op1 op2));
16357 
16358   format %{ "cmpl    $op1, $op2" %}
16359   ins_encode %{
16360     __ cmpl($op1$$Register, $op2$$constant);
16361   %}
16362   ins_pipe(ialu_cr_reg_imm);
16363 %}
16364 
16365 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16366 %{
16367   match(Set cr (CmpI op1 (LoadI op2)));
16368 
16369   ins_cost(500); // XXX
16370   format %{ "cmpl    $op1, $op2" %}
16371   ins_encode %{
16372     __ cmpl($op1$$Register, $op2$$Address);
16373   %}
16374   ins_pipe(ialu_cr_reg_mem);
16375 %}
16376 
16377 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16378 %{
16379   match(Set cr (CmpI src zero));
16380 
16381   format %{ "testl   $src, $src" %}
16382   ins_encode %{
16383     __ testl($src$$Register, $src$$Register);
16384   %}
16385   ins_pipe(ialu_cr_reg_imm);
16386 %}
16387 
16388 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16389 %{
16390   match(Set cr (CmpI (AndI src con) zero));
16391 
16392   format %{ "testl   $src, $con" %}
16393   ins_encode %{
16394     __ testl($src$$Register, $con$$constant);
16395   %}
16396   ins_pipe(ialu_cr_reg_imm);
16397 %}
16398 
16399 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16400 %{
16401   match(Set cr (CmpI (AndI src1 src2) zero));
16402 
16403   format %{ "testl   $src1, $src2" %}
16404   ins_encode %{
16405     __ testl($src1$$Register, $src2$$Register);
16406   %}
16407   ins_pipe(ialu_cr_reg_imm);
16408 %}
16409 
16410 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16411 %{
16412   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16413 
16414   format %{ "testl   $src, $mem" %}
16415   ins_encode %{
16416     __ testl($src$$Register, $mem$$Address);
16417   %}
16418   ins_pipe(ialu_cr_reg_mem);
16419 %}
16420 
// Unsigned compare instructions; really the same as the signed ones, except
// they produce an rFlagsRegU instead of an rFlagsReg.
16423 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16424 %{
16425   match(Set cr (CmpU op1 op2));
16426 
16427   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16428   ins_encode %{
16429     __ cmpl($op1$$Register, $op2$$Register);
16430   %}
16431   ins_pipe(ialu_cr_reg_reg);
16432 %}
16433 
16434 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16435 %{
16436   match(Set cr (CmpU op1 op2));
16437 
16438   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16439   ins_encode %{
16440     __ cmpl($op1$$Register, $op2$$constant);
16441   %}
16442   ins_pipe(ialu_cr_reg_imm);
16443 %}
16444 
16445 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16446 %{
16447   match(Set cr (CmpU op1 (LoadI op2)));
16448 
16449   ins_cost(500); // XXX
16450   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16451   ins_encode %{
16452     __ cmpl($op1$$Register, $op2$$Address);
16453   %}
16454   ins_pipe(ialu_cr_reg_mem);
16455 %}
16456 
16457 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16458 %{
16459   match(Set cr (CmpU src zero));
16460 
16461   format %{ "testl   $src, $src\t# unsigned" %}
16462   ins_encode %{
16463     __ testl($src$$Register, $src$$Register);
16464   %}
16465   ins_pipe(ialu_cr_reg_imm);
16466 %}
16467 
16468 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16469 %{
16470   match(Set cr (CmpP op1 op2));
16471 
16472   format %{ "cmpq    $op1, $op2\t# ptr" %}
16473   ins_encode %{
16474     __ cmpq($op1$$Register, $op2$$Register);
16475   %}
16476   ins_pipe(ialu_cr_reg_reg);
16477 %}
16478 
16479 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16480 %{
16481   match(Set cr (CmpP op1 (LoadP op2)));
16482   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16483 
16484   ins_cost(500); // XXX
16485   format %{ "cmpq    $op1, $op2\t# ptr" %}
16486   ins_encode %{
16487     __ cmpq($op1$$Register, $op2$$Address);
16488   %}
16489   ins_pipe(ialu_cr_reg_mem);
16490 %}
16491 
16492 // XXX this is generalized by compP_rReg_mem???
16493 // Compare raw pointer (used in out-of-heap check).
16494 // Only works because non-oop pointers must be raw pointers
16495 // and raw pointers have no anti-dependencies.
16496 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16497 %{
16498   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16499             n->in(2)->as_Load()->barrier_data() == 0);
16500   match(Set cr (CmpP op1 (LoadP op2)));
16501 
16502   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16503   ins_encode %{
16504     __ cmpq($op1$$Register, $op2$$Address);
16505   %}
16506   ins_pipe(ialu_cr_reg_mem);
16507 %}
16508 
// This will generate a signed flags result, which should be OK since
// any compare against zero should be eq/neq.
16511 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16512 %{
16513   match(Set cr (CmpP src zero));
16514 
16515   format %{ "testq   $src, $src\t# ptr" %}
16516   ins_encode %{
16517     __ testq($src$$Register, $src$$Register);
16518   %}
16519   ins_pipe(ialu_cr_reg_imm);
16520 %}
16521 
// This will generate a signed flags result, which should be OK since
// any compare against zero should be eq/neq.
16524 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16525 %{
16526   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16527             n->in(1)->as_Load()->barrier_data() == 0);
16528   match(Set cr (CmpP (LoadP op) zero));
16529 
16530   ins_cost(500); // XXX
16531   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16532   ins_encode %{
16533     __ testq($op$$Address, 0xFFFFFFFF);
16534   %}
16535   ins_pipe(ialu_cr_reg_imm);
16536 %}
16537 
16538 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16539 %{
16540   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16541             n->in(1)->as_Load()->barrier_data() == 0);
16542   match(Set cr (CmpP (LoadP mem) zero));
16543 
16544   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16545   ins_encode %{
16546     __ cmpq(r12, $mem$$Address);
16547   %}
16548   ins_pipe(ialu_cr_reg_mem);
16549 %}
16550 
16551 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16552 %{
16553   match(Set cr (CmpN op1 op2));
16554 
16555   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16556   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16557   ins_pipe(ialu_cr_reg_reg);
16558 %}
16559 
16560 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16561 %{
16562   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16563   match(Set cr (CmpN src (LoadN mem)));
16564 
16565   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16566   ins_encode %{
16567     __ cmpl($src$$Register, $mem$$Address);
16568   %}
16569   ins_pipe(ialu_cr_reg_mem);
16570 %}
16571 
16572 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16573   match(Set cr (CmpN op1 op2));
16574 
16575   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16576   ins_encode %{
16577     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16578   %}
16579   ins_pipe(ialu_cr_reg_imm);
16580 %}
16581 
16582 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16583 %{
16584   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16585   match(Set cr (CmpN src (LoadN mem)));
16586 
16587   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16588   ins_encode %{
16589     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16590   %}
16591   ins_pipe(ialu_cr_reg_mem);
16592 %}
16593 
16594 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16595   match(Set cr (CmpN op1 op2));
16596 
16597   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16598   ins_encode %{
16599     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16600   %}
16601   ins_pipe(ialu_cr_reg_imm);
16602 %}
16603 
16604 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16605 %{
16606   predicate(!UseCompactObjectHeaders);
16607   match(Set cr (CmpN src (LoadNKlass mem)));
16608 
16609   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16610   ins_encode %{
16611     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16612   %}
16613   ins_pipe(ialu_cr_reg_mem);
16614 %}
16615 
16616 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16617   match(Set cr (CmpN src zero));
16618 
16619   format %{ "testl   $src, $src\t# compressed ptr" %}
16620   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16621   ins_pipe(ialu_cr_reg_imm);
16622 %}
16623 
16624 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16625 %{
16626   predicate(CompressedOops::base() != nullptr &&
16627             n->in(1)->as_Load()->barrier_data() == 0);
16628   match(Set cr (CmpN (LoadN mem) zero));
16629 
16630   ins_cost(500); // XXX
16631   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16632   ins_encode %{
16633     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16634   %}
16635   ins_pipe(ialu_cr_reg_mem);
16636 %}
16637 
16638 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16639 %{
16640   predicate(CompressedOops::base() == nullptr &&
16641             n->in(1)->as_Load()->barrier_data() == 0);
16642   match(Set cr (CmpN (LoadN mem) zero));
16643 
16644   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16645   ins_encode %{
16646     __ cmpl(r12, $mem$$Address);
16647   %}
16648   ins_pipe(ialu_cr_reg_mem);
16649 %}
16650 
16651 // Yanked all unsigned pointer compare operations.
16652 // Pointer compares are done with CmpP which is already unsigned.
16653 
16654 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16655 %{
16656   match(Set cr (CmpL op1 op2));
16657 
16658   format %{ "cmpq    $op1, $op2" %}
16659   ins_encode %{
16660     __ cmpq($op1$$Register, $op2$$Register);
16661   %}
16662   ins_pipe(ialu_cr_reg_reg);
16663 %}
16664 
16665 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16666 %{
16667   match(Set cr (CmpL op1 op2));
16668 
16669   format %{ "cmpq    $op1, $op2" %}
16670   ins_encode %{
16671     __ cmpq($op1$$Register, $op2$$constant);
16672   %}
16673   ins_pipe(ialu_cr_reg_imm);
16674 %}
16675 
16676 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16677 %{
16678   match(Set cr (CmpL op1 (LoadL op2)));
16679 
16680   format %{ "cmpq    $op1, $op2" %}
16681   ins_encode %{
16682     __ cmpq($op1$$Register, $op2$$Address);
16683   %}
16684   ins_pipe(ialu_cr_reg_mem);
16685 %}
16686 
16687 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16688 %{
16689   match(Set cr (CmpL src zero));
16690 
16691   format %{ "testq   $src, $src" %}
16692   ins_encode %{
16693     __ testq($src$$Register, $src$$Register);
16694   %}
16695   ins_pipe(ialu_cr_reg_imm);
16696 %}
16697 
16698 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16699 %{
16700   match(Set cr (CmpL (AndL src con) zero));
16701 
16702   format %{ "testq   $src, $con\t# long" %}
16703   ins_encode %{
16704     __ testq($src$$Register, $con$$constant);
16705   %}
16706   ins_pipe(ialu_cr_reg_imm);
16707 %}
16708 
16709 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16710 %{
16711   match(Set cr (CmpL (AndL src1 src2) zero));
16712 
16713   format %{ "testq   $src1, $src2\t# long" %}
16714   ins_encode %{
16715     __ testq($src1$$Register, $src2$$Register);
16716   %}
16717   ins_pipe(ialu_cr_reg_imm);
16718 %}
16719 
16720 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16721 %{
16722   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16723 
16724   format %{ "testq   $src, $mem" %}
16725   ins_encode %{
16726     __ testq($src$$Register, $mem$$Address);
16727   %}
16728   ins_pipe(ialu_cr_reg_mem);
16729 %}
16730 
16731 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16732 %{
16733   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16734 
16735   format %{ "testq   $src, $mem" %}
16736   ins_encode %{
16737     __ testq($src$$Register, $mem$$Address);
16738   %}
16739   ins_pipe(ialu_cr_reg_mem);
16740 %}
16741 
16742 // Manifest a CmpU result in an integer register.  Very painful.
16743 // This is the test to avoid.
16744 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16745 %{
16746   match(Set dst (CmpU3 src1 src2));
16747   effect(KILL flags);
16748 
16749   ins_cost(275); // XXX
16750   format %{ "cmpl    $src1, $src2\t# CmpL3\n\t"
16751             "movl    $dst, -1\n\t"
16752             "jb,u    done\n\t"
16753             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16754     "done:" %}
16755   ins_encode %{
16756     Label done;
16757     __ cmpl($src1$$Register, $src2$$Register);
16758     __ movl($dst$$Register, -1);
16759     __ jccb(Assembler::below, done);
16760     __ setcc(Assembler::notZero, $dst$$Register);
16761     __ bind(done);
16762   %}
16763   ins_pipe(pipe_slow);
16764 %}
16765 
16766 // Manifest a CmpL result in an integer register.  Very painful.
16767 // This is the test to avoid.
16768 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16769 %{
16770   match(Set dst (CmpL3 src1 src2));
16771   effect(KILL flags);
16772 
16773   ins_cost(275); // XXX
16774   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16775             "movl    $dst, -1\n\t"
16776             "jl,s    done\n\t"
16777             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16778     "done:" %}
16779   ins_encode %{
16780     Label done;
16781     __ cmpq($src1$$Register, $src2$$Register);
16782     __ movl($dst$$Register, -1);
16783     __ jccb(Assembler::less, done);
16784     __ setcc(Assembler::notZero, $dst$$Register);
16785     __ bind(done);
16786   %}
16787   ins_pipe(pipe_slow);
16788 %}
16789 
16790 // Manifest a CmpUL result in an integer register.  Very painful.
16791 // This is the test to avoid.
16792 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16793 %{
16794   match(Set dst (CmpUL3 src1 src2));
16795   effect(KILL flags);
16796 
16797   ins_cost(275); // XXX
16798   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16799             "movl    $dst, -1\n\t"
16800             "jb,u    done\n\t"
16801             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16802     "done:" %}
16803   ins_encode %{
16804     Label done;
16805     __ cmpq($src1$$Register, $src2$$Register);
16806     __ movl($dst$$Register, -1);
16807     __ jccb(Assembler::below, done);
16808     __ setcc(Assembler::notZero, $dst$$Register);
16809     __ bind(done);
16810   %}
16811   ins_pipe(pipe_slow);
16812 %}
16813 
16814 // Unsigned long compare Instructions; really, same as signed long except they
16815 // produce an rFlagsRegU instead of rFlagsReg.
16816 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16817 %{
16818   match(Set cr (CmpUL op1 op2));
16819 
16820   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16821   ins_encode %{
16822     __ cmpq($op1$$Register, $op2$$Register);
16823   %}
16824   ins_pipe(ialu_cr_reg_reg);
16825 %}
16826 
16827 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16828 %{
16829   match(Set cr (CmpUL op1 op2));
16830 
16831   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16832   ins_encode %{
16833     __ cmpq($op1$$Register, $op2$$constant);
16834   %}
16835   ins_pipe(ialu_cr_reg_imm);
16836 %}
16837 
16838 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16839 %{
16840   match(Set cr (CmpUL op1 (LoadL op2)));
16841 
16842   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16843   ins_encode %{
16844     __ cmpq($op1$$Register, $op2$$Address);
16845   %}
16846   ins_pipe(ialu_cr_reg_mem);
16847 %}
16848 
16849 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16850 %{
16851   match(Set cr (CmpUL src zero));
16852 
16853   format %{ "testq   $src, $src\t# unsigned" %}
16854   ins_encode %{
16855     __ testq($src$$Register, $src$$Register);
16856   %}
16857   ins_pipe(ialu_cr_reg_imm);
16858 %}
16859 
16860 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16861 %{
16862   match(Set cr (CmpI (LoadB mem) imm));
16863 
16864   ins_cost(125);
16865   format %{ "cmpb    $mem, $imm" %}
16866   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16867   ins_pipe(ialu_cr_reg_mem);
16868 %}
16869 
16870 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16871 %{
16872   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16873 
16874   ins_cost(125);
16875   format %{ "testb   $mem, $imm\t# ubyte" %}
16876   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16877   ins_pipe(ialu_cr_reg_mem);
16878 %}
16879 
16880 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16881 %{
16882   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16883 
16884   ins_cost(125);
16885   format %{ "testb   $mem, $imm\t# byte" %}
16886   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16887   ins_pipe(ialu_cr_reg_mem);
16888 %}
16889 
16890 //----------Max and Min--------------------------------------------------------
16891 // Min Instructions
16892 
16893 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16894 %{
16895   predicate(!UseAPX);
16896   effect(USE_DEF dst, USE src, USE cr);
16897 
16898   format %{ "cmovlgt $dst, $src\t# min" %}
16899   ins_encode %{
16900     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16901   %}
16902   ins_pipe(pipe_cmov_reg);
16903 %}
16904 
16905 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16906 %{
16907   predicate(UseAPX);
16908   effect(DEF dst, USE src1, USE src2, USE cr);
16909 
16910   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16911   ins_encode %{
16912     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16913   %}
16914   ins_pipe(pipe_cmov_reg);
16915 %}
16916 
16917 instruct minI_rReg(rRegI dst, rRegI src)
16918 %{
16919   predicate(!UseAPX);
16920   match(Set dst (MinI dst src));
16921 
16922   ins_cost(200);
16923   expand %{
16924     rFlagsReg cr;
16925     compI_rReg(cr, dst, src);
16926     cmovI_reg_g(dst, src, cr);
16927   %}
16928 %}
16929 
16930 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16931 %{
16932   predicate(UseAPX);
16933   match(Set dst (MinI src1 src2));
16934   effect(DEF dst, USE src1, USE src2);
16935   flag(PD::Flag_ndd_demotable);
16936 
16937   ins_cost(200);
16938   expand %{
16939     rFlagsReg cr;
16940     compI_rReg(cr, src1, src2);
16941     cmovI_reg_g_ndd(dst, src1, src2, cr);
16942   %}
16943 %}
16944 
16945 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16946 %{
16947   predicate(!UseAPX);
16948   effect(USE_DEF dst, USE src, USE cr);
16949 
16950   format %{ "cmovllt $dst, $src\t# max" %}
16951   ins_encode %{
16952     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16953   %}
16954   ins_pipe(pipe_cmov_reg);
16955 %}
16956 
16957 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16958 %{
16959   predicate(UseAPX);
16960   effect(DEF dst, USE src1, USE src2, USE cr);
16961 
16962   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16963   ins_encode %{
16964     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16965   %}
16966   ins_pipe(pipe_cmov_reg);
16967 %}
16968 
16969 instruct maxI_rReg(rRegI dst, rRegI src)
16970 %{
16971   predicate(!UseAPX);
16972   match(Set dst (MaxI dst src));
16973 
16974   ins_cost(200);
16975   expand %{
16976     rFlagsReg cr;
16977     compI_rReg(cr, dst, src);
16978     cmovI_reg_l(dst, src, cr);
16979   %}
16980 %}
16981 
16982 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16983 %{
16984   predicate(UseAPX);
16985   match(Set dst (MaxI src1 src2));
16986   effect(DEF dst, USE src1, USE src2);
16987   flag(PD::Flag_ndd_demotable);
16988 
16989   ins_cost(200);
16990   expand %{
16991     rFlagsReg cr;
16992     compI_rReg(cr, src1, src2);
16993     cmovI_reg_l_ndd(dst, src1, src2, cr);
16994   %}
16995 %}
16996 
16997 // ============================================================================
16998 // Branch Instructions
16999 
17000 // Jump Direct - Label defines a relative address from JMP+1
17001 instruct jmpDir(label labl)
17002 %{
17003   match(Goto);
17004   effect(USE labl);
17005 
17006   ins_cost(300);
17007   format %{ "jmp     $labl" %}
17008   size(5);
17009   ins_encode %{
17010     Label* L = $labl$$label;
17011     __ jmp(*L, false); // Always long jump
17012   %}
17013   ins_pipe(pipe_jmp);
17014 %}
17015 
17016 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17017 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
17018 %{
17019   match(If cop cr);
17020   effect(USE labl);
17021 
17022   ins_cost(300);
17023   format %{ "j$cop     $labl" %}
17024   size(6);
17025   ins_encode %{
17026     Label* L = $labl$$label;
17027     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17028   %}
17029   ins_pipe(pipe_jcc);
17030 %}
17031 
17032 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17033 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
17034 %{
17035   match(CountedLoopEnd cop cr);
17036   effect(USE labl);
17037 
17038   ins_cost(300);
17039   format %{ "j$cop     $labl\t# loop end" %}
17040   size(6);
17041   ins_encode %{
17042     Label* L = $labl$$label;
17043     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17044   %}
17045   ins_pipe(pipe_jcc);
17046 %}
17047 
17048 // Jump Direct Conditional - using unsigned comparison
17049 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17050   match(If cop cmp);
17051   effect(USE labl);
17052 
17053   ins_cost(300);
17054   format %{ "j$cop,u   $labl" %}
17055   size(6);
17056   ins_encode %{
17057     Label* L = $labl$$label;
17058     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17059   %}
17060   ins_pipe(pipe_jcc);
17061 %}
17062 
17063 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17064   match(If cop cmp);
17065   effect(USE labl);
17066 
17067   ins_cost(200);
17068   format %{ "j$cop,u   $labl" %}
17069   size(6);
17070   ins_encode %{
17071     Label* L = $labl$$label;
17072     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17073   %}
17074   ins_pipe(pipe_jcc);
17075 %}
17076 
17077 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17078   match(If cop cmp);
17079   effect(USE labl);
17080 
17081   ins_cost(200);
17082   format %{ $$template
17083     if ($cop$$cmpcode == Assembler::notEqual) {
17084       $$emit$$"jp,u    $labl\n\t"
17085       $$emit$$"j$cop,u   $labl"
17086     } else {
17087       $$emit$$"jp,u    done\n\t"
17088       $$emit$$"j$cop,u   $labl\n\t"
17089       $$emit$$"done:"
17090     }
17091   %}
17092   ins_encode %{
17093     Label* l = $labl$$label;
17094     if ($cop$$cmpcode == Assembler::notEqual) {
17095       __ jcc(Assembler::parity, *l, false);
17096       __ jcc(Assembler::notEqual, *l, false);
17097     } else if ($cop$$cmpcode == Assembler::equal) {
17098       Label done;
17099       __ jccb(Assembler::parity, done);
17100       __ jcc(Assembler::equal, *l, false);
17101       __ bind(done);
17102     } else {
17103        ShouldNotReachHere();
17104     }
17105   %}
17106   ins_pipe(pipe_jcc);
17107 %}
17108 
17109 // ============================================================================
17110 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
17111 // superklass array for an instance of the superklass.  Set a hidden
17112 // internal cache on a hit (cache is checked with exposed code in
17113 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
17114 // encoding ALSO sets flags.
17115 
17116 instruct partialSubtypeCheck(rdi_RegP result,
17117                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
17118                              rFlagsReg cr)
17119 %{
17120   match(Set result (PartialSubtypeCheck sub super));
17121   predicate(!UseSecondarySupersTable);
17122   effect(KILL rcx, KILL cr);
17123 
17124   ins_cost(1100);  // slightly larger than the next version
17125   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
17126             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
17127             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
17128             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
17129             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
17130             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
17131             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
17132     "miss:\t" %}
17133 
17134   ins_encode %{
17135     Label miss;
17136     // NB: Callers may assume that, when $result is a valid register,
17137     // check_klass_subtype_slow_path_linear sets it to a nonzero
17138     // value.
17139     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
17140                                             $rcx$$Register, $result$$Register,
17141                                             nullptr, &miss,
17142                                             /*set_cond_codes:*/ true);
17143     __ xorptr($result$$Register, $result$$Register);
17144     __ bind(miss);
17145   %}
17146 
17147   ins_pipe(pipe_slow);
17148 %}
17149 
17150 // ============================================================================
17151 // Two versions of hashtable-based partialSubtypeCheck, both used when
17152 // we need to search for a super class in the secondary supers array.
17153 // The first is used when we don't know _a priori_ the class being
17154 // searched for. The second, far more common, is used when we do know:
17155 // this is used for instanceof, checkcast, and any case where C2 can
17156 // determine it by constant propagation.
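
// Illustratively: `x instanceof String` gives C2 a constant superklass, so it
// can match partialSubtypeCheckConstSuper below and bake the hash slot into
// the code (or call the per-slot stub); a check like Class::isInstance, where
// the super is only known at runtime, has to use the variable-super version.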
17157 
17158 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17159                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17160                                        rFlagsReg cr)
17161 %{
17162   match(Set result (PartialSubtypeCheck sub super));
17163   predicate(UseSecondarySupersTable);
17164   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17165 
17166   ins_cost(1000);
17167   format %{ "partialSubtypeCheck $result, $sub, $super" %}
17168 
17169   ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
17172   %}
17173 
17174   ins_pipe(pipe_slow);
17175 %}
17176 
17177 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17178                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17179                                        rFlagsReg cr)
17180 %{
17181   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17182   predicate(UseSecondarySupersTable);
17183   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17184 
17185   ins_cost(700);  // smaller than the next version
17186   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17187 
17188   ins_encode %{
17189     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17190     if (InlineSecondarySupersTest) {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register, $result$$Register,
                                             super_klass_slot);
17194     } else {
17195       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17196     }
17197   %}
17198 
17199   ins_pipe(pipe_slow);
17200 %}
17201 
17202 // ============================================================================
17203 // Branch Instructions -- short offset versions
17204 //
17205 // These instructions are used to replace jumps of a long offset (the default
17206 // match) with jumps of a shorter offset.  These instructions are all tagged
17207 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17208 // match rules in general matching.  Instead, the ADLC generates a conversion
17209 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  Whether a branch is within
// short-offset range is determined by the is_short_branch_offset() predicate
// in the machine-specific code section of this file.
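
// For reference, the encoding sizes being traded: jmp rel32 is 5 bytes and
// jcc rel32 is 6 (0x0F 0x8x + disp32), while the short forms jmp rel8 and
// jcc rel8 are 2 bytes each, matching the size() attributes below.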
17213 
17214 // Jump Direct - Label defines a relative address from JMP+1
17215 instruct jmpDir_short(label labl) %{
17216   match(Goto);
17217   effect(USE labl);
17218 
17219   ins_cost(300);
17220   format %{ "jmp,s   $labl" %}
17221   size(2);
17222   ins_encode %{
17223     Label* L = $labl$$label;
17224     __ jmpb(*L);
17225   %}
17226   ins_pipe(pipe_jmp);
17227   ins_short_branch(1);
17228 %}
17229 
17230 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17231 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17232   match(If cop cr);
17233   effect(USE labl);
17234 
17235   ins_cost(300);
17236   format %{ "j$cop,s   $labl" %}
17237   size(2);
17238   ins_encode %{
17239     Label* L = $labl$$label;
17240     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17241   %}
17242   ins_pipe(pipe_jcc);
17243   ins_short_branch(1);
17244 %}
17245 
17246 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17247 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17248   match(CountedLoopEnd cop cr);
17249   effect(USE labl);
17250 
17251   ins_cost(300);
17252   format %{ "j$cop,s   $labl\t# loop end" %}
17253   size(2);
17254   ins_encode %{
17255     Label* L = $labl$$label;
17256     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17257   %}
17258   ins_pipe(pipe_jcc);
17259   ins_short_branch(1);
17260 %}
17261 
17262 // Jump Direct Conditional - using unsigned comparison
17263 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17264   match(If cop cmp);
17265   effect(USE labl);
17266 
17267   ins_cost(300);
17268   format %{ "j$cop,us  $labl" %}
17269   size(2);
17270   ins_encode %{
17271     Label* L = $labl$$label;
17272     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17273   %}
17274   ins_pipe(pipe_jcc);
17275   ins_short_branch(1);
17276 %}
17277 
17278 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17279   match(If cop cmp);
17280   effect(USE labl);
17281 
17282   ins_cost(300);
17283   format %{ "j$cop,us  $labl" %}
17284   size(2);
17285   ins_encode %{
17286     Label* L = $labl$$label;
17287     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17288   %}
17289   ins_pipe(pipe_jcc);
17290   ins_short_branch(1);
17291 %}
17292 
17293 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17294   match(If cop cmp);
17295   effect(USE labl);
17296 
17297   ins_cost(300);
17298   format %{ $$template
17299     if ($cop$$cmpcode == Assembler::notEqual) {
17300       $$emit$$"jp,u,s  $labl\n\t"
17301       $$emit$$"j$cop,u,s  $labl"
17302     } else {
17303       $$emit$$"jp,u,s  done\n\t"
17304       $$emit$$"j$cop,u,s  $labl\n\t"
17305       $$emit$$"done:"
17306     }
17307   %}
17308   size(4);
17309   ins_encode %{
17310     Label* l = $labl$$label;
17311     if ($cop$$cmpcode == Assembler::notEqual) {
17312       __ jccb(Assembler::parity, *l);
17313       __ jccb(Assembler::notEqual, *l);
17314     } else if ($cop$$cmpcode == Assembler::equal) {
17315       Label done;
17316       __ jccb(Assembler::parity, done);
17317       __ jccb(Assembler::equal, *l);
17318       __ bind(done);
17319     } else {
17320        ShouldNotReachHere();
17321     }
17322   %}
17323   ins_pipe(pipe_jcc);
17324   ins_short_branch(1);
17325 %}
17326 
17327 // ============================================================================
17328 // inlined locking and unlocking
17329 
17330 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17331   match(Set cr (FastLock object box));
17332   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17333   ins_cost(300);
17334   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17335   ins_encode %{
17336     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17337   %}
17338   ins_pipe(pipe_slow);
17339 %}
17340 
17341 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17342   match(Set cr (FastUnlock object rax_reg));
17343   effect(TEMP tmp, USE_KILL rax_reg);
17344   ins_cost(300);
17345   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17346   ins_encode %{
17347     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17348   %}
17349   ins_pipe(pipe_slow);
17350 %}
17351 
17352 
17353 // ============================================================================
17354 // Safepoint Instructions
17355 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17356 %{
17357   match(SafePoint poll);
17358   effect(KILL cr, USE poll);
17359 
17360   format %{ "testl   rax, [$poll]\t"
17361             "# Safepoint: poll for GC" %}
17362   ins_cost(125);
17363   ins_encode %{
17364     __ relocate(relocInfo::poll_type);
17365     address pre_pc = __ pc();
17366     __ testl(rax, Address($poll$$Register, 0));
17367     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17368   %}
17369   ins_pipe(ialu_reg_mem);
17370 %}
17371 
17372 instruct mask_all_evexL(kReg dst, rRegL src) %{
17373   match(Set dst (MaskAll src));
17374   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17375   ins_encode %{
17376     int mask_len = Matcher::vector_length(this);
17377     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17378   %}
17379   ins_pipe( pipe_slow );
17380 %}
17381 
17382 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17383   predicate(Matcher::vector_length(n) > 32);
17384   match(Set dst (MaskAll src));
17385   effect(TEMP tmp);
17386   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17387   ins_encode %{
17388     int mask_len = Matcher::vector_length(this);
17389     __ movslq($tmp$$Register, $src$$Register);
17390     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17391   %}
17392   ins_pipe( pipe_slow );
17393 %}
17394 
17395 // ============================================================================
17396 // Procedure Call/Return Instructions
17397 // Call Java Static Instruction
17398 // Note: If this code changes, the corresponding ret_addr_offset() and
17399 //       compute_padding() functions will have to be adjusted.
17400 instruct CallStaticJavaDirect(method meth) %{
17401   match(CallStaticJava);
17402   effect(USE meth);
17403 
17404   ins_cost(300);
17405   format %{ "call,static " %}
17406   opcode(0xE8); /* E8 cd */
17407   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17408   ins_pipe(pipe_slow);
17409   ins_alignment(4);
17410 %}
17411 
17412 // Call Java Dynamic Instruction
17413 // Note: If this code changes, the corresponding ret_addr_offset() and
17414 //       compute_padding() functions will have to be adjusted.
17415 instruct CallDynamicJavaDirect(method meth)
17416 %{
17417   match(CallDynamicJava);
17418   effect(USE meth);
17419 
17420   ins_cost(300);
17421   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17422             "call,dynamic " %}
17423   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17424   ins_pipe(pipe_slow);
17425   ins_alignment(4);
17426 %}
17427 
17428 // Call Runtime Instruction
17429 instruct CallRuntimeDirect(method meth)
17430 %{
17431   match(CallRuntime);
17432   effect(USE meth);
17433 
17434   ins_cost(300);
17435   format %{ "call,runtime " %}
17436   ins_encode(clear_avx, Java_To_Runtime(meth));
17437   ins_pipe(pipe_slow);
17438 %}
17439 
17440 // Call runtime without safepoint
17441 instruct CallLeafDirect(method meth)
17442 %{
17443   match(CallLeaf);
17444   effect(USE meth);
17445 
17446   ins_cost(300);
17447   format %{ "call_leaf,runtime " %}
17448   ins_encode(clear_avx, Java_To_Runtime(meth));
17449   ins_pipe(pipe_slow);
17450 %}
17451 
17452 // Call runtime without safepoint and with vector arguments
17453 instruct CallLeafDirectVector(method meth)
17454 %{
17455   match(CallLeafVector);
17456   effect(USE meth);
17457 
17458   ins_cost(300);
17459   format %{ "call_leaf,vector " %}
17460   ins_encode(Java_To_Runtime(meth));
17461   ins_pipe(pipe_slow);
17462 %}
17463 
17464 // Call runtime without safepoint
// The entry point is null; the target register holds the address to call.
17466 instruct CallLeafNoFPInDirect(rRegP target)
17467 %{
17468   predicate(n->as_Call()->entry_point() == nullptr);
17469   match(CallLeafNoFP target);
17470 
17471   ins_cost(300);
17472   format %{ "call_leaf_nofp,runtime indirect " %}
17473   ins_encode %{
17474      __ call($target$$Register);
17475   %}
17476 
17477   ins_pipe(pipe_slow);
17478 %}
17479 
17480 // Call runtime without safepoint
17481 instruct CallLeafNoFPDirect(method meth)
17482 %{
17483   predicate(n->as_Call()->entry_point() != nullptr);
17484   match(CallLeafNoFP);
17485   effect(USE meth);
17486 
17487   ins_cost(300);
17488   format %{ "call_leaf_nofp,runtime " %}
17489   ins_encode(clear_avx, Java_To_Runtime(meth));
17490   ins_pipe(pipe_slow);
17491 %}
17492 
17493 // Return Instruction
17494 // Remove the return address & jump to it.
// Notice: We always emit a nop after a ret to make sure there is room
// for safepoint patching.
17497 instruct Ret()
17498 %{
17499   match(Return);
17500 
17501   format %{ "ret" %}
17502   ins_encode %{
17503     __ ret(0);
17504   %}
17505   ins_pipe(pipe_jmp);
17506 %}
17507 
17508 // Tail Call; Jump from runtime stub to Java code.
17509 // Also known as an 'interprocedural jump'.
17510 // Target of jump will eventually return to caller.
17511 // TailJump below removes the return address.
17512 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17513 // emitted just above the TailCall which has reset rbp to the caller state.
17514 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17515 %{
17516   match(TailCall jump_target method_ptr);
17517 
17518   ins_cost(300);
17519   format %{ "jmp     $jump_target\t# rbx holds method" %}
17520   ins_encode %{
17521     __ jmp($jump_target$$Register);
17522   %}
17523   ins_pipe(pipe_jmp);
17524 %}
17525 
17526 // Tail Jump; remove the return address; jump to target.
17527 // TailCall above leaves the return address around.
17528 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17529 %{
17530   match(TailJump jump_target ex_oop);
17531 
17532   ins_cost(300);
17533   format %{ "popq    rdx\t# pop return address\n\t"
17534             "jmp     $jump_target" %}
17535   ins_encode %{
17536     __ popq(as_Register(RDX_enc));
17537     __ jmp($jump_target$$Register);
17538   %}
17539   ins_pipe(pipe_jmp);
17540 %}
17541 
17542 // Forward exception.
17543 instruct ForwardExceptionjmp()
17544 %{
17545   match(ForwardException);
17546 
17547   format %{ "jmp     forward_exception_stub" %}
17548   ins_encode %{
17549     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17550   %}
17551   ins_pipe(pipe_jmp);
17552 %}
17553 
17554 // Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is set up
// just prior to jumping to this handler.  No code emitted.
17557 instruct CreateException(rax_RegP ex_oop)
17558 %{
17559   match(Set ex_oop (CreateEx));
17560 
17561   size(0);
17562   // use the following format syntax
17563   format %{ "# exception oop is in rax; no code emitted" %}
17564   ins_encode();
17565   ins_pipe(empty);
17566 %}
17567 
17568 // Rethrow exception:
17569 // The exception oop will come in the first argument position.
17570 // Then JUMP (not call) to the rethrow stub code.
17571 instruct RethrowException()
17572 %{
17573   match(Rethrow);
17574 
17575   // use the following format syntax
17576   format %{ "jmp     rethrow_stub" %}
17577   ins_encode %{
17578     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17579   %}
17580   ins_pipe(pipe_jmp);
17581 %}
17582 
17583 // ============================================================================
17584 // This name is KNOWN by the ADLC and cannot be changed.
17585 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17586 // for this guy.
17587 instruct tlsLoadP(r15_RegP dst) %{
17588   match(Set dst (ThreadLocal));
17589   effect(DEF dst);
17590 
17591   size(0);
17592   format %{ "# TLS is in R15" %}
17593   ins_encode( /*empty encoding*/ );
17594   ins_pipe(ialu_reg_reg);
17595 %}
17596 
17597 instruct addF_reg(regF dst, regF src) %{
17598   predicate(UseAVX == 0);
17599   match(Set dst (AddF dst src));
17600 
17601   format %{ "addss   $dst, $src" %}
17602   ins_cost(150);
17603   ins_encode %{
17604     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17605   %}
17606   ins_pipe(pipe_slow);
17607 %}
17608 
17609 instruct addF_mem(regF dst, memory src) %{
17610   predicate(UseAVX == 0);
17611   match(Set dst (AddF dst (LoadF src)));
17612 
17613   format %{ "addss   $dst, $src" %}
17614   ins_cost(150);
17615   ins_encode %{
17616     __ addss($dst$$XMMRegister, $src$$Address);
17617   %}
17618   ins_pipe(pipe_slow);
17619 %}
17620 
17621 instruct addF_imm(regF dst, immF con) %{
17622   predicate(UseAVX == 0);
17623   match(Set dst (AddF dst con));
17624   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17625   ins_cost(150);
17626   ins_encode %{
17627     __ addss($dst$$XMMRegister, $constantaddress($con));
17628   %}
17629   ins_pipe(pipe_slow);
17630 %}
17631 
17632 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17633   predicate(UseAVX > 0);
17634   match(Set dst (AddF src1 src2));
17635 
17636   format %{ "vaddss  $dst, $src1, $src2" %}
17637   ins_cost(150);
17638   ins_encode %{
17639     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17640   %}
17641   ins_pipe(pipe_slow);
17642 %}
17643 
17644 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17645   predicate(UseAVX > 0);
17646   match(Set dst (AddF src1 (LoadF src2)));
17647 
17648   format %{ "vaddss  $dst, $src1, $src2" %}
17649   ins_cost(150);
17650   ins_encode %{
17651     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17652   %}
17653   ins_pipe(pipe_slow);
17654 %}
17655 
17656 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17657   predicate(UseAVX > 0);
17658   match(Set dst (AddF src con));
17659 
17660   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17661   ins_cost(150);
17662   ins_encode %{
17663     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17664   %}
17665   ins_pipe(pipe_slow);
17666 %}
17667 
17668 instruct addD_reg(regD dst, regD src) %{
17669   predicate(UseAVX == 0);
17670   match(Set dst (AddD dst src));
17671 
17672   format %{ "addsd   $dst, $src" %}
17673   ins_cost(150);
17674   ins_encode %{
17675     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17676   %}
17677   ins_pipe(pipe_slow);
17678 %}
17679 
17680 instruct addD_mem(regD dst, memory src) %{
17681   predicate(UseAVX == 0);
17682   match(Set dst (AddD dst (LoadD src)));
17683 
17684   format %{ "addsd   $dst, $src" %}
17685   ins_cost(150);
17686   ins_encode %{
17687     __ addsd($dst$$XMMRegister, $src$$Address);
17688   %}
17689   ins_pipe(pipe_slow);
17690 %}
17691 
17692 instruct addD_imm(regD dst, immD con) %{
17693   predicate(UseAVX == 0);
17694   match(Set dst (AddD dst con));
17695   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17696   ins_cost(150);
17697   ins_encode %{
17698     __ addsd($dst$$XMMRegister, $constantaddress($con));
17699   %}
17700   ins_pipe(pipe_slow);
17701 %}
17702 
17703 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17704   predicate(UseAVX > 0);
17705   match(Set dst (AddD src1 src2));
17706 
17707   format %{ "vaddsd  $dst, $src1, $src2" %}
17708   ins_cost(150);
17709   ins_encode %{
17710     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17711   %}
17712   ins_pipe(pipe_slow);
17713 %}
17714 
17715 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17716   predicate(UseAVX > 0);
17717   match(Set dst (AddD src1 (LoadD src2)));
17718 
17719   format %{ "vaddsd  $dst, $src1, $src2" %}
17720   ins_cost(150);
17721   ins_encode %{
17722     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17723   %}
17724   ins_pipe(pipe_slow);
17725 %}
17726 
17727 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17728   predicate(UseAVX > 0);
17729   match(Set dst (AddD src con));
17730 
17731   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17732   ins_cost(150);
17733   ins_encode %{
17734     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17735   %}
17736   ins_pipe(pipe_slow);
17737 %}
17738 
17739 instruct subF_reg(regF dst, regF src) %{
17740   predicate(UseAVX == 0);
17741   match(Set dst (SubF dst src));
17742 
17743   format %{ "subss   $dst, $src" %}
17744   ins_cost(150);
17745   ins_encode %{
17746     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17747   %}
17748   ins_pipe(pipe_slow);
17749 %}
17750 
17751 instruct subF_mem(regF dst, memory src) %{
17752   predicate(UseAVX == 0);
17753   match(Set dst (SubF dst (LoadF src)));
17754 
17755   format %{ "subss   $dst, $src" %}
17756   ins_cost(150);
17757   ins_encode %{
17758     __ subss($dst$$XMMRegister, $src$$Address);
17759   %}
17760   ins_pipe(pipe_slow);
17761 %}
17762 
17763 instruct subF_imm(regF dst, immF con) %{
17764   predicate(UseAVX == 0);
17765   match(Set dst (SubF dst con));
17766   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17767   ins_cost(150);
17768   ins_encode %{
17769     __ subss($dst$$XMMRegister, $constantaddress($con));
17770   %}
17771   ins_pipe(pipe_slow);
17772 %}
17773 
17774 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17775   predicate(UseAVX > 0);
17776   match(Set dst (SubF src1 src2));
17777 
17778   format %{ "vsubss  $dst, $src1, $src2" %}
17779   ins_cost(150);
17780   ins_encode %{
17781     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17782   %}
17783   ins_pipe(pipe_slow);
17784 %}
17785 
17786 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17787   predicate(UseAVX > 0);
17788   match(Set dst (SubF src1 (LoadF src2)));
17789 
17790   format %{ "vsubss  $dst, $src1, $src2" %}
17791   ins_cost(150);
17792   ins_encode %{
17793     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17794   %}
17795   ins_pipe(pipe_slow);
17796 %}
17797 
17798 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17799   predicate(UseAVX > 0);
17800   match(Set dst (SubF src con));
17801 
17802   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17803   ins_cost(150);
17804   ins_encode %{
17805     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17806   %}
17807   ins_pipe(pipe_slow);
17808 %}
17809 
17810 instruct subD_reg(regD dst, regD src) %{
17811   predicate(UseAVX == 0);
17812   match(Set dst (SubD dst src));
17813 
17814   format %{ "subsd   $dst, $src" %}
17815   ins_cost(150);
17816   ins_encode %{
17817     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17818   %}
17819   ins_pipe(pipe_slow);
17820 %}
17821 
17822 instruct subD_mem(regD dst, memory src) %{
17823   predicate(UseAVX == 0);
17824   match(Set dst (SubD dst (LoadD src)));
17825 
17826   format %{ "subsd   $dst, $src" %}
17827   ins_cost(150);
17828   ins_encode %{
17829     __ subsd($dst$$XMMRegister, $src$$Address);
17830   %}
17831   ins_pipe(pipe_slow);
17832 %}
17833 
17834 instruct subD_imm(regD dst, immD con) %{
17835   predicate(UseAVX == 0);
17836   match(Set dst (SubD dst con));
17837   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17838   ins_cost(150);
17839   ins_encode %{
17840     __ subsd($dst$$XMMRegister, $constantaddress($con));
17841   %}
17842   ins_pipe(pipe_slow);
17843 %}
17844 
17845 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17846   predicate(UseAVX > 0);
17847   match(Set dst (SubD src1 src2));
17848 
17849   format %{ "vsubsd  $dst, $src1, $src2" %}
17850   ins_cost(150);
17851   ins_encode %{
17852     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17853   %}
17854   ins_pipe(pipe_slow);
17855 %}
17856 
17857 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17858   predicate(UseAVX > 0);
17859   match(Set dst (SubD src1 (LoadD src2)));
17860 
17861   format %{ "vsubsd  $dst, $src1, $src2" %}
17862   ins_cost(150);
17863   ins_encode %{
17864     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17865   %}
17866   ins_pipe(pipe_slow);
17867 %}
17868 
17869 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17870   predicate(UseAVX > 0);
17871   match(Set dst (SubD src con));
17872 
17873   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17874   ins_cost(150);
17875   ins_encode %{
17876     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17877   %}
17878   ins_pipe(pipe_slow);
17879 %}
17880 
17881 instruct mulF_reg(regF dst, regF src) %{
17882   predicate(UseAVX == 0);
17883   match(Set dst (MulF dst src));
17884 
17885   format %{ "mulss   $dst, $src" %}
17886   ins_cost(150);
17887   ins_encode %{
17888     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17889   %}
17890   ins_pipe(pipe_slow);
17891 %}
17892 
17893 instruct mulF_mem(regF dst, memory src) %{
17894   predicate(UseAVX == 0);
17895   match(Set dst (MulF dst (LoadF src)));
17896 
17897   format %{ "mulss   $dst, $src" %}
17898   ins_cost(150);
17899   ins_encode %{
17900     __ mulss($dst$$XMMRegister, $src$$Address);
17901   %}
17902   ins_pipe(pipe_slow);
17903 %}
17904 
17905 instruct mulF_imm(regF dst, immF con) %{
17906   predicate(UseAVX == 0);
17907   match(Set dst (MulF dst con));
17908   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17909   ins_cost(150);
17910   ins_encode %{
17911     __ mulss($dst$$XMMRegister, $constantaddress($con));
17912   %}
17913   ins_pipe(pipe_slow);
17914 %}
17915 
17916 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17917   predicate(UseAVX > 0);
17918   match(Set dst (MulF src1 src2));
17919 
17920   format %{ "vmulss  $dst, $src1, $src2" %}
17921   ins_cost(150);
17922   ins_encode %{
17923     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17924   %}
17925   ins_pipe(pipe_slow);
17926 %}
17927 
17928 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17929   predicate(UseAVX > 0);
17930   match(Set dst (MulF src1 (LoadF src2)));
17931 
17932   format %{ "vmulss  $dst, $src1, $src2" %}
17933   ins_cost(150);
17934   ins_encode %{
17935     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17936   %}
17937   ins_pipe(pipe_slow);
17938 %}
17939 
17940 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17941   predicate(UseAVX > 0);
17942   match(Set dst (MulF src con));
17943 
17944   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17945   ins_cost(150);
17946   ins_encode %{
17947     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17948   %}
17949   ins_pipe(pipe_slow);
17950 %}
17951 
17952 instruct mulD_reg(regD dst, regD src) %{
17953   predicate(UseAVX == 0);
17954   match(Set dst (MulD dst src));
17955 
17956   format %{ "mulsd   $dst, $src" %}
17957   ins_cost(150);
17958   ins_encode %{
17959     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17960   %}
17961   ins_pipe(pipe_slow);
17962 %}
17963 
17964 instruct mulD_mem(regD dst, memory src) %{
17965   predicate(UseAVX == 0);
17966   match(Set dst (MulD dst (LoadD src)));
17967 
17968   format %{ "mulsd   $dst, $src" %}
17969   ins_cost(150);
17970   ins_encode %{
17971     __ mulsd($dst$$XMMRegister, $src$$Address);
17972   %}
17973   ins_pipe(pipe_slow);
17974 %}
17975 
17976 instruct mulD_imm(regD dst, immD con) %{
17977   predicate(UseAVX == 0);
17978   match(Set dst (MulD dst con));
17979   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17980   ins_cost(150);
17981   ins_encode %{
17982     __ mulsd($dst$$XMMRegister, $constantaddress($con));
17983   %}
17984   ins_pipe(pipe_slow);
17985 %}
17986 
17987 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17988   predicate(UseAVX > 0);
17989   match(Set dst (MulD src1 src2));
17990 
17991   format %{ "vmulsd  $dst, $src1, $src2" %}
17992   ins_cost(150);
17993   ins_encode %{
17994     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17995   %}
17996   ins_pipe(pipe_slow);
17997 %}
17998 
17999 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
18000   predicate(UseAVX > 0);
18001   match(Set dst (MulD src1 (LoadD src2)));
18002 
18003   format %{ "vmulsd  $dst, $src1, $src2" %}
18004   ins_cost(150);
18005   ins_encode %{
18006     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18007   %}
18008   ins_pipe(pipe_slow);
18009 %}
18010 
18011 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
18012   predicate(UseAVX > 0);
18013   match(Set dst (MulD src con));
18014 
18015   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18016   ins_cost(150);
18017   ins_encode %{
18018     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18019   %}
18020   ins_pipe(pipe_slow);
18021 %}
18022 
18023 instruct divF_reg(regF dst, regF src) %{
18024   predicate(UseAVX == 0);
18025   match(Set dst (DivF dst src));
18026 
18027   format %{ "divss   $dst, $src" %}
18028   ins_cost(150);
18029   ins_encode %{
18030     __ divss($dst$$XMMRegister, $src$$XMMRegister);
18031   %}
18032   ins_pipe(pipe_slow);
18033 %}
18034 
18035 instruct divF_mem(regF dst, memory src) %{
18036   predicate(UseAVX == 0);
18037   match(Set dst (DivF dst (LoadF src)));
18038 
18039   format %{ "divss   $dst, $src" %}
18040   ins_cost(150);
18041   ins_encode %{
18042     __ divss($dst$$XMMRegister, $src$$Address);
18043   %}
18044   ins_pipe(pipe_slow);
18045 %}
18046 
18047 instruct divF_imm(regF dst, immF con) %{
18048   predicate(UseAVX == 0);
18049   match(Set dst (DivF dst con));
18050   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18051   ins_cost(150);
18052   ins_encode %{
18053     __ divss($dst$$XMMRegister, $constantaddress($con));
18054   %}
18055   ins_pipe(pipe_slow);
18056 %}
18057 
18058 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
18059   predicate(UseAVX > 0);
18060   match(Set dst (DivF src1 src2));
18061 
18062   format %{ "vdivss  $dst, $src1, $src2" %}
18063   ins_cost(150);
18064   ins_encode %{
18065     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18066   %}
18067   ins_pipe(pipe_slow);
18068 %}
18069 
18070 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
18071   predicate(UseAVX > 0);
18072   match(Set dst (DivF src1 (LoadF src2)));
18073 
18074   format %{ "vdivss  $dst, $src1, $src2" %}
18075   ins_cost(150);
18076   ins_encode %{
18077     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18078   %}
18079   ins_pipe(pipe_slow);
18080 %}
18081 
18082 instruct divF_reg_imm(regF dst, regF src, immF con) %{
18083   predicate(UseAVX > 0);
18084   match(Set dst (DivF src con));
18085 
18086   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18087   ins_cost(150);
18088   ins_encode %{
18089     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18090   %}
18091   ins_pipe(pipe_slow);
18092 %}
18093 
18094 instruct divD_reg(regD dst, regD src) %{
18095   predicate(UseAVX == 0);
18096   match(Set dst (DivD dst src));
18097 
18098   format %{ "divsd   $dst, $src" %}
18099   ins_cost(150);
18100   ins_encode %{
18101     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
18102   %}
18103   ins_pipe(pipe_slow);
18104 %}
18105 
18106 instruct divD_mem(regD dst, memory src) %{
18107   predicate(UseAVX == 0);
18108   match(Set dst (DivD dst (LoadD src)));
18109 
18110   format %{ "divsd   $dst, $src" %}
18111   ins_cost(150);
18112   ins_encode %{
18113     __ divsd($dst$$XMMRegister, $src$$Address);
18114   %}
18115   ins_pipe(pipe_slow);
18116 %}
18117 
18118 instruct divD_imm(regD dst, immD con) %{
18119   predicate(UseAVX == 0);
18120   match(Set dst (DivD dst con));
18121   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18122   ins_cost(150);
18123   ins_encode %{
18124     __ divsd($dst$$XMMRegister, $constantaddress($con));
18125   %}
18126   ins_pipe(pipe_slow);
18127 %}
18128 
18129 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
18130   predicate(UseAVX > 0);
18131   match(Set dst (DivD src1 src2));
18132 
18133   format %{ "vdivsd  $dst, $src1, $src2" %}
18134   ins_cost(150);
18135   ins_encode %{
18136     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18137   %}
18138   ins_pipe(pipe_slow);
18139 %}
18140 
18141 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
18142   predicate(UseAVX > 0);
18143   match(Set dst (DivD src1 (LoadD src2)));
18144 
18145   format %{ "vdivsd  $dst, $src1, $src2" %}
18146   ins_cost(150);
18147   ins_encode %{
18148     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18149   %}
18150   ins_pipe(pipe_slow);
18151 %}
18152 
18153 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18154   predicate(UseAVX > 0);
18155   match(Set dst (DivD src con));
18156 
18157   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18158   ins_cost(150);
18159   ins_encode %{
18160     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18161   %}
18162   ins_pipe(pipe_slow);
18163 %}
18164 
18165 instruct absF_reg(regF dst) %{
18166   predicate(UseAVX == 0);
18167   match(Set dst (AbsF dst));
18168   ins_cost(150);
18169   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
18170   ins_encode %{
18171     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18172   %}
18173   ins_pipe(pipe_slow);
18174 %}
18175 
18176 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18177   predicate(UseAVX > 0);
18178   match(Set dst (AbsF src));
18179   ins_cost(150);
18180   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18181   ins_encode %{
18182     int vlen_enc = Assembler::AVX_128bit;
18183     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18184               ExternalAddress(float_signmask()), vlen_enc);
18185   %}
18186   ins_pipe(pipe_slow);
18187 %}
18188 
18189 instruct absD_reg(regD dst) %{
18190   predicate(UseAVX == 0);
18191   match(Set dst (AbsD dst));
18192   ins_cost(150);
18193   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
18194             "# abs double by sign masking" %}
18195   ins_encode %{
18196     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18197   %}
18198   ins_pipe(pipe_slow);
18199 %}
18200 
18201 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18202   predicate(UseAVX > 0);
18203   match(Set dst (AbsD src));
18204   ins_cost(150);
18205   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
18206             "# abs double by sign masking" %}
18207   ins_encode %{
18208     int vlen_enc = Assembler::AVX_128bit;
18209     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18210               ExternalAddress(double_signmask()), vlen_enc);
18211   %}
18212   ins_pipe(pipe_slow);
18213 %}
18214 
18215 instruct negF_reg(regF dst) %{
18216   predicate(UseAVX == 0);
18217   match(Set dst (NegF dst));
18218   ins_cost(150);
18219   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
18220   ins_encode %{
18221     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18222   %}
18223   ins_pipe(pipe_slow);
18224 %}
18225 
18226 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18227   predicate(UseAVX > 0);
18228   match(Set dst (NegF src));
18229   ins_cost(150);
18230   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18231   ins_encode %{
18232     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18233                  ExternalAddress(float_signflip()));
18234   %}
18235   ins_pipe(pipe_slow);
18236 %}
18237 
18238 instruct negD_reg(regD dst) %{
18239   predicate(UseAVX == 0);
18240   match(Set dst (NegD dst));
18241   ins_cost(150);
18242   format %{ "xorpd   $dst, [0x8000000000000000]\t"
18243             "# neg double by sign flipping" %}
18244   ins_encode %{
18245     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18246   %}
18247   ins_pipe(pipe_slow);
18248 %}
18249 
18250 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18251   predicate(UseAVX > 0);
18252   match(Set dst (NegD src));
18253   ins_cost(150);
18254   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
18255             "# neg double by sign flipping" %}
18256   ins_encode %{
18257     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18258                  ExternalAddress(double_signflip()));
18259   %}
18260   ins_pipe(pipe_slow);
18261 %}
18262 
// The sqrtss instruction needs its destination register to be pre-initialized
// for best performance: the partial write would otherwise carry a false
// dependency on dst's stale upper bits. Therefore only the instruct rule where
// the input is pre-loaded into the dst register is defined below.
18265 instruct sqrtF_reg(regF dst) %{
18266   match(Set dst (SqrtF dst));
18267   format %{ "sqrtss  $dst, $dst" %}
18268   ins_encode %{
18269     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18270   %}
18271   ins_pipe(pipe_slow);
18272 %}
18273 
// The sqrtsd instruction likewise needs its destination register to be
// pre-initialized for best performance. Therefore only the instruct rule where
// the input is pre-loaded into the dst register is defined below.
18276 instruct sqrtD_reg(regD dst) %{
18277   match(Set dst (SqrtD dst));
18278   format %{ "sqrtsd  $dst, $dst" %}
18279   ins_encode %{
18280     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18281   %}
18282   ins_pipe(pipe_slow);
18283 %}
18284 
18285 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18286   effect(TEMP tmp);
18287   match(Set dst (ConvF2HF src));
18288   ins_cost(125);
18289   format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18290   ins_encode %{
18291     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18292   %}
18293   ins_pipe( pipe_slow );
18294 %}
18295 
18296 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18297   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18298   effect(TEMP ktmp, TEMP rtmp);
18299   match(Set mem (StoreC mem (ConvF2HF src)));
18300   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18301   ins_encode %{
18302     __ movl($rtmp$$Register, 0x1);
18303     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18304     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18305   %}
18306   ins_pipe( pipe_slow );
18307 %}
18308 
18309 instruct vconvF2HF(vec dst, vec src) %{
18310   match(Set dst (VectorCastF2HF src));
18311   format %{ "vector_conv_F2HF $dst $src" %}
18312   ins_encode %{
18313     int vlen_enc = vector_length_encoding(this, $src);
18314     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18315   %}
18316   ins_pipe( pipe_slow );
18317 %}
18318 
18319 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18320   predicate(n->as_StoreVector()->memory_size() >= 16);
18321   match(Set mem (StoreVector mem (VectorCastF2HF src)));
18322   format %{ "vcvtps2ph $mem,$src" %}
18323   ins_encode %{
18324     int vlen_enc = vector_length_encoding(this, $src);
18325     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18326   %}
18327   ins_pipe( pipe_slow );
18328 %}
18329 
18330 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18331   match(Set dst (ConvHF2F src));
18332   format %{ "vcvtph2ps $dst,$src" %}
18333   ins_encode %{
18334     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18335   %}
18336   ins_pipe( pipe_slow );
18337 %}
18338 
18339 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18340   match(Set dst (VectorCastHF2F (LoadVector mem)));
18341   format %{ "vcvtph2ps $dst,$mem" %}
18342   ins_encode %{
18343     int vlen_enc = vector_length_encoding(this);
18344     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18345   %}
18346   ins_pipe( pipe_slow );
18347 %}
18348 
18349 instruct vconvHF2F(vec dst, vec src) %{
18350   match(Set dst (VectorCastHF2F src));
18351   ins_cost(125);
18352   format %{ "vector_conv_HF2F $dst,$src" %}
18353   ins_encode %{
18354     int vlen_enc = vector_length_encoding(this);
18355     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18356   %}
18357   ins_pipe( pipe_slow );
18358 %}
18359 
18360 // ---------------------------------------- VectorReinterpret ------------------------------------
18361 instruct reinterpret_mask(kReg dst) %{
18362   predicate(n->bottom_type()->isa_vectmask() &&
18363             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18364   match(Set dst (VectorReinterpret dst));
18365   ins_cost(125);
18366   format %{ "vector_reinterpret $dst\t!" %}
18367   ins_encode %{
18368     // empty
18369   %}
18370   ins_pipe( pipe_slow );
18371 %}
18372 
18373 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18374   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18375             n->bottom_type()->isa_vectmask() &&
18376             n->in(1)->bottom_type()->isa_vectmask() &&
18377             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src have the same size in bytes
18379   match(Set dst (VectorReinterpret src));
18380   effect(TEMP xtmp);
18381   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18382   ins_encode %{
18383      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18384      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18389   %}
18390   ins_pipe( pipe_slow );
18391 %}
18392 
18393 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18394   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18395             n->bottom_type()->isa_vectmask() &&
18396             n->in(1)->bottom_type()->isa_vectmask() &&
18397             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18398              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src have the same size in bytes
18400   match(Set dst (VectorReinterpret src));
18401   effect(TEMP xtmp);
18402   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18403   ins_encode %{
18404      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18405      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18410   %}
18411   ins_pipe( pipe_slow );
18412 %}
18413 
18414 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18415   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18416             n->bottom_type()->isa_vectmask() &&
18417             n->in(1)->bottom_type()->isa_vectmask() &&
18418             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18419              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src have the same size in bytes
18421   match(Set dst (VectorReinterpret src));
18422   effect(TEMP xtmp);
18423   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18424   ins_encode %{
18425      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18426      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18431   %}
18432   ins_pipe( pipe_slow );
18433 %}
18434 
18435 instruct reinterpret(vec dst) %{
18436   predicate(!n->bottom_type()->isa_vectmask() &&
18437             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18438   match(Set dst (VectorReinterpret dst));
18439   ins_cost(125);
18440   format %{ "vector_reinterpret $dst\t!" %}
18441   ins_encode %{
18442     // empty
18443   %}
18444   ins_pipe( pipe_slow );
18445 %}
18446 
18447 instruct reinterpret_expand(vec dst, vec src) %{
18448   predicate(UseAVX == 0 &&
18449             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18450   match(Set dst (VectorReinterpret src));
18451   ins_cost(125);
18452   effect(TEMP dst);
18453   format %{ "vector_reinterpret_expand $dst,$src" %}
18454   ins_encode %{
18455     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18456     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18457 
18458     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18459     if (src_vlen_in_bytes == 4) {
18460       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18461     } else {
18462       assert(src_vlen_in_bytes == 8, "");
18463       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18464     }
18465     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18466   %}
18467   ins_pipe( pipe_slow );
18468 %}
18469 
18470 instruct vreinterpret_expand4(legVec dst, vec src) %{
18471   predicate(UseAVX > 0 &&
18472             !n->bottom_type()->isa_vectmask() &&
18473             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18474             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18475   match(Set dst (VectorReinterpret src));
18476   ins_cost(125);
18477   format %{ "vector_reinterpret_expand $dst,$src" %}
18478   ins_encode %{
18479     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18480   %}
18481   ins_pipe( pipe_slow );
18482 %}
18483 
18485 instruct vreinterpret_expand(legVec dst, vec src) %{
18486   predicate(UseAVX > 0 &&
18487             !n->bottom_type()->isa_vectmask() &&
18488             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18489             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18490   match(Set dst (VectorReinterpret src));
18491   ins_cost(125);
18492   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18493   ins_encode %{
18494     switch (Matcher::vector_length_in_bytes(this, $src)) {
18495       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18496       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18497       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18498       default: ShouldNotReachHere();
18499     }
18500   %}
18501   ins_pipe( pipe_slow );
18502 %}
18503 
18504 instruct reinterpret_shrink(vec dst, legVec src) %{
18505   predicate(!n->bottom_type()->isa_vectmask() &&
18506             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18507   match(Set dst (VectorReinterpret src));
18508   ins_cost(125);
18509   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18510   ins_encode %{
18511     switch (Matcher::vector_length_in_bytes(this)) {
18512       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18513       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18514       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18515       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18516       default: ShouldNotReachHere();
18517     }
18518   %}
18519   ins_pipe( pipe_slow );
18520 %}
18521 
18522 // ----------------------------------------------------------------------------------------------------
18523 
18524 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18525   match(Set dst (RoundDoubleMode src rmode));
18526   format %{ "roundsd $dst,$src" %}
18527   ins_cost(150);
18528   ins_encode %{
18529     assert(UseSSE >= 4, "required");
18530     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18531       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18532     }
18533     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18534   %}
18535   ins_pipe(pipe_slow);
18536 %}
18537 
18538 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18539   match(Set dst (RoundDoubleMode con rmode));
18540   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18541   ins_cost(150);
18542   ins_encode %{
18543     assert(UseSSE >= 4, "required");
18544     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18545   %}
18546   ins_pipe(pipe_slow);
18547 %}
18548 
18549 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18550   predicate(Matcher::vector_length(n) < 8);
18551   match(Set dst (RoundDoubleModeV src rmode));
18552   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18553   ins_encode %{
18554     assert(UseAVX > 0, "required");
18555     int vlen_enc = vector_length_encoding(this);
18556     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18557   %}
18558   ins_pipe( pipe_slow );
18559 %}
18560 
18561 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18562   predicate(Matcher::vector_length(n) == 8);
18563   match(Set dst (RoundDoubleModeV src rmode));
18564   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18565   ins_encode %{
18566     assert(UseAVX > 2, "required");
18567     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18568   %}
18569   ins_pipe( pipe_slow );
18570 %}
18571 
18572 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18573   predicate(Matcher::vector_length(n) < 8);
18574   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18575   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18576   ins_encode %{
18577     assert(UseAVX > 0, "required");
18578     int vlen_enc = vector_length_encoding(this);
18579     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18580   %}
18581   ins_pipe( pipe_slow );
18582 %}
18583 
18584 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18585   predicate(Matcher::vector_length(n) == 8);
18586   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18587   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18588   ins_encode %{
18589     assert(UseAVX > 2, "required");
18590     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18591   %}
18592   ins_pipe( pipe_slow );
18593 %}
18594 
18595 instruct onspinwait() %{
18596   match(OnSpinWait);
18597   ins_cost(200);
18598 
18599   format %{
18600     $$template
18601     $$emit$$"pause\t! membar_onspinwait"
18602   %}
18603   ins_encode %{
18604     __ pause();
18605   %}
18606   ins_pipe(pipe_slow);
18607 %}
18608 
18609 // a * b + c
18610 instruct fmaD_reg(regD a, regD b, regD c) %{
18611   match(Set c (FmaD  c (Binary a b)));
18612   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18613   ins_cost(150);
18614   ins_encode %{
18615     assert(UseFMA, "Needs FMA instructions support.");
18616     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18617   %}
18618   ins_pipe( pipe_slow );
18619 %}
18620 
18621 // a * b + c
18622 instruct fmaF_reg(regF a, regF b, regF c) %{
18623   match(Set c (FmaF  c (Binary a b)));
18624   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18625   ins_cost(150);
18626   ins_encode %{
18627     assert(UseFMA, "Needs FMA instructions support.");
18628     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18629   %}
18630   ins_pipe( pipe_slow );
18631 %}
18632 
18633 // ====================VECTOR INSTRUCTIONS=====================================
18634 
18635 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18636 instruct MoveVec2Leg(legVec dst, vec src) %{
18637   match(Set dst src);
18638   format %{ "" %}
18639   ins_encode %{
18640     ShouldNotReachHere();
18641   %}
18642   ins_pipe( fpu_reg_reg );
18643 %}
18644 
18645 instruct MoveLeg2Vec(vec dst, legVec src) %{
18646   match(Set dst src);
18647   format %{ "" %}
18648   ins_encode %{
18649     ShouldNotReachHere();
18650   %}
18651   ins_pipe( fpu_reg_reg );
18652 %}
18653 
18654 // ============================================================================
18655 
18656 // Load vectors generic operand pattern
18657 instruct loadV(vec dst, memory mem) %{
18658   match(Set dst (LoadVector mem));
18659   ins_cost(125);
18660   format %{ "load_vector $dst,$mem" %}
18661   ins_encode %{
18662     BasicType bt = Matcher::vector_element_basic_type(this);
18663     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18664   %}
18665   ins_pipe( pipe_slow );
18666 %}
18667 
18668 // Store vectors generic operand pattern.
18669 instruct storeV(memory mem, vec src) %{
18670   match(Set mem (StoreVector mem src));
18671   ins_cost(145);
18672   format %{ "store_vector $mem,$src\n\t" %}
18673   ins_encode %{
18674     switch (Matcher::vector_length_in_bytes(this, $src)) {
18675       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18676       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18677       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18678       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18679       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18680       default: ShouldNotReachHere();
18681     }
18682   %}
18683   ins_pipe( pipe_slow );
18684 %}
18685 
18686 // ---------------------------------------- Gather ------------------------------------
18687 
18688 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18689 
18690 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18691   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18692             Matcher::vector_length_in_bytes(n) <= 32);
18693   match(Set dst (LoadVectorGather mem idx));
18694   effect(TEMP dst, TEMP tmp, TEMP mask);
18695   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18696   ins_encode %{
18697     int vlen_enc = vector_length_encoding(this);
18698     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18699     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18700     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18701     __ lea($tmp$$Register, $mem$$Address);
18702     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18703   %}
18704   ins_pipe( pipe_slow );
18705 %}
18706 
18708 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18709   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18710             !is_subword_type(Matcher::vector_element_basic_type(n)));
18711   match(Set dst (LoadVectorGather mem idx));
18712   effect(TEMP dst, TEMP tmp, TEMP ktmp);
18713   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
18714   ins_encode %{
18715     int vlen_enc = vector_length_encoding(this);
18716     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18717     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18718     __ lea($tmp$$Register, $mem$$Address);
18719     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18720   %}
18721   ins_pipe( pipe_slow );
18722 %}
18723 
18724 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18725   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18726             !is_subword_type(Matcher::vector_element_basic_type(n)));
18727   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18728   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
18729   format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
18730   ins_encode %{
18731     assert(UseAVX > 2, "sanity");
18732     int vlen_enc = vector_length_encoding(this);
18733     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18734     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: since the gather instruction partially updates the opmask register used
    // for predication, the mask operand is copied to a temporary first.
18737     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18738     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18739     __ lea($tmp$$Register, $mem$$Address);
18740     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18741   %}
18742   ins_pipe( pipe_slow );
18743 %}
18744 
18745 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18746   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18747   match(Set dst (LoadVectorGather mem idx_base));
18748   effect(TEMP tmp, TEMP rtmp);
18749   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18750   ins_encode %{
18751     int vlen_enc = vector_length_encoding(this);
18752     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18753     __ lea($tmp$$Register, $mem$$Address);
18754     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18755   %}
18756   ins_pipe( pipe_slow );
18757 %}
18758 
18759 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18760                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18761   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18762   match(Set dst (LoadVectorGather mem idx_base));
18763   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18764   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18765   ins_encode %{
18766     int vlen_enc = vector_length_encoding(this);
18767     int vector_len = Matcher::vector_length(this);
18768     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18769     __ lea($tmp$$Register, $mem$$Address);
18770     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18771     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18772                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18773   %}
18774   ins_pipe( pipe_slow );
18775 %}
18776 
18777 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18778   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18779   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18780   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18781   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18782   ins_encode %{
18783     int vlen_enc = vector_length_encoding(this);
18784     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18785     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18786     __ lea($tmp$$Register, $mem$$Address);
18787     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18788     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18789   %}
18790   ins_pipe( pipe_slow );
18791 %}
18792 
18793 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18794                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18795   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18796   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18797   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18798   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18799   ins_encode %{
18800     int vlen_enc = vector_length_encoding(this);
18801     int vector_len = Matcher::vector_length(this);
18802     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18803     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18804     __ lea($tmp$$Register, $mem$$Address);
18805     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18806     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18807     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18808                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18809   %}
18810   ins_pipe( pipe_slow );
18811 %}
18812 
18813 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18814   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18815   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18816   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18817   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18818   ins_encode %{
18819     int vlen_enc = vector_length_encoding(this);
18820     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18821     __ lea($tmp$$Register, $mem$$Address);
18822     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18823     if (elem_bt == T_SHORT) {
18824       __ movl($mask_idx$$Register, 0x55555555);
18825       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18826     }
18827     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18828     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18829   %}
18830   ins_pipe( pipe_slow );
18831 %}
18832 
18833 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18834                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18835   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18836   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18837   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18838   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18839   ins_encode %{
18840     int vlen_enc = vector_length_encoding(this);
18841     int vector_len = Matcher::vector_length(this);
18842     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18843     __ lea($tmp$$Register, $mem$$Address);
18844     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18845     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18846     if (elem_bt == T_SHORT) {
18847       __ movl($mask_idx$$Register, 0x55555555);
18848       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18849     }
18850     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18851     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18852                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18853   %}
18854   ins_pipe( pipe_slow );
18855 %}
18856 
18857 // ====================Scatter=======================================
18858 
18859 // Scatter INT, LONG, FLOAT, DOUBLE
18860 
18861 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18862   predicate(UseAVX > 2);
18863   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18864   effect(TEMP tmp, TEMP ktmp);
18865   format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
18866   ins_encode %{
18867     int vlen_enc = vector_length_encoding(this, $src);
18868     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18869 
18870     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18871     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18872 
18873     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18874     __ lea($tmp$$Register, $mem$$Address);
18875     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18876   %}
18877   ins_pipe( pipe_slow );
18878 %}
18879 
18880 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18881   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18882   effect(TEMP tmp, TEMP ktmp);
18883   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18884   ins_encode %{
18885     int vlen_enc = vector_length_encoding(this, $src);
18886     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18887     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18888     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: since the scatter instruction partially updates the opmask register used
    // for predication, the mask operand is copied to a temporary first.
18891     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18892     __ lea($tmp$$Register, $mem$$Address);
18893     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18894   %}
18895   ins_pipe( pipe_slow );
18896 %}
18897 
18898 // ====================REPLICATE=======================================
18899 
18900 // Replicate byte scalar to be vector
18901 instruct vReplB_reg(vec dst, rRegI src) %{
18902   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18903   match(Set dst (Replicate src));
18904   format %{ "replicateB $dst,$src" %}
18905   ins_encode %{
18906     uint vlen = Matcher::vector_length(this);
18907     if (UseAVX >= 2) {
18908       int vlen_enc = vector_length_encoding(this);
18909       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18910         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18911         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18912       } else {
18913         __ movdl($dst$$XMMRegister, $src$$Register);
18914         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18915       }
18916     } else {
      assert(UseAVX < 2, "");
18918       __ movdl($dst$$XMMRegister, $src$$Register);
18919       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18920       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18921       if (vlen >= 16) {
18922         assert(vlen == 16, "");
18923         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18924       }
18925     }
18926   %}
18927   ins_pipe( pipe_slow );
18928 %}
18929 
18930 instruct ReplB_mem(vec dst, memory mem) %{
18931   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18932   match(Set dst (Replicate (LoadB mem)));
18933   format %{ "replicateB $dst,$mem" %}
18934   ins_encode %{
18935     int vlen_enc = vector_length_encoding(this);
18936     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18937   %}
18938   ins_pipe( pipe_slow );
18939 %}
18940 
18941 // ====================ReplicateS=======================================
18942 
18943 instruct vReplS_reg(vec dst, rRegI src) %{
18944   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18945   match(Set dst (Replicate src));
18946   format %{ "replicateS $dst,$src" %}
18947   ins_encode %{
18948     uint vlen = Matcher::vector_length(this);
18949     int vlen_enc = vector_length_encoding(this);
18950     if (UseAVX >= 2) {
18951       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18952         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18953         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18954       } else {
18955         __ movdl($dst$$XMMRegister, $src$$Register);
18956         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18957       }
18958     } else {
18959       assert(UseAVX < 2, "");
18960       __ movdl($dst$$XMMRegister, $src$$Register);
18961       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18962       if (vlen >= 8) {
18963         assert(vlen == 8, "");
18964         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18965       }
18966     }
18967   %}
18968   ins_pipe( pipe_slow );
18969 %}
18970 
18971 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18972   match(Set dst (Replicate con));
18973   effect(TEMP rtmp);
18974   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18975   ins_encode %{
18976     int vlen_enc = vector_length_encoding(this);
18977     BasicType bt = Matcher::vector_element_basic_type(this);
18978     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18979     __ movl($rtmp$$Register, $con$$constant);
18980     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18981   %}
18982   ins_pipe( pipe_slow );
18983 %}
18984 
18985 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18986   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18987   match(Set dst (Replicate src));
18988   effect(TEMP rtmp);
18989   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18990   ins_encode %{
18991     int vlen_enc = vector_length_encoding(this);
18992     __ vmovw($rtmp$$Register, $src$$XMMRegister);
18993     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18994   %}
18995   ins_pipe( pipe_slow );
18996 %}
18997 
18998 instruct ReplS_mem(vec dst, memory mem) %{
18999   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
19000   match(Set dst (Replicate (LoadS mem)));
19001   format %{ "replicateS $dst,$mem" %}
19002   ins_encode %{
19003     int vlen_enc = vector_length_encoding(this);
19004     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
19005   %}
19006   ins_pipe( pipe_slow );
19007 %}
19008 
19009 // ====================ReplicateI=======================================
19010 
19011 instruct ReplI_reg(vec dst, rRegI src) %{
19012   predicate(Matcher::vector_element_basic_type(n) == T_INT);
19013   match(Set dst (Replicate src));
19014   format %{ "replicateI $dst,$src" %}
19015   ins_encode %{
19016     uint vlen = Matcher::vector_length(this);
19017     int vlen_enc = vector_length_encoding(this);
19018     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19019       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
19020     } else if (VM_Version::supports_avx2()) {
19021       __ movdl($dst$$XMMRegister, $src$$Register);
19022       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19023     } else {
19024       __ movdl($dst$$XMMRegister, $src$$Register);
19025       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19026     }
19027   %}
19028   ins_pipe( pipe_slow );
19029 %}
19030 
19031 instruct ReplI_mem(vec dst, memory mem) %{
19032   predicate(Matcher::vector_element_basic_type(n) == T_INT);
19033   match(Set dst (Replicate (LoadI mem)));
19034   format %{ "replicateI $dst,$mem" %}
19035   ins_encode %{
19036     int vlen_enc = vector_length_encoding(this);
19037     if (VM_Version::supports_avx2()) {
19038       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19039     } else if (VM_Version::supports_avx()) {
19040       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19041     } else {
19042       __ movdl($dst$$XMMRegister, $mem$$Address);
19043       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19044     }
19045   %}
19046   ins_pipe( pipe_slow );
19047 %}
19048 
19049 instruct ReplI_imm(vec dst, immI con) %{
19050   predicate(Matcher::is_non_long_integral_vector(n));
19051   match(Set dst (Replicate con));
19052   format %{ "replicateI $dst,$con" %}
19053   ins_encode %{
19054     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
19055                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
19056                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
19057     BasicType bt = Matcher::vector_element_basic_type(this);
19058     int vlen = Matcher::vector_length_in_bytes(this);
19059     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
19060   %}
19061   ins_pipe( pipe_slow );
19062 %}
19063 
19064 // Replicate scalar zero to be vector
19065 instruct ReplI_zero(vec dst, immI_0 zero) %{
19066   predicate(Matcher::is_non_long_integral_vector(n));
19067   match(Set dst (Replicate zero));
19068   format %{ "replicateI $dst,$zero" %}
19069   ins_encode %{
19070     int vlen_enc = vector_length_encoding(this);
19071     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19072       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19073     } else {
19074       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19075     }
19076   %}
19077   ins_pipe( fpu_reg_reg );
19078 %}
19079 
19080 instruct ReplI_M1(vec dst, immI_M1 con) %{
19081   predicate(Matcher::is_non_long_integral_vector(n));
19082   match(Set dst (Replicate con));
19083   format %{ "vallones $dst" %}
19084   ins_encode %{
19085     int vector_len = vector_length_encoding(this);
19086     __ vallones($dst$$XMMRegister, vector_len);
19087   %}
19088   ins_pipe( pipe_slow );
19089 %}
19090 
19091 // ====================ReplicateL=======================================
19092 
19093 // Replicate long (8 byte) scalar to be vector
19094 instruct ReplL_reg(vec dst, rRegL src) %{
19095   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19096   match(Set dst (Replicate src));
19097   format %{ "replicateL $dst,$src" %}
19098   ins_encode %{
19099     int vlen = Matcher::vector_length(this);
19100     int vlen_enc = vector_length_encoding(this);
19101     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19102       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
19103     } else if (VM_Version::supports_avx2()) {
19104       __ movdq($dst$$XMMRegister, $src$$Register);
19105       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19106     } else {
19107       __ movdq($dst$$XMMRegister, $src$$Register);
19108       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19109     }
19110   %}
19111   ins_pipe( pipe_slow );
19112 %}
19113 
19114 instruct ReplL_mem(vec dst, memory mem) %{
19115   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19116   match(Set dst (Replicate (LoadL mem)));
19117   format %{ "replicateL $dst,$mem" %}
19118   ins_encode %{
19119     int vlen_enc = vector_length_encoding(this);
19120     if (VM_Version::supports_avx2()) {
19121       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
19122     } else if (VM_Version::supports_sse3()) {
19123       __ movddup($dst$$XMMRegister, $mem$$Address);
19124     } else {
19125       __ movq($dst$$XMMRegister, $mem$$Address);
19126       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19127     }
19128   %}
19129   ins_pipe( pipe_slow );
19130 %}
19131 
19132 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
19133 instruct ReplL_imm(vec dst, immL con) %{
19134   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19135   match(Set dst (Replicate con));
19136   format %{ "replicateL $dst,$con" %}
19137   ins_encode %{
19138     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19139     int vlen = Matcher::vector_length_in_bytes(this);
19140     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
19141   %}
19142   ins_pipe( pipe_slow );
19143 %}
19144 
19145 instruct ReplL_zero(vec dst, immL0 zero) %{
19146   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19147   match(Set dst (Replicate zero));
19148   format %{ "replicateL $dst,$zero" %}
19149   ins_encode %{
19150     int vlen_enc = vector_length_encoding(this);
19151     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19152       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19153     } else {
19154       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19155     }
19156   %}
19157   ins_pipe( fpu_reg_reg );
19158 %}
19159 
19160 instruct ReplL_M1(vec dst, immL_M1 con) %{
19161   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19162   match(Set dst (Replicate con));
19163   format %{ "vallones $dst" %}
19164   ins_encode %{
19165     int vector_len = vector_length_encoding(this);
19166     __ vallones($dst$$XMMRegister, vector_len);
19167   %}
19168   ins_pipe( pipe_slow );
19169 %}
19170 
19171 // ====================ReplicateF=======================================
19172 
19173 instruct vReplF_reg(vec dst, vlRegF src) %{
19174   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19175   match(Set dst (Replicate src));
19176   format %{ "replicateF $dst,$src" %}
19177   ins_encode %{
19178     uint vlen = Matcher::vector_length(this);
19179     int vlen_enc = vector_length_encoding(this);
19180     if (vlen <= 4) {
19181       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19182     } else if (VM_Version::supports_avx2()) {
19183       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19184     } else {
19185       assert(vlen == 8, "sanity");
19186       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19187       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19188     }
19189   %}
19190   ins_pipe( pipe_slow );
19191 %}
19192 
19193 instruct ReplF_reg(vec dst, vlRegF src) %{
19194   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19195   match(Set dst (Replicate src));
19196   format %{ "replicateF $dst,$src" %}
19197   ins_encode %{
19198     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19199   %}
19200   ins_pipe( pipe_slow );
19201 %}
19202 
19203 instruct ReplF_mem(vec dst, memory mem) %{
19204   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19205   match(Set dst (Replicate (LoadF mem)));
19206   format %{ "replicateF $dst,$mem" %}
19207   ins_encode %{
19208     int vlen_enc = vector_length_encoding(this);
19209     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19210   %}
19211   ins_pipe( pipe_slow );
19212 %}
19213 
19214 // Replicate float scalar immediate to be vector by loading from const table.
19215 instruct ReplF_imm(vec dst, immF con) %{
19216   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19217   match(Set dst (Replicate con));
19218   format %{ "replicateF $dst,$con" %}
19219   ins_encode %{
19220     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19221                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19222     int vlen = Matcher::vector_length_in_bytes(this);
19223     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19224   %}
19225   ins_pipe( pipe_slow );
19226 %}
19227 
19228 instruct ReplF_zero(vec dst, immF0 zero) %{
19229   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19230   match(Set dst (Replicate zero));
19231   format %{ "replicateF $dst,$zero" %}
19232   ins_encode %{
19233     int vlen_enc = vector_length_encoding(this);
19234     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19235       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19236     } else {
19237       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19238     }
19239   %}
19240   ins_pipe( fpu_reg_reg );
19241 %}
19242 
19243 // ====================ReplicateD=======================================
19244 
19245 // Replicate double (8 bytes) scalar to be vector
19246 instruct vReplD_reg(vec dst, vlRegD src) %{
19247   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19248   match(Set dst (Replicate src));
19249   format %{ "replicateD $dst,$src" %}
19250   ins_encode %{
19251     uint vlen = Matcher::vector_length(this);
19252     int vlen_enc = vector_length_encoding(this);
19253     if (vlen <= 2) {
19254       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19255     } else if (VM_Version::supports_avx2()) {
19256       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19257     } else {
19258       assert(vlen == 4, "sanity");
19259       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19260       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19261     }
19262   %}
19263   ins_pipe( pipe_slow );
19264 %}
19265 
19266 instruct ReplD_reg(vec dst, vlRegD src) %{
19267   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19268   match(Set dst (Replicate src));
19269   format %{ "replicateD $dst,$src" %}
19270   ins_encode %{
19271     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19272   %}
19273   ins_pipe( pipe_slow );
19274 %}
19275 
19276 instruct ReplD_mem(vec dst, memory mem) %{
19277   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19278   match(Set dst (Replicate (LoadD mem)));
19279   format %{ "replicateD $dst,$mem" %}
19280   ins_encode %{
19281     if (Matcher::vector_length(this) >= 4) {
19282       int vlen_enc = vector_length_encoding(this);
19283       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19284     } else {
19285       __ movddup($dst$$XMMRegister, $mem$$Address);
19286     }
19287   %}
19288   ins_pipe( pipe_slow );
19289 %}
19290 
19291 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19292 instruct ReplD_imm(vec dst, immD con) %{
19293   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19294   match(Set dst (Replicate con));
19295   format %{ "replicateD $dst,$con" %}
19296   ins_encode %{
19297     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19298     int vlen = Matcher::vector_length_in_bytes(this);
19299     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19300   %}
19301   ins_pipe( pipe_slow );
19302 %}
19303 
19304 instruct ReplD_zero(vec dst, immD0 zero) %{
19305   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19306   match(Set dst (Replicate zero));
19307   format %{ "replicateD $dst,$zero" %}
19308   ins_encode %{
19309     int vlen_enc = vector_length_encoding(this);
19310     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19311       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19312     } else {
19313       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19314     }
19315   %}
19316   ins_pipe( fpu_reg_reg );
19317 %}
19318 
19319 // ====================VECTOR INSERT=======================================
19320 
19321 instruct insert(vec dst, rRegI val, immU8 idx) %{
19322   predicate(Matcher::vector_length_in_bytes(n) < 32);
19323   match(Set dst (VectorInsert (Binary dst val) idx));
19324   format %{ "vector_insert $dst,$val,$idx" %}
19325   ins_encode %{
19326     assert(UseSSE >= 4, "required");
19327     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19328 
19329     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19330 
19331     assert(is_integral_type(elem_bt), "");
19332     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19333 
19334     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19335   %}
19336   ins_pipe( pipe_slow );
19337 %}
19338 
19339 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19340   predicate(Matcher::vector_length_in_bytes(n) == 32);
19341   match(Set dst (VectorInsert (Binary src val) idx));
19342   effect(TEMP vtmp);
19343   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19344   ins_encode %{
19345     int vlen_enc = Assembler::AVX_256bit;
19346     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19347     int elem_per_lane = 16/type2aelembytes(elem_bt);
19348     int log2epr = log2(elem_per_lane);
19349 
19350     assert(is_integral_type(elem_bt), "sanity");
19351     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19352 
19353     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19354     uint y_idx = ($idx$$constant >> log2epr) & 1;
19355     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19356     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19357     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19358   %}
19359   ins_pipe( pipe_slow );
19360 %}
19361 
19362 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19363   predicate(Matcher::vector_length_in_bytes(n) == 64);
19364   match(Set dst (VectorInsert (Binary src val) idx));
19365   effect(TEMP vtmp);
19366   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19367   ins_encode %{
19368     assert(UseAVX > 2, "sanity");
19369 
19370     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19371     int elem_per_lane = 16/type2aelembytes(elem_bt);
19372     int log2epr = log2(elem_per_lane);
19373 
19374     assert(is_integral_type(elem_bt), "");
19375     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19376 
19377     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19378     uint y_idx = ($idx$$constant >> log2epr) & 3;
19379     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19380     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19381     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19382   %}
19383   ins_pipe( pipe_slow );
19384 %}
19385 
19386 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19387   predicate(Matcher::vector_length(n) == 2);
19388   match(Set dst (VectorInsert (Binary dst val) idx));
19389   format %{ "vector_insert $dst,$val,$idx" %}
19390   ins_encode %{
19391     assert(UseSSE >= 4, "required");
19392     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19393     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19394 
19395     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19396   %}
19397   ins_pipe( pipe_slow );
19398 %}
19399 
19400 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19401   predicate(Matcher::vector_length(n) == 4);
19402   match(Set dst (VectorInsert (Binary src val) idx));
19403   effect(TEMP vtmp);
19404   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19405   ins_encode %{
19406     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19407     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19408 
19409     uint x_idx = $idx$$constant & right_n_bits(1);
19410     uint y_idx = ($idx$$constant >> 1) & 1;
19411     int vlen_enc = Assembler::AVX_256bit;
19412     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19413     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19414     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19415   %}
19416   ins_pipe( pipe_slow );
19417 %}
19418 
19419 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19420   predicate(Matcher::vector_length(n) == 8);
19421   match(Set dst (VectorInsert (Binary src val) idx));
19422   effect(TEMP vtmp);
19423   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19424   ins_encode %{
19425     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19426     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19427 
19428     uint x_idx = $idx$$constant & right_n_bits(1);
19429     uint y_idx = ($idx$$constant >> 1) & 3;
19430     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19431     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19432     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19433   %}
19434   ins_pipe( pipe_slow );
19435 %}
19436 
19437 instruct insertF(vec dst, regF val, immU8 idx) %{
19438   predicate(Matcher::vector_length(n) < 8);
19439   match(Set dst (VectorInsert (Binary dst val) idx));
19440   format %{ "vector_insert $dst,$val,$idx" %}
19441   ins_encode %{
19442     assert(UseSSE >= 4, "sanity");
19443 
19444     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19445     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19446 
19447     uint x_idx = $idx$$constant & right_n_bits(2);
19448     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19449   %}
19450   ins_pipe( pipe_slow );
19451 %}
19452 
19453 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19454   predicate(Matcher::vector_length(n) >= 8);
19455   match(Set dst (VectorInsert (Binary src val) idx));
19456   effect(TEMP vtmp);
19457   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19458   ins_encode %{
19459     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19460     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19461 
19462     int vlen = Matcher::vector_length(this);
19463     uint x_idx = $idx$$constant & right_n_bits(2);
19464     if (vlen == 8) {
19465       uint y_idx = ($idx$$constant >> 2) & 1;
19466       int vlen_enc = Assembler::AVX_256bit;
19467       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19468       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19469       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19470     } else {
19471       assert(vlen == 16, "sanity");
19472       uint y_idx = ($idx$$constant >> 2) & 3;
19473       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19474       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19475       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19476     }
19477   %}
19478   ins_pipe( pipe_slow );
19479 %}
19480 
19481 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19482   predicate(Matcher::vector_length(n) == 2);
19483   match(Set dst (VectorInsert (Binary dst val) idx));
19484   effect(TEMP tmp);
19485   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19486   ins_encode %{
19487     assert(UseSSE >= 4, "sanity");
19488     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19489     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19490 
19491     __ movq($tmp$$Register, $val$$XMMRegister);
19492     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19493   %}
19494   ins_pipe( pipe_slow );
19495 %}
19496 
19497 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19498   predicate(Matcher::vector_length(n) == 4);
19499   match(Set dst (VectorInsert (Binary src val) idx));
19500   effect(TEMP vtmp, TEMP tmp);
19501   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19502   ins_encode %{
19503     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19504     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19505 
19506     uint x_idx = $idx$$constant & right_n_bits(1);
19507     uint y_idx = ($idx$$constant >> 1) & 1;
19508     int vlen_enc = Assembler::AVX_256bit;
19509     __ movq($tmp$$Register, $val$$XMMRegister);
19510     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19511     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19512     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19513   %}
19514   ins_pipe( pipe_slow );
19515 %}
19516 
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19518   predicate(Matcher::vector_length(n) == 8);
19519   match(Set dst (VectorInsert (Binary src val) idx));
19520   effect(TEMP tmp, TEMP vtmp);
19521   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19522   ins_encode %{
19523     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19524     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19525 
19526     uint x_idx = $idx$$constant & right_n_bits(1);
19527     uint y_idx = ($idx$$constant >> 1) & 3;
19528     __ movq($tmp$$Register, $val$$XMMRegister);
19529     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19530     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19531     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19532   %}
19533   ins_pipe( pipe_slow );
19534 %}
19535 
19536 // ====================REDUCTION ARITHMETIC=======================================
19537 
19538 // =======================Int Reduction==========================================
19539 
19540 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19541   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19542   match(Set dst (AddReductionVI src1 src2));
19543   match(Set dst (MulReductionVI src1 src2));
19544   match(Set dst (AndReductionV  src1 src2));
19545   match(Set dst ( OrReductionV  src1 src2));
19546   match(Set dst (XorReductionV  src1 src2));
19547   match(Set dst (MinReductionV  src1 src2));
19548   match(Set dst (MaxReductionV  src1 src2));
19549   effect(TEMP vtmp1, TEMP vtmp2);
19550   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19551   ins_encode %{
19552     int opcode = this->ideal_Opcode();
19553     int vlen = Matcher::vector_length(this, $src2);
19554     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19555   %}
19556   ins_pipe( pipe_slow );
19557 %}
19558 
19559 // =======================Long Reduction==========================================
19560 
19561 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19562   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19563   match(Set dst (AddReductionVL src1 src2));
19564   match(Set dst (MulReductionVL src1 src2));
19565   match(Set dst (AndReductionV  src1 src2));
19566   match(Set dst ( OrReductionV  src1 src2));
19567   match(Set dst (XorReductionV  src1 src2));
19568   match(Set dst (MinReductionV  src1 src2));
19569   match(Set dst (MaxReductionV  src1 src2));
19570   effect(TEMP vtmp1, TEMP vtmp2);
19571   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19572   ins_encode %{
19573     int opcode = this->ideal_Opcode();
19574     int vlen = Matcher::vector_length(this, $src2);
19575     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19576   %}
19577   ins_pipe( pipe_slow );
19578 %}
19579 
19580 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19581   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19582   match(Set dst (AddReductionVL src1 src2));
19583   match(Set dst (MulReductionVL src1 src2));
19584   match(Set dst (AndReductionV  src1 src2));
19585   match(Set dst ( OrReductionV  src1 src2));
19586   match(Set dst (XorReductionV  src1 src2));
19587   match(Set dst (MinReductionV  src1 src2));
19588   match(Set dst (MaxReductionV  src1 src2));
19589   effect(TEMP vtmp1, TEMP vtmp2);
19590   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19591   ins_encode %{
19592     int opcode = this->ideal_Opcode();
19593     int vlen = Matcher::vector_length(this, $src2);
19594     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19595   %}
19596   ins_pipe( pipe_slow );
19597 %}
19598 
19599 // =======================Float Reduction==========================================
19600 
19601 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19602   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19603   match(Set dst (AddReductionVF dst src));
19604   match(Set dst (MulReductionVF dst src));
19605   effect(TEMP dst, TEMP vtmp);
19606   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19607   ins_encode %{
19608     int opcode = this->ideal_Opcode();
19609     int vlen = Matcher::vector_length(this, $src);
19610     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19611   %}
19612   ins_pipe( pipe_slow );
19613 %}
19614 
19615 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19616   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19617   match(Set dst (AddReductionVF dst src));
19618   match(Set dst (MulReductionVF dst src));
19619   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19620   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19621   ins_encode %{
19622     int opcode = this->ideal_Opcode();
19623     int vlen = Matcher::vector_length(this, $src);
19624     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19625   %}
19626   ins_pipe( pipe_slow );
19627 %}
19628 
19629 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19630   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19631   match(Set dst (AddReductionVF dst src));
19632   match(Set dst (MulReductionVF dst src));
19633   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19634   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19635   ins_encode %{
19636     int opcode = this->ideal_Opcode();
19637     int vlen = Matcher::vector_length(this, $src);
19638     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19639   %}
19640   ins_pipe( pipe_slow );
19641 %}
19642 
19644 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19645   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19646   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19647   // src1 contains reduction identity
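  // Relaxed ordering permits a lane-wise (tree-shaped) reduction, which is
  // cheaper than the strictly sequential accumulation required above.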
19648   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19649   match(Set dst (AddReductionVF src1 src2));
19650   match(Set dst (MulReductionVF src1 src2));
19651   effect(TEMP dst);
19652   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19653   ins_encode %{
19654     int opcode = this->ideal_Opcode();
19655     int vlen = Matcher::vector_length(this, $src2);
19656     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19657   %}
19658   ins_pipe( pipe_slow );
19659 %}
19660 
19661 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19662   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19663   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19664   // src1 contains reduction identity
19665   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19666   match(Set dst (AddReductionVF src1 src2));
19667   match(Set dst (MulReductionVF src1 src2));
19668   effect(TEMP dst, TEMP vtmp);
19669   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19670   ins_encode %{
19671     int opcode = this->ideal_Opcode();
19672     int vlen = Matcher::vector_length(this, $src2);
19673     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19674   %}
19675   ins_pipe( pipe_slow );
19676 %}
19677 
19678 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19679   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19680   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19681   // src1 contains reduction identity
19682   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19683   match(Set dst (AddReductionVF src1 src2));
19684   match(Set dst (MulReductionVF src1 src2));
19685   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19686   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19687   ins_encode %{
19688     int opcode = this->ideal_Opcode();
19689     int vlen = Matcher::vector_length(this, $src2);
19690     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19691   %}
19692   ins_pipe( pipe_slow );
19693 %}
19694 
19695 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19696   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19697   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19698   // src1 contains reduction identity
19699   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19700   match(Set dst (AddReductionVF src1 src2));
19701   match(Set dst (MulReductionVF src1 src2));
19702   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19703   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19704   ins_encode %{
19705     int opcode = this->ideal_Opcode();
19706     int vlen = Matcher::vector_length(this, $src2);
19707     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19708   %}
19709   ins_pipe( pipe_slow );
19710 %}
19711 
19712 // =======================Double Reduction==========================================
19713 
19714 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19715   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19716   match(Set dst (AddReductionVD dst src));
19717   match(Set dst (MulReductionVD dst src));
19718   effect(TEMP dst, TEMP vtmp);
19719   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19720   ins_encode %{
19721     int opcode = this->ideal_Opcode();
19722     int vlen = Matcher::vector_length(this, $src);
19723     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19724 %}
19725   ins_pipe( pipe_slow );
19726 %}
19727 
19728 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19729   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19730   match(Set dst (AddReductionVD dst src));
19731   match(Set dst (MulReductionVD dst src));
19732   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19733   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19734   ins_encode %{
19735     int opcode = this->ideal_Opcode();
19736     int vlen = Matcher::vector_length(this, $src);
19737     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19738   %}
19739   ins_pipe( pipe_slow );
19740 %}
19741 
19742 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19743   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19744   match(Set dst (AddReductionVD dst src));
19745   match(Set dst (MulReductionVD dst src));
19746   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19747   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19748   ins_encode %{
19749     int opcode = this->ideal_Opcode();
19750     int vlen = Matcher::vector_length(this, $src);
19751     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19752   %}
19753   ins_pipe( pipe_slow );
19754 %}
19755 
19756 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19757   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19758   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19759   // src1 contains reduction identity
19760   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19761   match(Set dst (AddReductionVD src1 src2));
19762   match(Set dst (MulReductionVD src1 src2));
19763   effect(TEMP dst);
19764   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19765   ins_encode %{
19766     int opcode = this->ideal_Opcode();
19767     int vlen = Matcher::vector_length(this, $src2);
19768     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19769 %}
19770   ins_pipe( pipe_slow );
19771 %}
19772 
19773 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19774   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19775   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19776   // src1 contains reduction identity
19777   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19778   match(Set dst (AddReductionVD src1 src2));
19779   match(Set dst (MulReductionVD src1 src2));
19780   effect(TEMP dst, TEMP vtmp);
19781   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19782   ins_encode %{
19783     int opcode = this->ideal_Opcode();
19784     int vlen = Matcher::vector_length(this, $src2);
19785     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19786   %}
19787   ins_pipe( pipe_slow );
19788 %}
19789 
19790 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19791   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19792   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19793   // src1 contains reduction identity
19794   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19795   match(Set dst (AddReductionVD src1 src2));
19796   match(Set dst (MulReductionVD src1 src2));
19797   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19798   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19799   ins_encode %{
19800     int opcode = this->ideal_Opcode();
19801     int vlen = Matcher::vector_length(this, $src2);
19802     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19803   %}
19804   ins_pipe( pipe_slow );
19805 %}
19806 
19807 // =======================Byte Reduction==========================================
19808 
19809 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19810   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19811   match(Set dst (AddReductionVI src1 src2));
19812   match(Set dst (AndReductionV  src1 src2));
19813   match(Set dst ( OrReductionV  src1 src2));
19814   match(Set dst (XorReductionV  src1 src2));
19815   match(Set dst (MinReductionV  src1 src2));
19816   match(Set dst (MaxReductionV  src1 src2));
19817   effect(TEMP vtmp1, TEMP vtmp2);
19818   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19819   ins_encode %{
19820     int opcode = this->ideal_Opcode();
19821     int vlen = Matcher::vector_length(this, $src2);
19822     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19823   %}
19824   ins_pipe( pipe_slow );
19825 %}
19826 
19827 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19828   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19829   match(Set dst (AddReductionVI src1 src2));
19830   match(Set dst (AndReductionV  src1 src2));
19831   match(Set dst ( OrReductionV  src1 src2));
19832   match(Set dst (XorReductionV  src1 src2));
19833   match(Set dst (MinReductionV  src1 src2));
19834   match(Set dst (MaxReductionV  src1 src2));
19835   effect(TEMP vtmp1, TEMP vtmp2);
19836   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19837   ins_encode %{
19838     int opcode = this->ideal_Opcode();
19839     int vlen = Matcher::vector_length(this, $src2);
19840     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19841   %}
19842   ins_pipe( pipe_slow );
19843 %}
19844 
19845 // =======================Short Reduction==========================================
19846 
19847 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19848   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19849   match(Set dst (AddReductionVI src1 src2));
19850   match(Set dst (MulReductionVI src1 src2));
19851   match(Set dst (AndReductionV  src1 src2));
19852   match(Set dst ( OrReductionV  src1 src2));
19853   match(Set dst (XorReductionV  src1 src2));
19854   match(Set dst (MinReductionV  src1 src2));
19855   match(Set dst (MaxReductionV  src1 src2));
19856   effect(TEMP vtmp1, TEMP vtmp2);
19857   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19858   ins_encode %{
19859     int opcode = this->ideal_Opcode();
19860     int vlen = Matcher::vector_length(this, $src2);
19861     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19862   %}
19863   ins_pipe( pipe_slow );
19864 %}
19865 
19866 // =======================Mul Reduction==========================================
19867 
19868 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19869   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19870             Matcher::vector_length(n->in(2)) <= 32); // src2
19871   match(Set dst (MulReductionVI src1 src2));
19872   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19873   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19874   ins_encode %{
19875     int opcode = this->ideal_Opcode();
19876     int vlen = Matcher::vector_length(this, $src2);
19877     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19878   %}
19879   ins_pipe( pipe_slow );
19880 %}
19881 
19882 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19883   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19884             Matcher::vector_length(n->in(2)) == 64); // src2
19885   match(Set dst (MulReductionVI src1 src2));
19886   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19887   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19888   ins_encode %{
19889     int opcode = this->ideal_Opcode();
19890     int vlen = Matcher::vector_length(this, $src2);
19891     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19892   %}
19893   ins_pipe( pipe_slow );
19894 %}
19895 
19896 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
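// Java Math.min/max semantics (NaN propagation and -0.0 ordered below +0.0)
// do not map onto a bare minps/maxps, so the pre-AVX10.2 rules route through
// compare-and-blend sequences in several temporaries and clobber the flags.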
19898 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19899                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19900   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19901             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19902              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19903             Matcher::vector_length(n->in(2)) == 2);
19904   match(Set dst (MinReductionV src1 src2));
19905   match(Set dst (MaxReductionV src1 src2));
19906   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19907   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19908   ins_encode %{
19909     assert(UseAVX > 0, "sanity");
19910 
19911     int opcode = this->ideal_Opcode();
19912     int vlen = Matcher::vector_length(this, $src2);
19913     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19914                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19915   %}
19916   ins_pipe( pipe_slow );
19917 %}
19918 
19919 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19920                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19921   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19922             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19923              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19924             Matcher::vector_length(n->in(2)) >= 4);
19925   match(Set dst (MinReductionV src1 src2));
19926   match(Set dst (MaxReductionV src1 src2));
19927   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19928   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19929   ins_encode %{
19930     assert(UseAVX > 0, "sanity");
19931 
19932     int opcode = this->ideal_Opcode();
19933     int vlen = Matcher::vector_length(this, $src2);
19934     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19935                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19936   %}
19937   ins_pipe( pipe_slow );
19938 %}
19939 
19940 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19941                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19942   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19943             Matcher::vector_length(n->in(2)) == 2);
19944   match(Set dst (MinReductionV dst src));
19945   match(Set dst (MaxReductionV dst src));
19946   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19947   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19948   ins_encode %{
19949     assert(UseAVX > 0, "sanity");
19950 
19951     int opcode = this->ideal_Opcode();
19952     int vlen = Matcher::vector_length(this, $src);
19953     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19954                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19955   %}
19956   ins_pipe( pipe_slow );
19957 %}
19958 
19960 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19961                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19962   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19963             Matcher::vector_length(n->in(2)) >= 4);
19964   match(Set dst (MinReductionV dst src));
19965   match(Set dst (MaxReductionV dst src));
19966   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19967   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19968   ins_encode %{
19969     assert(UseAVX > 0, "sanity");
19970 
19971     int opcode = this->ideal_Opcode();
19972     int vlen = Matcher::vector_length(this, $src);
19973     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19974                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19975   %}
19976   ins_pipe( pipe_slow );
19977 %}
19978 
19979 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19980   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19981             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19982              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19983             Matcher::vector_length(n->in(2)) == 2);
19984   match(Set dst (MinReductionV src1 src2));
19985   match(Set dst (MaxReductionV src1 src2));
19986   effect(TEMP dst, TEMP xtmp1);
19987   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19988   ins_encode %{
19989     int opcode = this->ideal_Opcode();
19990     int vlen = Matcher::vector_length(this, $src2);
19991     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19992                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19993   %}
19994   ins_pipe( pipe_slow );
19995 %}
19996 
19997 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19998   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19999             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20000              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20001             Matcher::vector_length(n->in(2)) >= 4);
20002   match(Set dst (MinReductionV src1 src2));
20003   match(Set dst (MaxReductionV src1 src2));
20004   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20005   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
20006   ins_encode %{
20007     int opcode = this->ideal_Opcode();
20008     int vlen = Matcher::vector_length(this, $src2);
20009     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20010                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20011   %}
20012   ins_pipe( pipe_slow );
20013 %}
20014 
20015 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
20016   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20017             Matcher::vector_length(n->in(2)) == 2);
20018   match(Set dst (MinReductionV dst src));
20019   match(Set dst (MaxReductionV dst src));
20020   effect(TEMP dst, TEMP xtmp1);
20021   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
20022   ins_encode %{
20023     int opcode = this->ideal_Opcode();
20024     int vlen = Matcher::vector_length(this, $src);
20025     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20026                          $xtmp1$$XMMRegister);
20027   %}
20028   ins_pipe( pipe_slow );
20029 %}
20030 
20031 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
20032   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20033             Matcher::vector_length(n->in(2)) >= 4);
20034   match(Set dst (MinReductionV dst src));
20035   match(Set dst (MaxReductionV dst src));
20036   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20037   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
20038   ins_encode %{
20039     int opcode = this->ideal_Opcode();
20040     int vlen = Matcher::vector_length(this, $src);
20041     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20042                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20043   %}
20044   ins_pipe( pipe_slow );
20045 %}
20046 
//--------------------Min/Max Double Reduction --------------------
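// Same Java min/max semantics as the float reductions above.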
20048 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20049                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20050   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20051             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20052              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20053             Matcher::vector_length(n->in(2)) == 2);
20054   match(Set dst (MinReductionV src1 src2));
20055   match(Set dst (MaxReductionV src1 src2));
20056   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20057   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20058   ins_encode %{
20059     assert(UseAVX > 0, "sanity");
20060 
20061     int opcode = this->ideal_Opcode();
20062     int vlen = Matcher::vector_length(this, $src2);
20063     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20064                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20065   %}
20066   ins_pipe( pipe_slow );
20067 %}
20068 
20069 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20070                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20071   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20072             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20073              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20074             Matcher::vector_length(n->in(2)) >= 4);
20075   match(Set dst (MinReductionV src1 src2));
20076   match(Set dst (MaxReductionV src1 src2));
20077   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20078   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20079   ins_encode %{
20080     assert(UseAVX > 0, "sanity");
20081 
20082     int opcode = this->ideal_Opcode();
20083     int vlen = Matcher::vector_length(this, $src2);
20084     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20085                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20086   %}
20087   ins_pipe( pipe_slow );
20088 %}
20089 
20091 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
20092                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20093   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20094             Matcher::vector_length(n->in(2)) == 2);
20095   match(Set dst (MinReductionV dst src));
20096   match(Set dst (MaxReductionV dst src));
20097   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20098   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20099   ins_encode %{
20100     assert(UseAVX > 0, "sanity");
20101 
20102     int opcode = this->ideal_Opcode();
20103     int vlen = Matcher::vector_length(this, $src);
20104     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20105                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20106   %}
20107   ins_pipe( pipe_slow );
20108 %}
20109 
20110 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
20111                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20112   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20113             Matcher::vector_length(n->in(2)) >= 4);
20114   match(Set dst (MinReductionV dst src));
20115   match(Set dst (MaxReductionV dst src));
20116   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20117   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20118   ins_encode %{
20119     assert(UseAVX > 0, "sanity");
20120 
20121     int opcode = this->ideal_Opcode();
20122     int vlen = Matcher::vector_length(this, $src);
20123     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20124                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20125   %}
20126   ins_pipe( pipe_slow );
20127 %}
20128 
20129 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
20130   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20131             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20132              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20133             Matcher::vector_length(n->in(2)) == 2);
20134   match(Set dst (MinReductionV src1 src2));
20135   match(Set dst (MaxReductionV src1 src2));
20136   effect(TEMP dst, TEMP xtmp1);
20137   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20138   ins_encode %{
20139     int opcode = this->ideal_Opcode();
20140     int vlen = Matcher::vector_length(this, $src2);
20141     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20142                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
20143   %}
20144   ins_pipe( pipe_slow );
20145 %}
20146 
20147 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20148   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20149             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20150              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20151             Matcher::vector_length(n->in(2)) >= 4);
20152   match(Set dst (MinReductionV src1 src2));
20153   match(Set dst (MaxReductionV src1 src2));
20154   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20155   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20156   ins_encode %{
20157     int opcode = this->ideal_Opcode();
20158     int vlen = Matcher::vector_length(this, $src2);
20159     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20160                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20161   %}
20162   ins_pipe( pipe_slow );
20163 %}
20164 
20166 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20167   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20168             Matcher::vector_length(n->in(2)) == 2);
20169   match(Set dst (MinReductionV dst src));
20170   match(Set dst (MaxReductionV dst src));
20171   effect(TEMP dst, TEMP xtmp1);
20172   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20173   ins_encode %{
20174     int opcode = this->ideal_Opcode();
20175     int vlen = Matcher::vector_length(this, $src);
20176     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20177                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20178   %}
20179   ins_pipe( pipe_slow );
20180 %}
20181 
20182 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20183   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20184             Matcher::vector_length(n->in(2)) >= 4);
20185   match(Set dst (MinReductionV dst src));
20186   match(Set dst (MaxReductionV dst src));
20187   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20188   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20189   ins_encode %{
20190     int opcode = this->ideal_Opcode();
20191     int vlen = Matcher::vector_length(this, $src);
20192     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20193                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20194   %}
20195   ins_pipe( pipe_slow );
20196 %}
20197 
20198 // ====================VECTOR ARITHMETIC=======================================
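//
// Most rules below come in three flavors: a two-operand SSE form that updates
// $dst in place, a three-operand AVX register form, and an AVX form that
// folds a vector load straight into the arithmetic instruction.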
20199 
20200 // --------------------------------- ADD --------------------------------------
20201 
20202 // Bytes vector add
20203 instruct vaddB(vec dst, vec src) %{
20204   predicate(UseAVX == 0);
20205   match(Set dst (AddVB dst src));
20206   format %{ "paddb   $dst,$src\t! add packedB" %}
20207   ins_encode %{
20208     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20209   %}
20210   ins_pipe( pipe_slow );
20211 %}
20212 
20213 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20214   predicate(UseAVX > 0);
20215   match(Set dst (AddVB src1 src2));
20216   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
20217   ins_encode %{
20218     int vlen_enc = vector_length_encoding(this);
20219     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20220   %}
20221   ins_pipe( pipe_slow );
20222 %}
20223 
20224 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20225   predicate((UseAVX > 0) &&
20226             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20227   match(Set dst (AddVB src (LoadVector mem)));
20228   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
20229   ins_encode %{
20230     int vlen_enc = vector_length_encoding(this);
20231     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20232   %}
20233   ins_pipe( pipe_slow );
20234 %}
20235 
20236 // Shorts/Chars vector add
20237 instruct vaddS(vec dst, vec src) %{
20238   predicate(UseAVX == 0);
20239   match(Set dst (AddVS dst src));
20240   format %{ "paddw   $dst,$src\t! add packedS" %}
20241   ins_encode %{
20242     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20243   %}
20244   ins_pipe( pipe_slow );
20245 %}
20246 
20247 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20248   predicate(UseAVX > 0);
20249   match(Set dst (AddVS src1 src2));
20250   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
20251   ins_encode %{
20252     int vlen_enc = vector_length_encoding(this);
20253     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20254   %}
20255   ins_pipe( pipe_slow );
20256 %}
20257 
20258 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20259   predicate((UseAVX > 0) &&
20260             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20261   match(Set dst (AddVS src (LoadVector mem)));
20262   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
20263   ins_encode %{
20264     int vlen_enc = vector_length_encoding(this);
20265     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20266   %}
20267   ins_pipe( pipe_slow );
20268 %}
20269 
20270 // Integers vector add
20271 instruct vaddI(vec dst, vec src) %{
20272   predicate(UseAVX == 0);
20273   match(Set dst (AddVI dst src));
20274   format %{ "paddd   $dst,$src\t! add packedI" %}
20275   ins_encode %{
20276     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20277   %}
20278   ins_pipe( pipe_slow );
20279 %}
20280 
20281 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20282   predicate(UseAVX > 0);
20283   match(Set dst (AddVI src1 src2));
20284   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
20285   ins_encode %{
20286     int vlen_enc = vector_length_encoding(this);
20287     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20288   %}
20289   ins_pipe( pipe_slow );
20290 %}
20291 
20293 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20294   predicate((UseAVX > 0) &&
20295             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20296   match(Set dst (AddVI src (LoadVector mem)));
20297   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
20298   ins_encode %{
20299     int vlen_enc = vector_length_encoding(this);
20300     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20301   %}
20302   ins_pipe( pipe_slow );
20303 %}
20304 
20305 // Longs vector add
20306 instruct vaddL(vec dst, vec src) %{
20307   predicate(UseAVX == 0);
20308   match(Set dst (AddVL dst src));
20309   format %{ "paddq   $dst,$src\t! add packedL" %}
20310   ins_encode %{
20311     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20312   %}
20313   ins_pipe( pipe_slow );
20314 %}
20315 
20316 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20317   predicate(UseAVX > 0);
20318   match(Set dst (AddVL src1 src2));
20319   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
20320   ins_encode %{
20321     int vlen_enc = vector_length_encoding(this);
20322     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20323   %}
20324   ins_pipe( pipe_slow );
20325 %}
20326 
20327 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20328   predicate((UseAVX > 0) &&
20329             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20330   match(Set dst (AddVL src (LoadVector mem)));
20331   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
20332   ins_encode %{
20333     int vlen_enc = vector_length_encoding(this);
20334     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20335   %}
20336   ins_pipe( pipe_slow );
20337 %}
20338 
20339 // Floats vector add
20340 instruct vaddF(vec dst, vec src) %{
20341   predicate(UseAVX == 0);
20342   match(Set dst (AddVF dst src));
20343   format %{ "addps   $dst,$src\t! add packedF" %}
20344   ins_encode %{
20345     __ addps($dst$$XMMRegister, $src$$XMMRegister);
20346   %}
20347   ins_pipe( pipe_slow );
20348 %}
20349 
20350 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20351   predicate(UseAVX > 0);
20352   match(Set dst (AddVF src1 src2));
20353   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
20354   ins_encode %{
20355     int vlen_enc = vector_length_encoding(this);
20356     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20357   %}
20358   ins_pipe( pipe_slow );
20359 %}
20360 
20361 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20362   predicate((UseAVX > 0) &&
20363             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20364   match(Set dst (AddVF src (LoadVector mem)));
20365   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
20366   ins_encode %{
20367     int vlen_enc = vector_length_encoding(this);
20368     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20369   %}
20370   ins_pipe( pipe_slow );
20371 %}
20372 
20373 // Doubles vector add
20374 instruct vaddD(vec dst, vec src) %{
20375   predicate(UseAVX == 0);
20376   match(Set dst (AddVD dst src));
20377   format %{ "addpd   $dst,$src\t! add packedD" %}
20378   ins_encode %{
20379     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20380   %}
20381   ins_pipe( pipe_slow );
20382 %}
20383 
20384 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20385   predicate(UseAVX > 0);
20386   match(Set dst (AddVD src1 src2));
20387   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
20388   ins_encode %{
20389     int vlen_enc = vector_length_encoding(this);
20390     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20391   %}
20392   ins_pipe( pipe_slow );
20393 %}
20394 
20395 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20396   predicate((UseAVX > 0) &&
20397             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20398   match(Set dst (AddVD src (LoadVector mem)));
20399   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
20400   ins_encode %{
20401     int vlen_enc = vector_length_encoding(this);
20402     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20403   %}
20404   ins_pipe( pipe_slow );
20405 %}
20406 
20407 // --------------------------------- SUB --------------------------------------
20408 
20409 // Bytes vector sub
20410 instruct vsubB(vec dst, vec src) %{
20411   predicate(UseAVX == 0);
20412   match(Set dst (SubVB dst src));
20413   format %{ "psubb   $dst,$src\t! sub packedB" %}
20414   ins_encode %{
20415     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20416   %}
20417   ins_pipe( pipe_slow );
20418 %}
20419 
20420 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20421   predicate(UseAVX > 0);
20422   match(Set dst (SubVB src1 src2));
20423   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20424   ins_encode %{
20425     int vlen_enc = vector_length_encoding(this);
20426     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20427   %}
20428   ins_pipe( pipe_slow );
20429 %}
20430 
20431 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20432   predicate((UseAVX > 0) &&
20433             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20434   match(Set dst (SubVB src (LoadVector mem)));
20435   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20436   ins_encode %{
20437     int vlen_enc = vector_length_encoding(this);
20438     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20439   %}
20440   ins_pipe( pipe_slow );
20441 %}
20442 
20443 // Shorts/Chars vector sub
20444 instruct vsubS(vec dst, vec src) %{
20445   predicate(UseAVX == 0);
20446   match(Set dst (SubVS dst src));
20447   format %{ "psubw   $dst,$src\t! sub packedS" %}
20448   ins_encode %{
20449     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20450   %}
20451   ins_pipe( pipe_slow );
20452 %}
20453 
20455 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20456   predicate(UseAVX > 0);
20457   match(Set dst (SubVS src1 src2));
20458   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20459   ins_encode %{
20460     int vlen_enc = vector_length_encoding(this);
20461     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20462   %}
20463   ins_pipe( pipe_slow );
20464 %}
20465 
20466 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20467   predicate((UseAVX > 0) &&
20468             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20469   match(Set dst (SubVS src (LoadVector mem)));
20470   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20471   ins_encode %{
20472     int vlen_enc = vector_length_encoding(this);
20473     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20474   %}
20475   ins_pipe( pipe_slow );
20476 %}
20477 
20478 // Integers vector sub
20479 instruct vsubI(vec dst, vec src) %{
20480   predicate(UseAVX == 0);
20481   match(Set dst (SubVI dst src));
20482   format %{ "psubd   $dst,$src\t! sub packedI" %}
20483   ins_encode %{
20484     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20485   %}
20486   ins_pipe( pipe_slow );
20487 %}
20488 
20489 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20490   predicate(UseAVX > 0);
20491   match(Set dst (SubVI src1 src2));
20492   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20493   ins_encode %{
20494     int vlen_enc = vector_length_encoding(this);
20495     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20496   %}
20497   ins_pipe( pipe_slow );
20498 %}
20499 
20500 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20501   predicate((UseAVX > 0) &&
20502             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20503   match(Set dst (SubVI src (LoadVector mem)));
20504   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20505   ins_encode %{
20506     int vlen_enc = vector_length_encoding(this);
20507     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20508   %}
20509   ins_pipe( pipe_slow );
20510 %}
20511 
20512 // Longs vector sub
20513 instruct vsubL(vec dst, vec src) %{
20514   predicate(UseAVX == 0);
20515   match(Set dst (SubVL dst src));
20516   format %{ "psubq   $dst,$src\t! sub packedL" %}
20517   ins_encode %{
20518     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20519   %}
20520   ins_pipe( pipe_slow );
20521 %}
20522 
20523 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20524   predicate(UseAVX > 0);
20525   match(Set dst (SubVL src1 src2));
20526   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20527   ins_encode %{
20528     int vlen_enc = vector_length_encoding(this);
20529     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20530   %}
20531   ins_pipe( pipe_slow );
20532 %}
20533 
20535 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20536   predicate((UseAVX > 0) &&
20537             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20538   match(Set dst (SubVL src (LoadVector mem)));
20539   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20540   ins_encode %{
20541     int vlen_enc = vector_length_encoding(this);
20542     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20543   %}
20544   ins_pipe( pipe_slow );
20545 %}
20546 
20547 // Floats vector sub
20548 instruct vsubF(vec dst, vec src) %{
20549   predicate(UseAVX == 0);
20550   match(Set dst (SubVF dst src));
20551   format %{ "subps   $dst,$src\t! sub packedF" %}
20552   ins_encode %{
20553     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20554   %}
20555   ins_pipe( pipe_slow );
20556 %}
20557 
20558 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20559   predicate(UseAVX > 0);
20560   match(Set dst (SubVF src1 src2));
20561   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20562   ins_encode %{
20563     int vlen_enc = vector_length_encoding(this);
20564     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20565   %}
20566   ins_pipe( pipe_slow );
20567 %}
20568 
20569 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20570   predicate((UseAVX > 0) &&
20571             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20572   match(Set dst (SubVF src (LoadVector mem)));
20573   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20574   ins_encode %{
20575     int vlen_enc = vector_length_encoding(this);
20576     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20577   %}
20578   ins_pipe( pipe_slow );
20579 %}
20580 
20581 // Doubles vector sub
20582 instruct vsubD(vec dst, vec src) %{
20583   predicate(UseAVX == 0);
20584   match(Set dst (SubVD dst src));
20585   format %{ "subpd   $dst,$src\t! sub packedD" %}
20586   ins_encode %{
20587     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20588   %}
20589   ins_pipe( pipe_slow );
20590 %}
20591 
20592 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20593   predicate(UseAVX > 0);
20594   match(Set dst (SubVD src1 src2));
20595   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20596   ins_encode %{
20597     int vlen_enc = vector_length_encoding(this);
20598     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20599   %}
20600   ins_pipe( pipe_slow );
20601 %}
20602 
20603 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20604   predicate((UseAVX > 0) &&
20605             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20606   match(Set dst (SubVD src (LoadVector mem)));
20607   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20608   ins_encode %{
20609     int vlen_enc = vector_length_encoding(this);
20610     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20611   %}
20612   ins_pipe( pipe_slow );
20613 %}
20614 
20615 // --------------------------------- MUL --------------------------------------
20616 
20617 // Byte vector mul
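// x86 has no byte-granular multiply instruction, so these rules widen bytes
// to words, multiply with pmullw, and repack the low byte of each product.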
20618 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20619   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20620   match(Set dst (MulVB src1 src2));
20621   effect(TEMP dst, TEMP xtmp);
20622   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20623   ins_encode %{
20624     assert(UseSSE > 3, "required");
20625     __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20626     __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20627     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20628     __ psllw($dst$$XMMRegister, 8);
20629     __ psrlw($dst$$XMMRegister, 8);
20630     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20631   %}
20632   ins_pipe( pipe_slow );
20633 %}
20634 
20635 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20636   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20637   match(Set dst (MulVB src1 src2));
20638   effect(TEMP dst, TEMP xtmp);
20639   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20640   ins_encode %{
20641     assert(UseSSE > 3, "required");
20642     // Odd-index elements
20643     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20644     __ psrlw($dst$$XMMRegister, 8);
20645     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20646     __ psrlw($xtmp$$XMMRegister, 8);
20647     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20648     __ psllw($dst$$XMMRegister, 8);
20649     // Even-index elements
20650     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20651     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20652     __ psllw($xtmp$$XMMRegister, 8);
20653     __ psrlw($xtmp$$XMMRegister, 8);
20654     // Combine
20655     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20656   %}
20657   ins_pipe( pipe_slow );
20658 %}
20659 
20660 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20661   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20662   match(Set dst (MulVB src1 src2));
20663   effect(TEMP xtmp1, TEMP xtmp2);
20664   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20665   ins_encode %{
20666     int vlen_enc = vector_length_encoding(this);
20667     // Odd-index elements
20668     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20669     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20670     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20671     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20672     // Even-index elements
20673     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20674     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20675     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20676     // Combine
20677     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20678   %}
20679   ins_pipe( pipe_slow );
20680 %}
20681 
20682 // Shorts/Chars vector mul
20683 instruct vmulS(vec dst, vec src) %{
20684   predicate(UseAVX == 0);
20685   match(Set dst (MulVS dst src));
20686   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20687   ins_encode %{
20688     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20689   %}
20690   ins_pipe( pipe_slow );
20691 %}
20692 
20693 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20694   predicate(UseAVX > 0);
20695   match(Set dst (MulVS src1 src2));
20696   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20697   ins_encode %{
20698     int vlen_enc = vector_length_encoding(this);
20699     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20700   %}
20701   ins_pipe( pipe_slow );
20702 %}
20703 
20704 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20705   predicate((UseAVX > 0) &&
20706             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20707   match(Set dst (MulVS src (LoadVector mem)));
20708   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20709   ins_encode %{
20710     int vlen_enc = vector_length_encoding(this);
20711     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20712   %}
20713   ins_pipe( pipe_slow );
20714 %}
20715 
20716 // Integers vector mul
20717 instruct vmulI(vec dst, vec src) %{
20718   predicate(UseAVX == 0);
20719   match(Set dst (MulVI dst src));
20720   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20721   ins_encode %{
20722     assert(UseSSE > 3, "required");
20723     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20724   %}
20725   ins_pipe( pipe_slow );
20726 %}
20727 
20728 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20729   predicate(UseAVX > 0);
20730   match(Set dst (MulVI src1 src2));
20731   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20732   ins_encode %{
20733     int vlen_enc = vector_length_encoding(this);
20734     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20735   %}
20736   ins_pipe( pipe_slow );
20737 %}
20738 
20739 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20740   predicate((UseAVX > 0) &&
20741             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20742   match(Set dst (MulVI src (LoadVector mem)));
20743   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20744   ins_encode %{
20745     int vlen_enc = vector_length_encoding(this);
20746     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20747   %}
20748   ins_pipe( pipe_slow );
20749 %}
20750 
20751 // Longs vector mul
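// A true 64-bit element multiply (evpmullq) requires AVX512DQ, plus AVX512VL
// for vectors shorter than 512 bits; other targets fall back to the 32-bit
// decomposition in mulVL/vmulVL below.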
20752 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20753   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20754              VM_Version::supports_avx512dq()) ||
20755             VM_Version::supports_avx512vldq());
20756   match(Set dst (MulVL src1 src2));
20757   ins_cost(500);
20758   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20759   ins_encode %{
20760     assert(UseAVX > 2, "required");
20761     int vlen_enc = vector_length_encoding(this);
20762     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20763   %}
20764   ins_pipe( pipe_slow );
20765 %}
20766 
20767 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20768   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20769              VM_Version::supports_avx512dq()) ||
20770             (Matcher::vector_length_in_bytes(n) > 8 &&
20771              VM_Version::supports_avx512vldq()));
20772   match(Set dst (MulVL src (LoadVector mem)));
20773   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20774   ins_cost(500);
20775   ins_encode %{
20776     assert(UseAVX > 2, "required");
20777     int vlen_enc = vector_length_encoding(this);
20778     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20779   %}
20780   ins_pipe( pipe_slow );
20781 %}
20782 
20783 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20784   predicate(UseAVX == 0);
20785   match(Set dst (MulVL src1 src2));
20786   ins_cost(500);
20787   effect(TEMP dst, TEMP xtmp);
20788   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20789   ins_encode %{
20790     assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only the lower 32 bits are of interest
20792     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20793     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20794     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20795     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20796     __ psllq($dst$$XMMRegister, 32);
20797     // Get the lo-lo products
20798     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20799     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20800     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20801   %}
20802   ins_pipe( pipe_slow );
20803 %}
20804 
20805 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20806   predicate(UseAVX > 0 &&
20807             ((Matcher::vector_length_in_bytes(n) == 64 &&
20808               !VM_Version::supports_avx512dq()) ||
20809              (Matcher::vector_length_in_bytes(n) < 64 &&
20810               !VM_Version::supports_avx512vldq())));
20811   match(Set dst (MulVL src1 src2));
20812   effect(TEMP xtmp1, TEMP xtmp2);
20813   ins_cost(500);
20814   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20815   ins_encode %{
20816     int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only the lower 32 bits are of interest
20818     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20819     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20820     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20821     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20822     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20823     // Get the lo-lo products
20824     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20825     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20826   %}
20827   ins_pipe( pipe_slow );
20828 %}
20829 
20830 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20831   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20832   match(Set dst (MulVL src1 src2));
20833   ins_cost(100);
20834   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20835   ins_encode %{
20836     int vlen_enc = vector_length_encoding(this);
20837     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20838   %}
20839   ins_pipe( pipe_slow );
20840 %}
20841 
20842 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20843   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20844   match(Set dst (MulVL src1 src2));
20845   ins_cost(100);
20846   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20847   ins_encode %{
20848     int vlen_enc = vector_length_encoding(this);
20849     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20850   %}
20851   ins_pipe( pipe_slow );
20852 %}
20853 
20854 // Floats vector mul
20855 instruct vmulF(vec dst, vec src) %{
20856   predicate(UseAVX == 0);
20857   match(Set dst (MulVF dst src));
20858   format %{ "mulps   $dst,$src\t! mul packedF" %}
20859   ins_encode %{
20860     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20861   %}
20862   ins_pipe( pipe_slow );
20863 %}
20864 
20865 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20866   predicate(UseAVX > 0);
20867   match(Set dst (MulVF src1 src2));
20868   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
20869   ins_encode %{
20870     int vlen_enc = vector_length_encoding(this);
20871     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20872   %}
20873   ins_pipe( pipe_slow );
20874 %}
20875 
20876 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20877   predicate((UseAVX > 0) &&
20878             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20879   match(Set dst (MulVF src (LoadVector mem)));
20880   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
20881   ins_encode %{
20882     int vlen_enc = vector_length_encoding(this);
20883     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20884   %}
20885   ins_pipe( pipe_slow );
20886 %}
20887 
20888 // Doubles vector mul
20889 instruct vmulD(vec dst, vec src) %{
20890   predicate(UseAVX == 0);
20891   match(Set dst (MulVD dst src));
20892   format %{ "mulpd   $dst,$src\t! mul packedD" %}
20893   ins_encode %{
20894     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20895   %}
20896   ins_pipe( pipe_slow );
20897 %}
20898 
20899 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20900   predicate(UseAVX > 0);
20901   match(Set dst (MulVD src1 src2));
20902   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
20903   ins_encode %{
20904     int vlen_enc = vector_length_encoding(this);
20905     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20906   %}
20907   ins_pipe( pipe_slow );
20908 %}
20909 
20910 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20911   predicate((UseAVX > 0) &&
20912             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20913   match(Set dst (MulVD src (LoadVector mem)));
20914   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
20915   ins_encode %{
20916     int vlen_enc = vector_length_encoding(this);
20917     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20918   %}
20919   ins_pipe( pipe_slow );
20920 %}
20921 
20922 // --------------------------------- DIV --------------------------------------
20923 
20924 // Floats vector div
20925 instruct vdivF(vec dst, vec src) %{
20926   predicate(UseAVX == 0);
20927   match(Set dst (DivVF dst src));
20928   format %{ "divps   $dst,$src\t! div packedF" %}
20929   ins_encode %{
20930     __ divps($dst$$XMMRegister, $src$$XMMRegister);
20931   %}
20932   ins_pipe( pipe_slow );
20933 %}
20934 
20935 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20936   predicate(UseAVX > 0);
20937   match(Set dst (DivVF src1 src2));
20938   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
20939   ins_encode %{
20940     int vlen_enc = vector_length_encoding(this);
20941     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20942   %}
20943   ins_pipe( pipe_slow );
20944 %}
20945 
20946 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20947   predicate((UseAVX > 0) &&
20948             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20949   match(Set dst (DivVF src (LoadVector mem)));
20950   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
20951   ins_encode %{
20952     int vlen_enc = vector_length_encoding(this);
20953     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20954   %}
20955   ins_pipe( pipe_slow );
20956 %}
20957 
20958 // Doubles vector div
20959 instruct vdivD(vec dst, vec src) %{
20960   predicate(UseAVX == 0);
20961   match(Set dst (DivVD dst src));
20962   format %{ "divpd   $dst,$src\t! div packedD" %}
20963   ins_encode %{
20964     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20965   %}
20966   ins_pipe( pipe_slow );
20967 %}
20968 
20969 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20970   predicate(UseAVX > 0);
20971   match(Set dst (DivVD src1 src2));
20972   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
20973   ins_encode %{
20974     int vlen_enc = vector_length_encoding(this);
20975     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20976   %}
20977   ins_pipe( pipe_slow );
20978 %}
20979 
20980 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20981   predicate((UseAVX > 0) &&
20982             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20983   match(Set dst (DivVD src (LoadVector mem)));
20984   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
20985   ins_encode %{
20986     int vlen_enc = vector_length_encoding(this);
20987     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20988   %}
20989   ins_pipe( pipe_slow );
20990 %}
20991 
20992 // ------------------------------ MinMax ---------------------------------------
20993 
20994 // Byte, Short, Int vector Min/Max
20995 instruct minmax_reg_sse(vec dst, vec src) %{
20996   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20997             UseAVX == 0);
20998   match(Set dst (MinV dst src));
20999   match(Set dst (MaxV dst src));
21000   format %{ "vector_minmax  $dst,$src\t!  " %}
21001   ins_encode %{
21002     assert(UseSSE >= 4, "required");
21003 
21004     int opcode = this->ideal_Opcode();
21005     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21006     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
21007   %}
21008   ins_pipe( pipe_slow );
21009 %}
21010 
21011 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
21012   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21013             UseAVX > 0);
21014   match(Set dst (MinV src1 src2));
21015   match(Set dst (MaxV src1 src2));
21016   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
21017   ins_encode %{
21018     int opcode = this->ideal_Opcode();
21019     int vlen_enc = vector_length_encoding(this);
21020     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21021 
21022     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21023   %}
21024   ins_pipe( pipe_slow );
21025 %}
21026 
21027 // Long vector Min/Max
21028 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
21029   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
21030             UseAVX == 0);
21031   match(Set dst (MinV dst src));
21032   match(Set dst (MaxV src dst));
21033   effect(TEMP dst, TEMP tmp);
21034   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
21035   ins_encode %{
21036     assert(UseSSE >= 4, "required");
21037 
21038     int opcode = this->ideal_Opcode();
21039     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21040     assert(elem_bt == T_LONG, "sanity");
21041 
21042     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
21043   %}
21044   ins_pipe( pipe_slow );
21045 %}
21046 
21047 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
21048   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
21049             UseAVX > 0 && !VM_Version::supports_avx512vl());
21050   match(Set dst (MinV src1 src2));
21051   match(Set dst (MaxV src1 src2));
21052   effect(TEMP dst);
21053   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
21054   ins_encode %{
21055     int vlen_enc = vector_length_encoding(this);
21056     int opcode = this->ideal_Opcode();
21057     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21058     assert(elem_bt == T_LONG, "sanity");
21059 
21060     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21061   %}
21062   ins_pipe( pipe_slow );
21063 %}
21064 
21065 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
21066   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
21067             Matcher::vector_element_basic_type(n) == T_LONG);
21068   match(Set dst (MinV src1 src2));
21069   match(Set dst (MaxV src1 src2));
21070   format %{ "vector_minmaxL  $dst,$src1,src2\t! " %}
21071   ins_encode %{
21072     assert(UseAVX > 2, "required");
21073 
21074     int vlen_enc = vector_length_encoding(this);
21075     int opcode = this->ideal_Opcode();
21076     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21077     assert(elem_bt == T_LONG, "sanity");
21078 
21079     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21080   %}
21081   ins_pipe( pipe_slow );
21082 %}
21083 
21084 // Float/Double vector Min/Max
21085 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
21086   predicate(VM_Version::supports_avx10_2() &&
21087             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21088   match(Set dst (MinV a b));
21089   match(Set dst (MaxV a b));
21090   format %{ "vector_minmaxFP  $dst, $a, $b" %}
21091   ins_encode %{
21092     int vlen_enc = vector_length_encoding(this);
21093     int opcode = this->ideal_Opcode();
21094     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21095     __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21096   %}
21097   ins_pipe( pipe_slow );
21098 %}
21099 
21100 // Float/Double vector Min/Max
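// Without AVX10.2 the legacy instructions do not match Java semantics
// directly: (v)minps/minpd and friends return the second operand when
// either input is NaN and treat -0.0 as equal to +0.0, whereas Math.min/max
// must propagate NaN and order -0.0 below +0.0. The temporaries below let
// the vminmax_fp/evminmax_fp helpers blend the operands into the Java
// result.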
21101 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
21102   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
21103             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
21104             UseAVX > 0);
21105   match(Set dst (MinV a b));
21106   match(Set dst (MaxV a b));
21107   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
21108   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
21109   ins_encode %{
21110     assert(UseAVX > 0, "required");
21111 
21112     int opcode = this->ideal_Opcode();
21113     int vlen_enc = vector_length_encoding(this);
21114     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21115 
21116     __ vminmax_fp(opcode, elem_bt,
21117                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21118                   $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21119   %}
21120   ins_pipe( pipe_slow );
21121 %}
21122 
21123 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
21124   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
21125             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21126   match(Set dst (MinV a b));
21127   match(Set dst (MaxV a b));
21128   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
21129   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
21130   ins_encode %{
21131     assert(UseAVX > 2, "required");
21132 
21133     int opcode = this->ideal_Opcode();
21134     int vlen_enc = vector_length_encoding(this);
21135     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21136 
21137     __ evminmax_fp(opcode, elem_bt,
21138                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21139                    $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21140   %}
21141   ins_pipe( pipe_slow );
21142 %}
21143 
21144 // ------------------------------ Unsigned vector Min/Max ----------------------
21145 
21146 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21147   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21148   match(Set dst (UMinV a b));
21149   match(Set dst (UMaxV a b));
21150   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21151   ins_encode %{
21152     int opcode = this->ideal_Opcode();
21153     int vlen_enc = vector_length_encoding(this);
21154     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21155     assert(is_integral_type(elem_bt), "");
21156     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21157   %}
21158   ins_pipe( pipe_slow );
21159 %}
21160 
21161 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21162   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21163   match(Set dst (UMinV a (LoadVector b)));
21164   match(Set dst (UMaxV a (LoadVector b)));
21165   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21166   ins_encode %{
21167     int opcode = this->ideal_Opcode();
21168     int vlen_enc = vector_length_encoding(this);
21169     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21170     assert(is_integral_type(elem_bt), "");
21171     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21172   %}
21173   ins_pipe( pipe_slow );
21174 %}
21175 
21176 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21177   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21178   match(Set dst (UMinV a b));
21179   match(Set dst (UMaxV a b));
21180   effect(TEMP xtmp1, TEMP xtmp2);
21181   format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
21182   ins_encode %{
21183     int opcode = this->ideal_Opcode();
21184     int vlen_enc = vector_length_encoding(this);
21185     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21186   %}
21187   ins_pipe( pipe_slow );
21188 %}
21189 
21190 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21191   match(Set dst (UMinV (Binary dst src2) mask));
21192   match(Set dst (UMaxV (Binary dst src2) mask));
21193   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21194   ins_encode %{
21195     int vlen_enc = vector_length_encoding(this);
21196     BasicType bt = Matcher::vector_element_basic_type(this);
21197     int opc = this->ideal_Opcode();
21198     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21199                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21200   %}
21201   ins_pipe( pipe_slow );
21202 %}
21203 
21204 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21205   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21206   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21207   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21208   ins_encode %{
21209     int vlen_enc = vector_length_encoding(this);
21210     BasicType bt = Matcher::vector_element_basic_type(this);
21211     int opc = this->ideal_Opcode();
21212     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21213                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21214   %}
21215   ins_pipe( pipe_slow );
21216 %}
21217 
21218 // --------------------------------- Signum/CopySign ---------------------------
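
// Math.signum returns 1.0 or -1.0 for positive or negative inputs, and
// returns the argument itself for zeros and NaN; that is why each rule
// below takes both a $zero and a $one constant operand.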
21219 
21220 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21221   match(Set dst (SignumF dst (Binary zero one)));
21222   effect(KILL cr);
21223   format %{ "signumF $dst, $dst" %}
21224   ins_encode %{
21225     int opcode = this->ideal_Opcode();
21226     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21227   %}
21228   ins_pipe( pipe_slow );
21229 %}
21230 
21231 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21232   match(Set dst (SignumD dst (Binary zero one)));
21233   effect(KILL cr);
21234   format %{ "signumD $dst, $dst" %}
21235   ins_encode %{
21236     int opcode = this->ideal_Opcode();
21237     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21238   %}
21239   ins_pipe( pipe_slow );
21240 %}
21241 
21242 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21243   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21244   match(Set dst (SignumVF src (Binary zero one)));
21245   match(Set dst (SignumVD src (Binary zero one)));
21246   effect(TEMP dst, TEMP xtmp1);
21247   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21248   ins_encode %{
21249     int opcode = this->ideal_Opcode();
21250     int vec_enc = vector_length_encoding(this);
21251     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21252                          $xtmp1$$XMMRegister, vec_enc);
21253   %}
21254   ins_pipe( pipe_slow );
21255 %}
21256 
21257 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21258   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21259   match(Set dst (SignumVF src (Binary zero one)));
21260   match(Set dst (SignumVD src (Binary zero one)));
21261   effect(TEMP dst, TEMP ktmp1);
21262   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21263   ins_encode %{
21264     int opcode = this->ideal_Opcode();
21265     int vec_enc = vector_length_encoding(this);
21266     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21267                           $ktmp1$$KRegister, vec_enc);
21268   %}
21269   ins_pipe( pipe_slow );
21270 %}
21271 
21272 // ---------------------------------------
21273 // For copySign use 0xE4 as writemask for vpternlog
21274 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21275 // C (xmm2) is set to 0x7FFFFFFF
21276 // Wherever xmm2 is 0, we want to pick from B (sign)
21277 // Wherever xmm2 is 1, we want to pick from A (src)
21278 //
21279 // A B C Result
21280 // 0 0 0 0
21281 // 0 0 1 0
21282 // 0 1 0 1
21283 // 0 1 1 0
21284 // 1 0 0 0
21285 // 1 0 1 1
21286 // 1 1 0 1
21287 // 1 1 1 1
21288 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
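// (vpternlog treats the three input bits as an index into the immediate:
// result bit = imm[A*4 + B*2 + C]. For example A=1,B=0,C=1 selects bit 5 of
// 0xE4 = 0b11100100, which is 1, i.e. pick A.)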
21290 // ---------------------------------------
21291 
21292 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21293   match(Set dst (CopySignF dst src));
21294   effect(TEMP tmp1, TEMP tmp2);
21295   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21296   ins_encode %{
21297     __ movl($tmp2$$Register, 0x7FFFFFFF);
21298     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21299     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21300   %}
21301   ins_pipe( pipe_slow );
21302 %}
21303 
21304 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21305   match(Set dst (CopySignD dst (Binary src zero)));
21306   ins_cost(100);
21307   effect(TEMP tmp1, TEMP tmp2);
21308   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21309   ins_encode %{
21310     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21311     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21312     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21313   %}
21314   ins_pipe( pipe_slow );
21315 %}
21316 
21317 //----------------------------- CompressBits/ExpandBits ------------------------
21318 
21319 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21320   predicate(n->bottom_type()->isa_int());
21321   match(Set dst (CompressBits src mask));
21322   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21323   ins_encode %{
21324     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21325   %}
21326   ins_pipe( pipe_slow );
21327 %}
21328 
21329 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21330   predicate(n->bottom_type()->isa_int());
21331   match(Set dst (ExpandBits src mask));
21332   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21333   ins_encode %{
21334     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21335   %}
21336   ins_pipe( pipe_slow );
21337 %}
21338 
21339 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21340   predicate(n->bottom_type()->isa_int());
21341   match(Set dst (CompressBits src (LoadI mask)));
21342   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21343   ins_encode %{
21344     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21345   %}
21346   ins_pipe( pipe_slow );
21347 %}
21348 
21349 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21350   predicate(n->bottom_type()->isa_int());
21351   match(Set dst (ExpandBits src (LoadI mask)));
21352   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21353   ins_encode %{
21354     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21355   %}
21356   ins_pipe( pipe_slow );
21357 %}
21358 
21359 // --------------------------------- Sqrt --------------------------------------
21360 
21361 instruct vsqrtF_reg(vec dst, vec src) %{
21362   match(Set dst (SqrtVF src));
21363   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
21364   ins_encode %{
21365     assert(UseAVX > 0, "required");
21366     int vlen_enc = vector_length_encoding(this);
21367     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21368   %}
21369   ins_pipe( pipe_slow );
21370 %}
21371 
21372 instruct vsqrtF_mem(vec dst, memory mem) %{
21373   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21374   match(Set dst (SqrtVF (LoadVector mem)));
21375   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
21376   ins_encode %{
21377     assert(UseAVX > 0, "required");
21378     int vlen_enc = vector_length_encoding(this);
21379     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21380   %}
21381   ins_pipe( pipe_slow );
21382 %}
21383 
// Doubles vector sqrt
21385 instruct vsqrtD_reg(vec dst, vec src) %{
21386   match(Set dst (SqrtVD src));
21387   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
21388   ins_encode %{
21389     assert(UseAVX > 0, "required");
21390     int vlen_enc = vector_length_encoding(this);
21391     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21392   %}
21393   ins_pipe( pipe_slow );
21394 %}
21395 
21396 instruct vsqrtD_mem(vec dst, memory mem) %{
21397   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21398   match(Set dst (SqrtVD (LoadVector mem)));
21399   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
21400   ins_encode %{
21401     assert(UseAVX > 0, "required");
21402     int vlen_enc = vector_length_encoding(this);
21403     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21404   %}
21405   ins_pipe( pipe_slow );
21406 %}
21407 
21408 // ------------------------------ Shift ---------------------------------------
21409 
// Left and right shift count vectors are the same on x86: the count is read
// from the low 64 bits of the xmm register, as a single scalar count
// applied to all lanes.
21412 instruct vshiftcnt(vec dst, rRegI cnt) %{
21413   match(Set dst (LShiftCntV cnt));
21414   match(Set dst (RShiftCntV cnt));
21415   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21416   ins_encode %{
21417     __ movdl($dst$$XMMRegister, $cnt$$Register);
21418   %}
21419   ins_pipe( pipe_slow );
21420 %}
21421 
21422 // Byte vector shift
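// x86 has no byte-granularity shifts, so the rules below widen bytes to
// words (vextendbw), shift at word granularity (vshiftw), mask the results
// back into byte range and re-pack them (packuswb).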
21423 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21424   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21425   match(Set dst ( LShiftVB src shift));
21426   match(Set dst ( RShiftVB src shift));
21427   match(Set dst (URShiftVB src shift));
21428   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21429   format %{"vector_byte_shift $dst,$src,$shift" %}
21430   ins_encode %{
21431     assert(UseSSE > 3, "required");
21432     int opcode = this->ideal_Opcode();
21433     bool sign = (opcode != Op_URShiftVB);
21434     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21435     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21436     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21437     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21438     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21439   %}
21440   ins_pipe( pipe_slow );
21441 %}
21442 
21443 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21444   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21445             UseAVX <= 1);
21446   match(Set dst ( LShiftVB src shift));
21447   match(Set dst ( RShiftVB src shift));
21448   match(Set dst (URShiftVB src shift));
21449   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21450   format %{"vector_byte_shift $dst,$src,$shift" %}
21451   ins_encode %{
21452     assert(UseSSE > 3, "required");
21453     int opcode = this->ideal_Opcode();
21454     bool sign = (opcode != Op_URShiftVB);
21455     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21456     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21457     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21458     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21459     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21460     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21461     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21462     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21463     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21464   %}
21465   ins_pipe( pipe_slow );
21466 %}
21467 
21468 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21469   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21470             UseAVX > 1);
21471   match(Set dst ( LShiftVB src shift));
21472   match(Set dst ( RShiftVB src shift));
21473   match(Set dst (URShiftVB src shift));
21474   effect(TEMP dst, TEMP tmp);
21475   format %{"vector_byte_shift $dst,$src,$shift" %}
21476   ins_encode %{
21477     int opcode = this->ideal_Opcode();
21478     bool sign = (opcode != Op_URShiftVB);
21479     int vlen_enc = Assembler::AVX_256bit;
21480     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21481     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21482     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21483     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21484     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21485   %}
21486   ins_pipe( pipe_slow );
21487 %}
21488 
21489 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21490   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21491   match(Set dst ( LShiftVB src shift));
21492   match(Set dst ( RShiftVB src shift));
21493   match(Set dst (URShiftVB src shift));
21494   effect(TEMP dst, TEMP tmp);
21495   format %{"vector_byte_shift $dst,$src,$shift" %}
21496   ins_encode %{
21497     assert(UseAVX > 1, "required");
21498     int opcode = this->ideal_Opcode();
21499     bool sign = (opcode != Op_URShiftVB);
21500     int vlen_enc = Assembler::AVX_256bit;
21501     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21502     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21503     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21504     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21505     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21506     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21507     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21508     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21509     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21510   %}
21511   ins_pipe( pipe_slow );
21512 %}
21513 
21514 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21515   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21516   match(Set dst ( LShiftVB src shift));
21517   match(Set dst  (RShiftVB src shift));
21518   match(Set dst (URShiftVB src shift));
21519   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21520   format %{"vector_byte_shift $dst,$src,$shift" %}
21521   ins_encode %{
21522     assert(UseAVX > 2, "required");
21523     int opcode = this->ideal_Opcode();
21524     bool sign = (opcode != Op_URShiftVB);
21525     int vlen_enc = Assembler::AVX_512bit;
21526     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21527     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21528     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21529     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21530     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21531     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21532     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21533     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21534     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21535     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21536     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21537     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21538   %}
21539   ins_pipe( pipe_slow );
21540 %}
21541 
// A logical right shift on a Shorts vector produces an incorrect Java
// result for negative data, because Java converts the short value to an int
// with sign extension before shifting. Char vectors are fine, since chars
// are unsigned, as the example below shows.
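// Worked example, shifting 0xFFFF logically right by 2:
//   short s = -1:    Java computes ((int)s) >>> 2 = 0x3FFFFFFF, whose low
//                    16 bits are 0xFFFF, but a 16-bit vector shift gives
//                    0x3FFF.
//   char c = 0xFFFF: Java computes 0x0000FFFF >>> 2 = 0x3FFF, matching the
//                    vector result.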
// Shorts/Chars vector shift
21547 instruct vshiftS(vec dst, vec src, vec shift) %{
21548   predicate(!n->as_ShiftV()->is_var_shift());
21549   match(Set dst ( LShiftVS src shift));
21550   match(Set dst ( RShiftVS src shift));
21551   match(Set dst (URShiftVS src shift));
21552   effect(TEMP dst, USE src, USE shift);
21553   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21554   ins_encode %{
21555     int opcode = this->ideal_Opcode();
21556     if (UseAVX > 0) {
21557       int vlen_enc = vector_length_encoding(this);
21558       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21559     } else {
21560       int vlen = Matcher::vector_length(this);
21561       if (vlen == 2) {
21562         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21563         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21564       } else if (vlen == 4) {
21565         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21566         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21567       } else {
21568         assert (vlen == 8, "sanity");
21569         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21570         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21571       }
21572     }
21573   %}
21574   ins_pipe( pipe_slow );
21575 %}
21576 
21577 // Integers vector left shift
21578 instruct vshiftI(vec dst, vec src, vec shift) %{
21579   predicate(!n->as_ShiftV()->is_var_shift());
21580   match(Set dst ( LShiftVI src shift));
21581   match(Set dst ( RShiftVI src shift));
21582   match(Set dst (URShiftVI src shift));
21583   effect(TEMP dst, USE src, USE shift);
21584   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21585   ins_encode %{
21586     int opcode = this->ideal_Opcode();
21587     if (UseAVX > 0) {
21588       int vlen_enc = vector_length_encoding(this);
21589       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21590     } else {
21591       int vlen = Matcher::vector_length(this);
21592       if (vlen == 2) {
21593         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21594         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21595       } else {
21596         assert(vlen == 4, "sanity");
21597         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21598         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21599       }
21600     }
21601   %}
21602   ins_pipe( pipe_slow );
21603 %}
21604 
21605 // Integers vector left constant shift
21606 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21607   match(Set dst (LShiftVI src (LShiftCntV shift)));
21608   match(Set dst (RShiftVI src (RShiftCntV shift)));
21609   match(Set dst (URShiftVI src (RShiftCntV shift)));
21610   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21611   ins_encode %{
21612     int opcode = this->ideal_Opcode();
21613     if (UseAVX > 0) {
21614       int vector_len = vector_length_encoding(this);
21615       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21616     } else {
21617       int vlen = Matcher::vector_length(this);
21618       if (vlen == 2) {
21619         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21620         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21621       } else {
21622         assert(vlen == 4, "sanity");
21623         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21624         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21625       }
21626     }
21627   %}
21628   ins_pipe( pipe_slow );
21629 %}
21630 
21631 // Longs vector shift
21632 instruct vshiftL(vec dst, vec src, vec shift) %{
21633   predicate(!n->as_ShiftV()->is_var_shift());
21634   match(Set dst ( LShiftVL src shift));
21635   match(Set dst (URShiftVL src shift));
21636   effect(TEMP dst, USE src, USE shift);
21637   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21638   ins_encode %{
21639     int opcode = this->ideal_Opcode();
21640     if (UseAVX > 0) {
21641       int vlen_enc = vector_length_encoding(this);
21642       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21643     } else {
21644       assert(Matcher::vector_length(this) == 2, "");
21645       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21646       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21647     }
21648   %}
21649   ins_pipe( pipe_slow );
21650 %}
21651 
21652 // Longs vector constant shift
21653 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21654   match(Set dst (LShiftVL src (LShiftCntV shift)));
21655   match(Set dst (URShiftVL src (RShiftCntV shift)));
21656   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21657   ins_encode %{
21658     int opcode = this->ideal_Opcode();
21659     if (UseAVX > 0) {
21660       int vector_len = vector_length_encoding(this);
21661       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21662     } else {
21663       assert(Matcher::vector_length(this) == 2, "");
21664       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21665       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21666     }
21667   %}
21668   ins_pipe( pipe_slow );
21669 %}
21670 
21671 // -------------------ArithmeticRightShift -----------------------------------
21672 // Long vector arithmetic right shift
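// SSE/AVX2 provide no 64-bit arithmetic right shift, so the first rule
// below sign-extends a logical shift: with u = x >>> n and
// m = sign_mask >>> n (the sign bit moved to its shifted position),
// (u ^ m) - m == x >> n, because the xor clears the shifted sign bit and
// the subtraction borrows it back across the vacated high bits.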
21673 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21674   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21675   match(Set dst (RShiftVL src shift));
21676   effect(TEMP dst, TEMP tmp);
21677   format %{ "vshiftq $dst,$src,$shift" %}
21678   ins_encode %{
21679     uint vlen = Matcher::vector_length(this);
21680     if (vlen == 2) {
21681       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21682       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21683       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21684       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21685       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21686       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21687     } else {
21688       assert(vlen == 4, "sanity");
21689       assert(UseAVX > 1, "required");
21690       int vlen_enc = Assembler::AVX_256bit;
21691       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21692       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21693       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21694       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21695       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21696     }
21697   %}
21698   ins_pipe( pipe_slow );
21699 %}
21700 
21701 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21702   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21703   match(Set dst (RShiftVL src shift));
21704   format %{ "vshiftq $dst,$src,$shift" %}
21705   ins_encode %{
21706     int vlen_enc = vector_length_encoding(this);
21707     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21708   %}
21709   ins_pipe( pipe_slow );
21710 %}
21711 
21712 // ------------------- Variable Shift -----------------------------
21713 // Byte variable shift
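// AVX2 only provides variable (per-lane) shifts at dword and qword
// granularity (vpsllvd/vpsrlvd/vpsravd and the qword forms); AVX-512BW adds
// the word-granularity forms. The *_nobw rules below therefore widen the
// elements before shifting, while the *_evex_bw rules can rely on word
// shifts.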
21714 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21715   predicate(Matcher::vector_length(n) <= 8 &&
21716             n->as_ShiftV()->is_var_shift() &&
21717             !VM_Version::supports_avx512bw());
21718   match(Set dst ( LShiftVB src shift));
21719   match(Set dst ( RShiftVB src shift));
21720   match(Set dst (URShiftVB src shift));
21721   effect(TEMP dst, TEMP vtmp);
21722   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21723   ins_encode %{
21724     assert(UseAVX >= 2, "required");
21725 
21726     int opcode = this->ideal_Opcode();
21727     int vlen_enc = Assembler::AVX_128bit;
21728     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21729     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21730   %}
21731   ins_pipe( pipe_slow );
21732 %}
21733 
21734 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21735   predicate(Matcher::vector_length(n) == 16 &&
21736             n->as_ShiftV()->is_var_shift() &&
21737             !VM_Version::supports_avx512bw());
21738   match(Set dst ( LShiftVB src shift));
21739   match(Set dst ( RShiftVB src shift));
21740   match(Set dst (URShiftVB src shift));
21741   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21742   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21743   ins_encode %{
21744     assert(UseAVX >= 2, "required");
21745 
21746     int opcode = this->ideal_Opcode();
21747     int vlen_enc = Assembler::AVX_128bit;
21748     // Shift lower half and get word result in dst
21749     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21750 
21751     // Shift upper half and get word result in vtmp1
21752     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21753     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21754     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21755 
21756     // Merge and down convert the two word results to byte in dst
21757     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21758   %}
21759   ins_pipe( pipe_slow );
21760 %}
21761 
21762 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21763   predicate(Matcher::vector_length(n) == 32 &&
21764             n->as_ShiftV()->is_var_shift() &&
21765             !VM_Version::supports_avx512bw());
21766   match(Set dst ( LShiftVB src shift));
21767   match(Set dst ( RShiftVB src shift));
21768   match(Set dst (URShiftVB src shift));
21769   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21770   format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21771   ins_encode %{
21772     assert(UseAVX >= 2, "required");
21773 
21774     int opcode = this->ideal_Opcode();
21775     int vlen_enc = Assembler::AVX_128bit;
21776     // Process lower 128 bits and get result in dst
21777     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21778     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21779     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21780     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21781     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21782 
    // Process upper 128 bits and get result in vtmp3
21784     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21785     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21786     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21787     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21788     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21789     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21790     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21791 
21792     // Merge the two results in dst
21793     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21794   %}
21795   ins_pipe( pipe_slow );
21796 %}
21797 
21798 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21799   predicate(Matcher::vector_length(n) <= 32 &&
21800             n->as_ShiftV()->is_var_shift() &&
21801             VM_Version::supports_avx512bw());
21802   match(Set dst ( LShiftVB src shift));
21803   match(Set dst ( RShiftVB src shift));
21804   match(Set dst (URShiftVB src shift));
21805   effect(TEMP dst, TEMP vtmp);
21806   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21807   ins_encode %{
21808     assert(UseAVX > 2, "required");
21809 
21810     int opcode = this->ideal_Opcode();
21811     int vlen_enc = vector_length_encoding(this);
21812     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21813   %}
21814   ins_pipe( pipe_slow );
21815 %}
21816 
21817 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21818   predicate(Matcher::vector_length(n) == 64 &&
21819             n->as_ShiftV()->is_var_shift() &&
21820             VM_Version::supports_avx512bw());
21821   match(Set dst ( LShiftVB src shift));
21822   match(Set dst ( RShiftVB src shift));
21823   match(Set dst (URShiftVB src shift));
21824   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21825   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21826   ins_encode %{
21827     assert(UseAVX > 2, "required");
21828 
21829     int opcode = this->ideal_Opcode();
21830     int vlen_enc = Assembler::AVX_256bit;
21831     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21832     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21833     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21834     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21835     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21836   %}
21837   ins_pipe( pipe_slow );
21838 %}
21839 
21840 // Short variable shift
21841 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21842   predicate(Matcher::vector_length(n) <= 8 &&
21843             n->as_ShiftV()->is_var_shift() &&
21844             !VM_Version::supports_avx512bw());
21845   match(Set dst ( LShiftVS src shift));
21846   match(Set dst ( RShiftVS src shift));
21847   match(Set dst (URShiftVS src shift));
21848   effect(TEMP dst, TEMP vtmp);
21849   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21850   ins_encode %{
21851     assert(UseAVX >= 2, "required");
21852 
21853     int opcode = this->ideal_Opcode();
21854     bool sign = (opcode != Op_URShiftVS);
21855     int vlen_enc = Assembler::AVX_256bit;
21856     __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
21857     __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
21858     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21859     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21860     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21861     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21862   %}
21863   ins_pipe( pipe_slow );
21864 %}
21865 
21866 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21867   predicate(Matcher::vector_length(n) == 16 &&
21868             n->as_ShiftV()->is_var_shift() &&
21869             !VM_Version::supports_avx512bw());
21870   match(Set dst ( LShiftVS src shift));
21871   match(Set dst ( RShiftVS src shift));
21872   match(Set dst (URShiftVS src shift));
21873   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21874   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21875   ins_encode %{
21876     assert(UseAVX >= 2, "required");
21877 
21878     int opcode = this->ideal_Opcode();
21879     bool sign = (opcode != Op_URShiftVS);
21880     int vlen_enc = Assembler::AVX_256bit;
21881     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21882     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21883     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21884     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21885     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21886 
21887     // Shift upper half, with result in dst using vtmp1 as TEMP
21888     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21889     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21890     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21891     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21892     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21893     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21894 
21895     // Merge lower and upper half result into dst
21896     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21897     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21898   %}
21899   ins_pipe( pipe_slow );
21900 %}
21901 
21902 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21903   predicate(n->as_ShiftV()->is_var_shift() &&
21904             VM_Version::supports_avx512bw());
21905   match(Set dst ( LShiftVS src shift));
21906   match(Set dst ( RShiftVS src shift));
21907   match(Set dst (URShiftVS src shift));
21908   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21909   ins_encode %{
21910     assert(UseAVX > 2, "required");
21911 
21912     int opcode = this->ideal_Opcode();
21913     int vlen_enc = vector_length_encoding(this);
21914     if (!VM_Version::supports_avx512vl()) {
21915       vlen_enc = Assembler::AVX_512bit;
21916     }
21917     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21918   %}
21919   ins_pipe( pipe_slow );
21920 %}
21921 
// Integer variable shift
21923 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21924   predicate(n->as_ShiftV()->is_var_shift());
21925   match(Set dst ( LShiftVI src shift));
21926   match(Set dst ( RShiftVI src shift));
21927   match(Set dst (URShiftVI src shift));
21928   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21929   ins_encode %{
21930     assert(UseAVX >= 2, "required");
21931 
21932     int opcode = this->ideal_Opcode();
21933     int vlen_enc = vector_length_encoding(this);
21934     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21935   %}
21936   ins_pipe( pipe_slow );
21937 %}
21938 
// Long variable shift
21940 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21941   predicate(n->as_ShiftV()->is_var_shift());
21942   match(Set dst ( LShiftVL src shift));
21943   match(Set dst (URShiftVL src shift));
21944   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21945   ins_encode %{
21946     assert(UseAVX >= 2, "required");
21947 
21948     int opcode = this->ideal_Opcode();
21949     int vlen_enc = vector_length_encoding(this);
21950     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21951   %}
21952   ins_pipe( pipe_slow );
21953 %}
21954 
// Long variable right shift arithmetic
21956 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21957   predicate(Matcher::vector_length(n) <= 4 &&
21958             n->as_ShiftV()->is_var_shift() &&
21959             UseAVX == 2);
21960   match(Set dst (RShiftVL src shift));
21961   effect(TEMP dst, TEMP vtmp);
21962   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21963   ins_encode %{
21964     int opcode = this->ideal_Opcode();
21965     int vlen_enc = vector_length_encoding(this);
21966     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21967                  $vtmp$$XMMRegister);
21968   %}
21969   ins_pipe( pipe_slow );
21970 %}
21971 
21972 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21973   predicate(n->as_ShiftV()->is_var_shift() &&
21974             UseAVX > 2);
21975   match(Set dst (RShiftVL src shift));
21976   format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
21977   ins_encode %{
21978     int opcode = this->ideal_Opcode();
21979     int vlen_enc = vector_length_encoding(this);
21980     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21981   %}
21982   ins_pipe( pipe_slow );
21983 %}
21984 
21985 // --------------------------------- AND --------------------------------------
21986 
21987 instruct vand(vec dst, vec src) %{
21988   predicate(UseAVX == 0);
21989   match(Set dst (AndV dst src));
21990   format %{ "pand    $dst,$src\t! and vectors" %}
21991   ins_encode %{
21992     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21993   %}
21994   ins_pipe( pipe_slow );
21995 %}
21996 
21997 instruct vand_reg(vec dst, vec src1, vec src2) %{
21998   predicate(UseAVX > 0);
21999   match(Set dst (AndV src1 src2));
22000   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
22001   ins_encode %{
22002     int vlen_enc = vector_length_encoding(this);
22003     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22004   %}
22005   ins_pipe( pipe_slow );
22006 %}
22007 
22008 instruct vand_mem(vec dst, vec src, memory mem) %{
22009   predicate((UseAVX > 0) &&
22010             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22011   match(Set dst (AndV src (LoadVector mem)));
22012   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
22013   ins_encode %{
22014     int vlen_enc = vector_length_encoding(this);
22015     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22016   %}
22017   ins_pipe( pipe_slow );
22018 %}
22019 
22020 // --------------------------------- OR ---------------------------------------
22021 
22022 instruct vor(vec dst, vec src) %{
22023   predicate(UseAVX == 0);
22024   match(Set dst (OrV dst src));
22025   format %{ "por     $dst,$src\t! or vectors" %}
22026   ins_encode %{
22027     __ por($dst$$XMMRegister, $src$$XMMRegister);
22028   %}
22029   ins_pipe( pipe_slow );
22030 %}
22031 
22032 instruct vor_reg(vec dst, vec src1, vec src2) %{
22033   predicate(UseAVX > 0);
22034   match(Set dst (OrV src1 src2));
22035   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
22036   ins_encode %{
22037     int vlen_enc = vector_length_encoding(this);
22038     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22039   %}
22040   ins_pipe( pipe_slow );
22041 %}
22042 
22043 instruct vor_mem(vec dst, vec src, memory mem) %{
22044   predicate((UseAVX > 0) &&
22045             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22046   match(Set dst (OrV src (LoadVector mem)));
22047   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
22048   ins_encode %{
22049     int vlen_enc = vector_length_encoding(this);
22050     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22051   %}
22052   ins_pipe( pipe_slow );
22053 %}
22054 
22055 // --------------------------------- XOR --------------------------------------
22056 
22057 instruct vxor(vec dst, vec src) %{
22058   predicate(UseAVX == 0);
22059   match(Set dst (XorV dst src));
22060   format %{ "pxor    $dst,$src\t! xor vectors" %}
22061   ins_encode %{
22062     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
22063   %}
22064   ins_pipe( pipe_slow );
22065 %}
22066 
22067 instruct vxor_reg(vec dst, vec src1, vec src2) %{
22068   predicate(UseAVX > 0);
22069   match(Set dst (XorV src1 src2));
22070   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
22071   ins_encode %{
22072     int vlen_enc = vector_length_encoding(this);
22073     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22074   %}
22075   ins_pipe( pipe_slow );
22076 %}
22077 
22078 instruct vxor_mem(vec dst, vec src, memory mem) %{
22079   predicate((UseAVX > 0) &&
22080             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22081   match(Set dst (XorV src (LoadVector mem)));
22082   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
22083   ins_encode %{
22084     int vlen_enc = vector_length_encoding(this);
22085     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22086   %}
22087   ins_pipe( pipe_slow );
22088 %}
22089 
22090 // --------------------------------- VectorCast --------------------------------------
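      // Casts between lane types. Widening casts use vpmovsx*/convert
      // instructions; narrowing casts on AVX-512 use the evpmov* down-converts,
      // whose 128/256-bit encodings require AVX512VL, so the encoding is
      // widened to 512 bits when VL is unavailable.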
22091 
22092 instruct vcastBtoX(vec dst, vec src) %{
22093   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
22094   match(Set dst (VectorCastB2X src));
22095   format %{ "vector_cast_b2x $dst,$src\t!" %}
22096   ins_encode %{
22097     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22098     int vlen_enc = vector_length_encoding(this);
22099     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22100   %}
22101   ins_pipe( pipe_slow );
22102 %}
22103 
22104 instruct vcastBtoD(legVec dst, legVec src) %{
22105   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
22106   match(Set dst (VectorCastB2X src));
22107   format %{ "vector_cast_b2x $dst,$src\t!" %}
22108   ins_encode %{
22109     int vlen_enc = vector_length_encoding(this);
22110     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22111   %}
22112   ins_pipe( pipe_slow );
22113 %}
22114 
22115 instruct castStoX(vec dst, vec src) %{
22116   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22117             Matcher::vector_length(n->in(1)) <= 8 && // src
22118             Matcher::vector_element_basic_type(n) == T_BYTE);
22119   match(Set dst (VectorCastS2X src));
22120   format %{ "vector_cast_s2x $dst,$src" %}
22121   ins_encode %{
22122     assert(UseAVX > 0, "required");
22123 
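          // Narrow short->byte within one XMM register: clear the high byte of
          // each short lane, then pack adjacent word lanes into bytes (the
          // unsigned pack cannot saturate because the high bytes are already zero).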
22124     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
22125     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
22126   %}
22127   ins_pipe( pipe_slow );
22128 %}
22129 
22130 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
22131   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22132             Matcher::vector_length(n->in(1)) == 16 && // src
22133             Matcher::vector_element_basic_type(n) == T_BYTE);
22134   effect(TEMP dst, TEMP vtmp);
22135   match(Set dst (VectorCastS2X src));
22136   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22137   ins_encode %{
22138     assert(UseAVX > 0, "required");
22139 
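          // 256-bit source: mask off the high byte of each short, move the upper
          // 128-bit lane into $vtmp, then pack both lanes into one 128-bit result.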
22140     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22141     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22142     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22143     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22144   %}
22145   ins_pipe( pipe_slow );
22146 %}
22147 
22148 instruct vcastStoX_evex(vec dst, vec src) %{
22149   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22150             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22151   match(Set dst (VectorCastS2X src));
22152   format %{ "vector_cast_s2x $dst,$src\t!" %}
22153   ins_encode %{
22154     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22155     int src_vlen_enc = vector_length_encoding(this, $src);
22156     int vlen_enc = vector_length_encoding(this);
22157     switch (to_elem_bt) {
22158       case T_BYTE:
22159         if (!VM_Version::supports_avx512vl()) {
22160           src_vlen_enc = Assembler::AVX_512bit;
22161         }
22162         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22163         break;
22164       case T_INT:
22165         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22166         break;
22167       case T_FLOAT:
22168         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22169         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22170         break;
22171       case T_LONG:
22172         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22173         break;
22174       case T_DOUBLE: {
22175         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22176         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22177         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22178         break;
22179       }
22180       default:
22181         ShouldNotReachHere();
22182     }
22183   %}
22184   ins_pipe( pipe_slow );
22185 %}
22186 
22187 instruct castItoX(vec dst, vec src) %{
22188   predicate(UseAVX <= 2 &&
22189             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22190             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22191   match(Set dst (VectorCastI2X src));
22192   format %{ "vector_cast_i2x $dst,$src" %}
22193   ins_encode %{
22194     assert(UseAVX > 0, "required");
22195 
22196     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22197     int vlen_enc = vector_length_encoding(this, $src);
22198 
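          // int->byte packs twice (dword->word, then word->byte); int->short
          // packs once. The masks zero the discarded upper bytes so the
          // unsigned packs cannot saturate.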
22199     if (to_elem_bt == T_BYTE) {
22200       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22201       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22202       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22203     } else {
22204       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22205       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22206       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22207     }
22208   %}
22209   ins_pipe( pipe_slow );
22210 %}
22211 
22212 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22213   predicate(UseAVX <= 2 &&
22214             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22215             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22216   match(Set dst (VectorCastI2X src));
22217   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22218   effect(TEMP dst, TEMP vtmp);
22219   ins_encode %{
22220     assert(UseAVX > 0, "required");
22221 
22222     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22223     int vlen_enc = vector_length_encoding(this, $src);
22224 
22225     if (to_elem_bt == T_BYTE) {
22226       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22227       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22228       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22229       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22230     } else {
22231       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22232       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22233       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22234       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22235     }
22236   %}
22237   ins_pipe( pipe_slow );
22238 %}
22239 
22240 instruct vcastItoX_evex(vec dst, vec src) %{
22241   predicate(UseAVX > 2 ||
22242             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22243   match(Set dst (VectorCastI2X src));
22244   format %{ "vector_cast_i2x $dst,$src\t!" %}
22245   ins_encode %{
22246     assert(UseAVX > 0, "required");
22247 
22248     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22249     int src_vlen_enc = vector_length_encoding(this, $src);
22250     int dst_vlen_enc = vector_length_encoding(this);
22251     switch (dst_elem_bt) {
22252       case T_BYTE:
22253         if (!VM_Version::supports_avx512vl()) {
22254           src_vlen_enc = Assembler::AVX_512bit;
22255         }
22256         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22257         break;
22258       case T_SHORT:
22259         if (!VM_Version::supports_avx512vl()) {
22260           src_vlen_enc = Assembler::AVX_512bit;
22261         }
22262         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22263         break;
22264       case T_FLOAT:
22265         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22266         break;
22267       case T_LONG:
22268         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22269         break;
22270       case T_DOUBLE:
22271         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22272         break;
22273       default:
22274         ShouldNotReachHere();
22275     }
22276   %}
22277   ins_pipe( pipe_slow );
22278 %}
22279 
22280 instruct vcastLtoBS(vec dst, vec src) %{
22281   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22282             UseAVX <= 2);
22283   match(Set dst (VectorCastL2X src));
22284   format %{ "vector_cast_l2x  $dst,$src" %}
22285   ins_encode %{
22286     assert(UseAVX > 0, "required");
22287 
22288     int vlen = Matcher::vector_length_in_bytes(this, $src);
22289     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
22290     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22291                                                       : ExternalAddress(vector_int_to_short_mask());
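          // Gather the low 32 bits of each long into the low half of the
          // register, then mask and pack down to the requested subword size.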
22292     if (vlen <= 16) {
22293       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22294       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22295       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22296     } else {
22297       assert(vlen <= 32, "required");
22298       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22299       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22300       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22301       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22302     }
22303     if (to_elem_bt == T_BYTE) {
22304       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22305     }
22306   %}
22307   ins_pipe( pipe_slow );
22308 %}
22309 
22310 instruct vcastLtoX_evex(vec dst, vec src) %{
22311   predicate(UseAVX > 2 ||
22312             (Matcher::vector_element_basic_type(n) == T_INT ||
22313              Matcher::vector_element_basic_type(n) == T_FLOAT ||
22314              Matcher::vector_element_basic_type(n) == T_DOUBLE));
22315   match(Set dst (VectorCastL2X src));
22316   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22317   ins_encode %{
22318     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22319     int vlen = Matcher::vector_length_in_bytes(this, $src);
22320     int vlen_enc = vector_length_encoding(this, $src);
22321     switch (to_elem_bt) {
22322       case T_BYTE:
22323         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22324           vlen_enc = Assembler::AVX_512bit;
22325         }
22326         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22327         break;
22328       case T_SHORT:
22329         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22330           vlen_enc = Assembler::AVX_512bit;
22331         }
22332         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22333         break;
22334       case T_INT:
22335         if (vlen == 8) {
22336           if ($dst$$XMMRegister != $src$$XMMRegister) {
22337             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22338           }
22339         } else if (vlen == 16) {
22340           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22341         } else if (vlen == 32) {
22342           if (UseAVX > 2) {
22343             if (!VM_Version::supports_avx512vl()) {
22344               vlen_enc = Assembler::AVX_512bit;
22345             }
22346             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22347           } else {
22348             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22349             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22350           }
22351         } else { // vlen == 64
22352           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22353         }
22354         break;
22355       case T_FLOAT:
22356         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22357         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22358         break;
22359       case T_DOUBLE:
22360         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22361         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22362         break;
22363 
22364       default: assert(false, "%s", type2name(to_elem_bt));
22365     }
22366   %}
22367   ins_pipe( pipe_slow );
22368 %}
22369 
22370 instruct vcastFtoD_reg(vec dst, vec src) %{
22371   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22372   match(Set dst (VectorCastF2X src));
22373   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22374   ins_encode %{
22375     int vlen_enc = vector_length_encoding(this);
22376     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22377   %}
22378   ins_pipe( pipe_slow );
22379 %}
22380 
22381 
22382 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22383   predicate(!VM_Version::supports_avx10_2() &&
22384             !VM_Version::supports_avx512vl() &&
22385             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22386             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22387             is_integral_type(Matcher::vector_element_basic_type(n)));
22388   match(Set dst (VectorCastF2X src));
22389   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22390   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22391   ins_encode %{
22392     int vlen_enc = vector_length_encoding(this, $src);
22393     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22394     // JDK-8292878 removed the need for an explicit scratch register to load
22395     // stub-constant addresses wider than 32 bits for register-indirect
22396     // addressing, since stub constants live in the code cache and
22397     // ReservedCodeCacheSize is currently capped at 2G. Targets are free to
22398     // raise that limit, but a code cache above 2G is unreasonable in practice;
22399     // on the flip side, the cap saves a temporary register allocation, which
22400     // in the limiting case can prevent spilling in high-register-pressure blocks.
22401     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22402                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22403                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22404   %}
22405   ins_pipe( pipe_slow );
22406 %}
22407 
22408 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22409   predicate(!VM_Version::supports_avx10_2() &&
22410             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22411             is_integral_type(Matcher::vector_element_basic_type(n)));
22412   match(Set dst (VectorCastF2X src));
22413   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22414   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22415   ins_encode %{
22416     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
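          // F2L widens (destination lanes are twice as wide as the source), so
          // the T_LONG path sizes the operation by the destination vector; the
          // other integral targets are sized by the source.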
22417     if (to_elem_bt == T_LONG) {
22418       int vlen_enc = vector_length_encoding(this);
22419       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22420                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22421                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22422     } else {
22423       int vlen_enc = vector_length_encoding(this, $src);
22424       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22425                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22426                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22427     }
22428   %}
22429   ins_pipe( pipe_slow );
22430 %}
22431 
22432 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22433   predicate(VM_Version::supports_avx10_2() &&
22434             is_integral_type(Matcher::vector_element_basic_type(n)));
22435   match(Set dst (VectorCastF2X src));
22436   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22437   ins_encode %{
22438     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22439     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22440     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22441   %}
22442   ins_pipe( pipe_slow );
22443 %}
22444 
22445 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22446   predicate(VM_Version::supports_avx10_2() &&
22447             is_integral_type(Matcher::vector_element_basic_type(n)));
22448   match(Set dst (VectorCastF2X (LoadVector src)));
22449   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22450   ins_encode %{
22451     int vlen = Matcher::vector_length(this);
22452     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22453     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22454     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22455   %}
22456   ins_pipe( pipe_slow );
22457 %}
22458 
22459 instruct vcastDtoF_reg(vec dst, vec src) %{
22460   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22461   match(Set dst (VectorCastD2X src));
22462   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22463   ins_encode %{
22464     int vlen_enc = vector_length_encoding(this, $src);
22465     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22466   %}
22467   ins_pipe( pipe_slow );
22468 %}
22469 
22470 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22471   predicate(!VM_Version::supports_avx10_2() &&
22472             !VM_Version::supports_avx512vl() &&
22473             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22474             is_integral_type(Matcher::vector_element_basic_type(n)));
22475   match(Set dst (VectorCastD2X src));
22476   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22477   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22478   ins_encode %{
22479     int vlen_enc = vector_length_encoding(this, $src);
22480     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22481     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22482                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22483                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22484   %}
22485   ins_pipe( pipe_slow );
22486 %}
22487 
22488 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22489   predicate(!VM_Version::supports_avx10_2() &&
22490             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22491             is_integral_type(Matcher::vector_element_basic_type(n)));
22492   match(Set dst (VectorCastD2X src));
22493   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22494   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22495   ins_encode %{
22496     int vlen_enc = vector_length_encoding(this, $src);
22497     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22498     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22499                               ExternalAddress(vector_float_signflip());
22500     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22501                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22502   %}
22503   ins_pipe( pipe_slow );
22504 %}
22505 
22506 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22507   predicate(VM_Version::supports_avx10_2() &&
22508             is_integral_type(Matcher::vector_element_basic_type(n)));
22509   match(Set dst (VectorCastD2X src));
22510   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22511   ins_encode %{
22512     int vlen_enc = vector_length_encoding(this, $src);
22513     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22514     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22515   %}
22516   ins_pipe( pipe_slow );
22517 %}
22518 
22519 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22520   predicate(VM_Version::supports_avx10_2() &&
22521             is_integral_type(Matcher::vector_element_basic_type(n)));
22522   match(Set dst (VectorCastD2X (LoadVector src)));
22523   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22524   ins_encode %{
22525     int vlen = Matcher::vector_length(this);
22526     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22527     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22528     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22529   %}
22530   ins_pipe( pipe_slow );
22531 %}
22532 
22533 instruct vucast(vec dst, vec src) %{
22534   match(Set dst (VectorUCastB2X src));
22535   match(Set dst (VectorUCastS2X src));
22536   match(Set dst (VectorUCastI2X src));
22537   format %{ "vector_ucast $dst,$src\t!" %}
22538   ins_encode %{
22539     assert(UseAVX > 0, "required");
22540 
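          // Unsigned casts are always widening: each lane is zero-extended to
          // the destination element type.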
22541     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22542     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22543     int vlen_enc = vector_length_encoding(this);
22544     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22545   %}
22546   ins_pipe( pipe_slow );
22547 %}
22548 
22549 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22550   predicate(!VM_Version::supports_avx512vl() &&
22551             Matcher::vector_length_in_bytes(n) < 64 &&
22552             Matcher::vector_element_basic_type(n) == T_INT);
22553   match(Set dst (RoundVF src));
22554   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22555   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22556   ins_encode %{
22557     int vlen_enc = vector_length_encoding(this);
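          // 0x3F80 is MXCSR with RC = 01 (round toward negative infinity),
          // applied while rounding; the EnableX86ECoreOpts value (0x3FBF) also
          // pre-sets the exception status flags, likely to avoid partial MXCSR
          // updates on E-cores. The EVEX variants below use the same constant.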
22558     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22559     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22560                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22561                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22562   %}
22563   ins_pipe( pipe_slow );
22564 %}
22565 
22566 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22567   predicate((VM_Version::supports_avx512vl() ||
22568              Matcher::vector_length_in_bytes(n) == 64) &&
22569              Matcher::vector_element_basic_type(n) == T_INT);
22570   match(Set dst (RoundVF src));
22571   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22572   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22573   ins_encode %{
22574     int vlen_enc = vector_length_encoding(this);
22575     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22576     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22577                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22578                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22579   %}
22580   ins_pipe( pipe_slow );
22581 %}
22582 
22583 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22584   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22585   match(Set dst (RoundVD src));
22586   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2,  KILL cr);
22587   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22588   ins_encode %{
22589     int vlen_enc = vector_length_encoding(this);
22590     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22591     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22592                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22593                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22594   %}
22595   ins_pipe( pipe_slow );
22596 %}
22597 
22598 // --------------------------------- VectorMaskCmp --------------------------------------
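      // Lane-wise compares. Without an AVX-512 vector-mask result type the
      // outcome is a vector of all-ones/all-zero lanes; when the node's type is
      // a vectmask, the result is produced directly in a k register.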
22599 
22600 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22601   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22602             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22603             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22604             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22605   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22606   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22607   ins_encode %{
22608     int vlen_enc = vector_length_encoding(this, $src1);
22609     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22610     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22611       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22612     } else {
22613       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22614     }
22615   %}
22616   ins_pipe( pipe_slow );
22617 %}
22618 
22619 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22620   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22621             n->bottom_type()->isa_vectmask() == nullptr &&
22622             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22623   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22624   effect(TEMP ktmp);
22625   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22626   ins_encode %{
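          // No vectmask result type here: compare into $ktmp, then expand the
          // k-mask into all-ones/all-zero lanes in $dst.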
22627     int vlen_enc = Assembler::AVX_512bit;
22628     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22629     KRegister mask = k0; // The comparison itself is not being masked.
22630     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22631       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22632       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22633     } else {
22634       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22635       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22636     }
22637   %}
22638   ins_pipe( pipe_slow );
22639 %}
22640 
22641 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22642   predicate(n->bottom_type()->isa_vectmask() &&
22643             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22644   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22645   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22646   ins_encode %{
22647     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22648     int vlen_enc = vector_length_encoding(this, $src1);
22649     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22650     KRegister mask = k0; // The comparison itself is not being masked.
22651     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22652       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22653     } else {
22654       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22655     }
22656   %}
22657   ins_pipe( pipe_slow );
22658 %}
22659 
22660 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22661   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22662             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22663             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22664             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22665             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22666             (n->in(2)->get_int() == BoolTest::eq ||
22667              n->in(2)->get_int() == BoolTest::lt ||
22668              n->in(2)->get_int() == BoolTest::gt)); // cond
22669   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22670   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22671   ins_encode %{
22672     int vlen_enc = vector_length_encoding(this, $src1);
22673     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22674     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22675     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22676   %}
22677   ins_pipe( pipe_slow );
22678 %}
22679 
22680 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22681   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22682             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22683             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22684             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22685             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22686             (n->in(2)->get_int() == BoolTest::ne ||
22687              n->in(2)->get_int() == BoolTest::le ||
22688              n->in(2)->get_int() == BoolTest::ge)); // cond
22689   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22690   effect(TEMP dst, TEMP xtmp);
22691   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22692   ins_encode %{
22693     int vlen_enc = vector_length_encoding(this, $src1);
22694     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22695     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22696     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22697   %}
22698   ins_pipe( pipe_slow );
22699 %}
22700 
22701 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22702   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22703             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22704             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22705             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22706             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22707   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22708   effect(TEMP dst, TEMP xtmp);
22709   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22710   ins_encode %{
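          // There is no unsigned vector compare before AVX-512: flip the sign
          // bit of both operands (biasing them into signed range) and compare
          // signed.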
22711     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22712     int vlen_enc = vector_length_encoding(this, $src1);
22713     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22714     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22715 
22716     if (vlen_enc == Assembler::AVX_128bit) {
22717       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22718     } else {
22719       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22720     }
22721     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22722     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22723     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22724   %}
22725   ins_pipe( pipe_slow );
22726 %}
22727 
22728 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22729   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22730              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22731              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22732   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22733   effect(TEMP ktmp);
22734   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22735   ins_encode %{
22736     assert(UseAVX > 2, "required");
22737 
22738     int vlen_enc = vector_length_encoding(this, $src1);
22739     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22740     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22741     KRegister mask = k0; // The comparison itself is not being masked.
22742     bool merge = false;
22743     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22744 
22745     switch (src1_elem_bt) {
22746       case T_INT: {
22747         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22748         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22749         break;
22750       }
22751       case T_LONG: {
22752         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22753         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22754         break;
22755       }
22756       default: assert(false, "%s", type2name(src1_elem_bt));
22757     }
22758   %}
22759   ins_pipe( pipe_slow );
22760 %}
22761 
22762 
22763 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22764   predicate(n->bottom_type()->isa_vectmask() &&
22765             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22766   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22767   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22768   ins_encode %{
22769     assert(UseAVX > 2, "required");
22770     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22771 
22772     int vlen_enc = vector_length_encoding(this, $src1);
22773     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22774     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22775     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22776 
22777     // Compare src1 and src2 lane-wise; the result is a mask in $dst.
22778     switch (src1_elem_bt) {
22779       case T_BYTE: {
22780         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22781         break;
22782       }
22783       case T_SHORT: {
22784         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22785         break;
22786       }
22787       case T_INT: {
22788         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22789         break;
22790       }
22791       case T_LONG: {
22792         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22793         break;
22794       }
22795       default: assert(false, "%s", type2name(src1_elem_bt));
22796     }
22797   %}
22798   ins_pipe( pipe_slow );
22799 %}
22800 
22801 // Extract
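      // Extracts from vectors wider than 128 bits first move the 128-bit lane
      // that holds the element into a TEMP (get_lane), then extract within that
      // lane (get_elem).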
22802 
22803 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22804   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22805   match(Set dst (ExtractI src idx));
22806   match(Set dst (ExtractS src idx));
22807   match(Set dst (ExtractB src idx));
22808   format %{ "extractI $dst,$src,$idx\t!" %}
22809   ins_encode %{
22810     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22811 
22812     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22813     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22814   %}
22815   ins_pipe( pipe_slow );
22816 %}
22817 
22818 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22819   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22820             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22821   match(Set dst (ExtractI src idx));
22822   match(Set dst (ExtractS src idx));
22823   match(Set dst (ExtractB src idx));
22824   effect(TEMP vtmp);
22825   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22826   ins_encode %{
22827     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22828 
22829     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22830     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22831     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22832   %}
22833   ins_pipe( pipe_slow );
22834 %}
22835 
22836 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22837   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22838   match(Set dst (ExtractL src idx));
22839   format %{ "extractL $dst,$src,$idx\t!" %}
22840   ins_encode %{
22841     assert(UseSSE >= 4, "required");
22842     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22843 
22844     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22845   %}
22846   ins_pipe( pipe_slow );
22847 %}
22848 
22849 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22850   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22851             Matcher::vector_length(n->in(1)) == 8);  // src
22852   match(Set dst (ExtractL src idx));
22853   effect(TEMP vtmp);
22854   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22855   ins_encode %{
22856     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22857 
22858     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22859     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22860   %}
22861   ins_pipe( pipe_slow );
22862 %}
22863 
22864 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22865   predicate(Matcher::vector_length(n->in(1)) <= 4);
22866   match(Set dst (ExtractF src idx));
22867   effect(TEMP dst, TEMP vtmp);
22868   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22869   ins_encode %{
22870     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22871 
22872     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22873   %}
22874   ins_pipe( pipe_slow );
22875 %}
22876 
22877 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22878   predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22879             Matcher::vector_length(n->in(1)/*src*/) == 16);
22880   match(Set dst (ExtractF src idx));
22881   effect(TEMP vtmp);
22882   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22883   ins_encode %{
22884     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22885 
22886     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22887     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22888   %}
22889   ins_pipe( pipe_slow );
22890 %}
22891 
22892 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22893   predicate(Matcher::vector_length(n->in(1)) == 2); // src
22894   match(Set dst (ExtractD src idx));
22895   format %{ "extractD $dst,$src,$idx\t!" %}
22896   ins_encode %{
22897     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22898 
22899     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22900   %}
22901   ins_pipe( pipe_slow );
22902 %}
22903 
22904 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22905   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22906             Matcher::vector_length(n->in(1)) == 8);  // src
22907   match(Set dst (ExtractD src idx));
22908   effect(TEMP vtmp);
22909   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22910   ins_encode %{
22911     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22912 
22913     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22914     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22915   %}
22916   ins_pipe( pipe_slow );
22917 %}
22918 
22919 // --------------------------------- Vector Blend --------------------------------------
22920 
22921 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22922   predicate(UseAVX == 0);
22923   match(Set dst (VectorBlend (Binary dst src) mask));
22924   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
22925   effect(TEMP tmp);
22926   ins_encode %{
22927     assert(UseSSE >= 4, "required");
22928 
22929     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22930       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22931     }
22932     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22933   %}
22934   ins_pipe( pipe_slow );
22935 %}
22936 
22937 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22938   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22939             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22940             Matcher::vector_length_in_bytes(n) <= 32 &&
22941             is_integral_type(Matcher::vector_element_basic_type(n)));
22942   match(Set dst (VectorBlend (Binary src1 src2) mask));
22943   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22944   ins_encode %{
22945     int vlen_enc = vector_length_encoding(this);
22946     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22947   %}
22948   ins_pipe( pipe_slow );
22949 %}
22950 
22951 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22952   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22953             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22954             Matcher::vector_length_in_bytes(n) <= 32 &&
22955             !is_integral_type(Matcher::vector_element_basic_type(n)));
22956   match(Set dst (VectorBlend (Binary src1 src2) mask));
22957   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22958   ins_encode %{
22959     int vlen_enc = vector_length_encoding(this);
22960     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22961   %}
22962   ins_pipe( pipe_slow );
22963 %}
22964 
22965 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22966   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22967             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22968             Matcher::vector_length_in_bytes(n) <= 32);
22969   match(Set dst (VectorBlend (Binary src1 src2) mask));
22970   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22971   effect(TEMP vtmp, TEMP dst);
22972   ins_encode %{
22973     int vlen_enc = vector_length_encoding(this);
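          // Blend without vpblendvb (for EnableX86ECoreOpts targets):
          // $dst = ($mask & $src2) | (~$mask & $src1).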
22974     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22975     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22976     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
22977   %}
22978   ins_pipe( pipe_slow );
22979 %}
22980 
22981 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22982   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22983             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22984   match(Set dst (VectorBlend (Binary src1 src2) mask));
22985   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22986   effect(TEMP ktmp);
22987   ins_encode %{
22988     int vlen_enc = Assembler::AVX_512bit;
22989     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22990     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22991     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22992   %}
22993   ins_pipe( pipe_slow );
22994 %}
22995 
22996 
22997 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22998   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22999             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
23000              VM_Version::supports_avx512bw()));
23001   match(Set dst (VectorBlend (Binary src1 src2) mask));
23002   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
23003   ins_encode %{
23004     int vlen_enc = vector_length_encoding(this);
23005     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23006     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23007   %}
23008   ins_pipe( pipe_slow );
23009 %}
23010 
23011 // --------------------------------- ABS --------------------------------------
23012 // a = |a|
23013 instruct vabsB_reg(vec dst, vec src) %{
23014   match(Set dst (AbsVB  src));
23015   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
23016   ins_encode %{
23017     uint vlen = Matcher::vector_length(this);
23018     if (vlen <= 16) {
23019       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23020     } else {
23021       int vlen_enc = vector_length_encoding(this);
23022       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23023     }
23024   %}
23025   ins_pipe( pipe_slow );
23026 %}
23027 
23028 instruct vabsS_reg(vec dst, vec src) %{
23029   match(Set dst (AbsVS  src));
23030   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
23031   ins_encode %{
23032     uint vlen = Matcher::vector_length(this);
23033     if (vlen <= 8) {
23034       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23035     } else {
23036       int vlen_enc = vector_length_encoding(this);
23037       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23038     }
23039   %}
23040   ins_pipe( pipe_slow );
23041 %}
23042 
23043 instruct vabsI_reg(vec dst, vec src) %{
23044   match(Set dst (AbsVI  src));
23045   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
23046   ins_encode %{
23047     uint vlen = Matcher::vector_length(this);
23048     if (vlen <= 4) {
23049       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23050     } else {
23051       int vlen_enc = vector_length_encoding(this);
23052       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23053     }
23054   %}
23055   ins_pipe( pipe_slow );
23056 %}
23057 
23058 instruct vabsL_reg(vec dst, vec src) %{
23059   match(Set dst (AbsVL  src));
23060   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
23061   ins_encode %{
23062     assert(UseAVX > 2, "required");
23063     int vlen_enc = vector_length_encoding(this);
23064     if (!VM_Version::supports_avx512vl()) {
23065       vlen_enc = Assembler::AVX_512bit;
23066     }
23067     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23068   %}
23069   ins_pipe( pipe_slow );
23070 %}
23071 
23072 // --------------------------------- ABSNEG --------------------------------------
23073 
23074 instruct vabsnegF(vec dst, vec src) %{
23075   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
23076   match(Set dst (AbsVF src));
23077   match(Set dst (NegVF src));
23078   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
23079   ins_cost(150);
23080   ins_encode %{
23081     int opcode = this->ideal_Opcode();
23082     int vlen = Matcher::vector_length(this);
23083     if (vlen == 2) {
23084       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23085     } else {
23086       assert(vlen == 8 || vlen == 16, "required");
23087       int vlen_enc = vector_length_encoding(this);
23088       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23089     }
23090   %}
23091   ins_pipe( pipe_slow );
23092 %}
23093 
23094 instruct vabsneg4F(vec dst) %{
23095   predicate(Matcher::vector_length(n) == 4);
23096   match(Set dst (AbsVF dst));
23097   match(Set dst (NegVF dst));
23098   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
23099   ins_cost(150);
23100   ins_encode %{
23101     int opcode = this->ideal_Opcode();
23102     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
23103   %}
23104   ins_pipe( pipe_slow );
23105 %}
23106 
23107 instruct vabsnegD(vec dst, vec src) %{
23108   match(Set dst (AbsVD  src));
23109   match(Set dst (NegVD  src));
23110   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
23111   ins_encode %{
23112     int opcode = this->ideal_Opcode();
23113     uint vlen = Matcher::vector_length(this);
23114     if (vlen == 2) {
23115       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23116     } else {
23117       int vlen_enc = vector_length_encoding(this);
23118       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23119     }
23120   %}
23121   ins_pipe( pipe_slow );
23122 %}
23123 
23124 //------------------------------------- VectorTest --------------------------------------------
23125 
23126 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
23127   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
23128   match(Set cr (VectorTest src1 src2));
23129   effect(TEMP vtmp);
23130   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
23131   ins_encode %{
23132     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23133     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23134     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23135   %}
23136   ins_pipe( pipe_slow );
23137 %}
23138 
23139 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23140   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23141   match(Set cr (VectorTest src1 src2));
23142   format %{ "vptest_ge16  $src1, $src2\n\t" %}
23143   ins_encode %{
23144     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23145     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23146     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23147   %}
23148   ins_pipe( pipe_slow );
23149 %}
23150 
23151 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23152   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23153              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23154             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23155   match(Set cr (VectorTest src1 src2));
23156   effect(TEMP tmp);
23157   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23158   ins_encode %{
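          // Mask is too short for a direct ktest: move it to a GPR, keep only
          // the live masklen bits, and compare against the all-true pattern.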
23159     uint masklen = Matcher::vector_length(this, $src1);
23160     __ kmovwl($tmp$$Register, $src1$$KRegister);
23161     __ andl($tmp$$Register, (1 << masklen) - 1);
23162     __ cmpl($tmp$$Register, (1 << masklen) - 1);
23163   %}
23164   ins_pipe( pipe_slow );
23165 %}
23166 
23167 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23168   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23169              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23170             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23171   match(Set cr (VectorTest src1 src2));
23172   effect(TEMP tmp);
23173   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23174   ins_encode %{
23175     uint masklen = Matcher::vector_length(this, $src1);
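    // Any-true check: the andl with the lane mask sets ZF only when no mask
    // bit is set, so a "not equal" flag result means at least one lane is true.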
23176     __ kmovwl($tmp$$Register, $src1$$KRegister);
23177     __ andl($tmp$$Register, (1 << masklen) - 1);
23178   %}
23179   ins_pipe( pipe_slow );
23180 %}
23181 
23182 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23183   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23184             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23185   match(Set cr (VectorTest src1 src2));
23186   format %{ "ktest_ge8  $src1, $src2\n\t" %}
23187   ins_encode %{
23188     uint masklen = Matcher::vector_length(this, $src1);
23189     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23190   %}
23191   ins_pipe( pipe_slow );
23192 %}
23193 
23194 //------------------------------------- LoadMask --------------------------------------------
23195 
23196 instruct loadMask(legVec dst, legVec src) %{
23197   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23198   match(Set dst (VectorLoadMask src));
23199   effect(TEMP dst);
23200   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23201   ins_encode %{
23202     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23203     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23204     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23205   %}
23206   ins_pipe( pipe_slow );
23207 %}
23208 
23209 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23210   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23211   match(Set dst (VectorLoadMask src));
23212   effect(TEMP xtmp);
23213   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23214   ins_encode %{
23215     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23216                         true, Assembler::AVX_512bit);
23217   %}
23218   ins_pipe( pipe_slow );
23219 %}
23220 
instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23222   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23223   match(Set dst (VectorLoadMask src));
23224   effect(TEMP xtmp);
23225   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23226   ins_encode %{
23227     int vlen_enc = vector_length_encoding(in(1));
23228     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23229                         false, vlen_enc);
23230   %}
23231   ins_pipe( pipe_slow );
23232 %}
23233 
23234 //------------------------------------- StoreMask --------------------------------------------
23235 
23236 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23237   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23238   match(Set dst (VectorStoreMask src size));
23239   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23240   ins_encode %{
23241     int vlen = Matcher::vector_length(this);
23242     if (vlen <= 16 && UseAVX <= 2) {
23243       assert(UseSSE >= 3, "required");
23244       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23245     } else {
23246       assert(UseAVX > 0, "required");
23247       int src_vlen_enc = vector_length_encoding(this, $src);
23248       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23249     }
23250   %}
23251   ins_pipe( pipe_slow );
23252 %}
23253 
23254 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23255   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23256   match(Set dst (VectorStoreMask src size));
23257   effect(TEMP_DEF dst, TEMP xtmp);
23258   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23259   ins_encode %{
23260     int vlen_enc = Assembler::AVX_128bit;
23261     int vlen = Matcher::vector_length(this);
23262     if (vlen <= 8) {
23263       assert(UseSSE >= 3, "required");
23264       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23265       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23266       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23267     } else {
23268       assert(UseAVX > 0, "required");
23269       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23270       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23271       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23272     }
23273   %}
23274   ins_pipe( pipe_slow );
23275 %}
23276 
23277 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23278   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23279   match(Set dst (VectorStoreMask src size));
23280   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23281   effect(TEMP_DEF dst, TEMP xtmp);
23282   ins_encode %{
23283     int vlen_enc = Assembler::AVX_128bit;
23284     int vlen = Matcher::vector_length(this);
23285     if (vlen <= 4) {
23286       assert(UseSSE >= 3, "required");
23287       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23288       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23289       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23290       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23291     } else {
23292       assert(UseAVX > 0, "required");
23293       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23294       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23295       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23296       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23297       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23298     }
23299   %}
23300   ins_pipe( pipe_slow );
23301 %}
23302 
23303 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23304   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23305   match(Set dst (VectorStoreMask src size));
23306   effect(TEMP_DEF dst, TEMP xtmp);
23307   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23308   ins_encode %{
23309     assert(UseSSE >= 3, "required");
23310     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
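    // 0x8 selects dwords {0, 2} into the low half, i.e. the low dword of
    // each quadword lane, narrowing the two long mask lanes to ints first.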
23311     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23312     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23313     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23314     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23315   %}
23316   ins_pipe( pipe_slow );
23317 %}
23318 
23319 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23320   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23321   match(Set dst (VectorStoreMask src size));
23322   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23323   effect(TEMP_DEF dst, TEMP vtmp);
23324   ins_encode %{
23325     int vlen_enc = Assembler::AVX_128bit;
23326     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23327     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23328     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23329     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23330     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23331     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23332     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23333   %}
23334   ins_pipe( pipe_slow );
23335 %}
23336 
23337 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23338   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23339   match(Set dst (VectorStoreMask src size));
23340   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23341   ins_encode %{
23342     int src_vlen_enc = vector_length_encoding(this, $src);
23343     int dst_vlen_enc = vector_length_encoding(this);
23344     if (!VM_Version::supports_avx512vl()) {
23345       src_vlen_enc = Assembler::AVX_512bit;
23346     }
23347     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23348     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23349   %}
23350   ins_pipe( pipe_slow );
23351 %}
23352 
23353 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23354   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23355   match(Set dst (VectorStoreMask src size));
23356   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23357   ins_encode %{
23358     int src_vlen_enc = vector_length_encoding(this, $src);
23359     int dst_vlen_enc = vector_length_encoding(this);
23360     if (!VM_Version::supports_avx512vl()) {
23361       src_vlen_enc = Assembler::AVX_512bit;
23362     }
23363     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23364     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23365   %}
23366   ins_pipe( pipe_slow );
23367 %}
23368 
23369 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23370   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23371   match(Set dst (VectorStoreMask mask size));
23372   effect(TEMP_DEF dst);
23373   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23374   ins_encode %{
23375     assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23376     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23377                  false, Assembler::AVX_512bit, noreg);
23378     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23379   %}
23380   ins_pipe( pipe_slow );
23381 %}
23382 
23383 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23384   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23385   match(Set dst (VectorStoreMask mask size));
23386   effect(TEMP_DEF dst);
23387   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23388   ins_encode %{
23389     int dst_vlen_enc = vector_length_encoding(this);
23390     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23391     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23392   %}
23393   ins_pipe( pipe_slow );
23394 %}
23395 
23396 instruct vmaskcast_evex(kReg dst) %{
23397   match(Set dst (VectorMaskCast dst));
23398   ins_cost(0);
23399   format %{ "vector_mask_cast $dst" %}
23400   ins_encode %{
23401     // empty
23402   %}
23403   ins_pipe(empty);
23404 %}
23405 
23406 instruct vmaskcast(vec dst) %{
23407   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23408   match(Set dst (VectorMaskCast dst));
23409   ins_cost(0);
23410   format %{ "vector_mask_cast $dst" %}
23411   ins_encode %{
23412     // empty
23413   %}
23414   ins_pipe(empty);
23415 %}
23416 
23417 instruct vmaskcast_avx(vec dst, vec src) %{
23418   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23419   match(Set dst (VectorMaskCast src));
23420   format %{ "vector_mask_cast $dst, $src" %}
23421   ins_encode %{
23422     int vlen = Matcher::vector_length(this);
23423     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23424     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23425     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23426   %}
23427   ins_pipe(pipe_slow);
23428 %}
23429 
23430 //-------------------------------- Load Iota Indices ----------------------------------
23431 
23432 instruct loadIotaIndices(vec dst, immI_0 src) %{
23433   match(Set dst (VectorLoadConst src));
23434   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23435   ins_encode %{
23436      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23437      BasicType bt = Matcher::vector_element_basic_type(this);
23438      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23439   %}
23440   ins_pipe( pipe_slow );
23441 %}
23442 
23443 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23444   match(Set dst (PopulateIndex src1 src2));
23445   effect(TEMP dst, TEMP vtmp);
23446   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23447   ins_encode %{
23448      assert($src2$$constant == 1, "required");
23449      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23450      int vlen_enc = vector_length_encoding(this);
23451      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23452      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23453      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23454      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23455   %}
23456   ins_pipe( pipe_slow );
23457 %}
23458 
23459 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23460   match(Set dst (PopulateIndex src1 src2));
23461   effect(TEMP dst, TEMP vtmp);
23462   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23463   ins_encode %{
23464      assert($src2$$constant == 1, "required");
23465      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23466      int vlen_enc = vector_length_encoding(this);
23467      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23468      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23469      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23470      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23471   %}
23472   ins_pipe( pipe_slow );
23473 %}
23474 
23475 //-------------------------------- Rearrange ----------------------------------
23476 
23477 // LoadShuffle/Rearrange for Byte
23478 instruct rearrangeB(vec dst, vec shuffle) %{
23479   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23480             Matcher::vector_length(n) < 32);
23481   match(Set dst (VectorRearrange dst shuffle));
23482   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23483   ins_encode %{
23484     assert(UseSSE >= 4, "required");
23485     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23486   %}
23487   ins_pipe( pipe_slow );
23488 %}
23489 
23490 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23491   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23492             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23493   match(Set dst (VectorRearrange src shuffle));
23494   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23495   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23496   ins_encode %{
23497     assert(UseAVX >= 2, "required");
23498     // Swap src into vtmp1
23499     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to get entries from the other 128-bit lane
23501     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to get entries from its own 128-bit lane
23503     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for entries that come from the other lane
23505     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23506     // Perform the blend
23507     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23508   %}
23509   ins_pipe( pipe_slow );
23510 %}
23511 
23512 
23513 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23514   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23515             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23516   match(Set dst (VectorRearrange src shuffle));
23517   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23519   ins_encode %{
23520     int vlen_enc = vector_length_encoding(this);
23521     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23522                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23523                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23524   %}
23525   ins_pipe( pipe_slow );
23526 %}
23527 
23528 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23529   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23530             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23531   match(Set dst (VectorRearrange src shuffle));
23532   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23533   ins_encode %{
23534     int vlen_enc = vector_length_encoding(this);
23535     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23536   %}
23537   ins_pipe( pipe_slow );
23538 %}
23539 
23540 // LoadShuffle/Rearrange for Short
23541 
23542 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23543   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23544             !VM_Version::supports_avx512bw());
23545   match(Set dst (VectorLoadShuffle src));
23546   effect(TEMP dst, TEMP vtmp);
23547   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23548   ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask, since only a
    // byte shuffle instruction is available on these platforms
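    // e.g. a short shuffle index of 3 expands to the byte-index pair {6, 7}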
23551     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23552     if (UseAVX == 0) {
23553       assert(vlen_in_bytes <= 16, "required");
23554       // Multiply each shuffle by two to get byte index
23555       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23556       __ psllw($vtmp$$XMMRegister, 1);
23557 
23558       // Duplicate to create 2 copies of byte index
23559       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23560       __ psllw($dst$$XMMRegister, 8);
23561       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23562 
23563       // Add one to get alternate byte index
23564       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23565       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23566     } else {
23567       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23568       int vlen_enc = vector_length_encoding(this);
23569       // Multiply each shuffle by two to get byte index
23570       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23571 
23572       // Duplicate to create 2 copies of byte index
23573       __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister,  8, vlen_enc);
23574       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23575 
23576       // Add one to get alternate byte index
23577       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23578     }
23579   %}
23580   ins_pipe( pipe_slow );
23581 %}
23582 
23583 instruct rearrangeS(vec dst, vec shuffle) %{
23584   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23585             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23586   match(Set dst (VectorRearrange dst shuffle));
23587   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23588   ins_encode %{
23589     assert(UseSSE >= 4, "required");
23590     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23591   %}
23592   ins_pipe( pipe_slow );
23593 %}
23594 
23595 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23596   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23597             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23598   match(Set dst (VectorRearrange src shuffle));
23599   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23600   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23601   ins_encode %{
23602     assert(UseAVX >= 2, "required");
23603     // Swap src into vtmp1
23604     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to get entries from the other 128-bit lane
23606     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to get entries from its own 128-bit lane
23608     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for entries that come from the other lane
23610     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23611     // Perform the blend
23612     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23613   %}
23614   ins_pipe( pipe_slow );
23615 %}
23616 
23617 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23618   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23619             VM_Version::supports_avx512bw());
23620   match(Set dst (VectorRearrange src shuffle));
23621   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23622   ins_encode %{
23623     int vlen_enc = vector_length_encoding(this);
23624     if (!VM_Version::supports_avx512vl()) {
23625       vlen_enc = Assembler::AVX_512bit;
23626     }
23627     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23628   %}
23629   ins_pipe( pipe_slow );
23630 %}
23631 
23632 // LoadShuffle/Rearrange for Integer and Float
23633 
23634 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23635   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23636             Matcher::vector_length(n) == 4 && UseAVX == 0);
23637   match(Set dst (VectorLoadShuffle src));
23638   effect(TEMP dst, TEMP vtmp);
23639   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23640   ins_encode %{
23641     assert(UseSSE >= 4, "required");
23642 
    // Create a byte shuffle mask from the int shuffle mask, since only a
    // byte shuffle instruction is available on these platforms
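    // e.g. an int shuffle index of 2 expands to byte indices {8, 9, 10, 11}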
23645 
23646     // Duplicate and multiply each shuffle by 4
23647     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23648     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23649     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23650     __ psllw($vtmp$$XMMRegister, 2);
23651 
23652     // Duplicate again to create 4 copies of byte index
23653     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23654     __ psllw($dst$$XMMRegister, 8);
23655     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23656 
23657     // Add 3,2,1,0 to get alternate byte index
23658     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23659     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23660   %}
23661   ins_pipe( pipe_slow );
23662 %}
23663 
23664 instruct rearrangeI(vec dst, vec shuffle) %{
23665   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23666             UseAVX == 0);
23667   match(Set dst (VectorRearrange dst shuffle));
23668   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23669   ins_encode %{
23670     assert(UseSSE >= 4, "required");
23671     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23672   %}
23673   ins_pipe( pipe_slow );
23674 %}
23675 
23676 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23677   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23678             UseAVX > 0);
23679   match(Set dst (VectorRearrange src shuffle));
23680   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23681   ins_encode %{
23682     int vlen_enc = vector_length_encoding(this);
23683     BasicType bt = Matcher::vector_element_basic_type(this);
23684     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23685   %}
23686   ins_pipe( pipe_slow );
23687 %}
23688 
23689 // LoadShuffle/Rearrange for Long and Double
23690 
23691 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23692   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23693             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23694   match(Set dst (VectorLoadShuffle src));
23695   effect(TEMP dst, TEMP vtmp);
23696   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23697   ins_encode %{
23698     assert(UseAVX >= 2, "required");
23699 
23700     int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask, since only
    // a double word shuffle instruction is available on these platforms
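    // e.g. a long shuffle index of 1 becomes the double word index pair {2, 3}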
23703 
23704     // Multiply each shuffle by two to get double word index
23705     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23706 
23707     // Duplicate each double word shuffle
23708     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23709     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23710 
23711     // Add one to get alternate double word index
23712     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23713   %}
23714   ins_pipe( pipe_slow );
23715 %}
23716 
23717 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23718   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23719             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23720   match(Set dst (VectorRearrange src shuffle));
23721   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23722   ins_encode %{
23723     assert(UseAVX >= 2, "required");
23724 
23725     int vlen_enc = vector_length_encoding(this);
23726     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23727   %}
23728   ins_pipe( pipe_slow );
23729 %}
23730 
23731 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23732   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23733             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23734   match(Set dst (VectorRearrange src shuffle));
23735   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23736   ins_encode %{
23737     assert(UseAVX > 2, "required");
23738 
23739     int vlen_enc = vector_length_encoding(this);
23740     if (vlen_enc == Assembler::AVX_128bit) {
23741       vlen_enc = Assembler::AVX_256bit;
23742     }
23743     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23744   %}
23745   ins_pipe( pipe_slow );
23746 %}
23747 
23748 // --------------------------------- FMA --------------------------------------
23749 // a * b + c
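// These patterns match the fused multiply-add vector nodes, which C2 only
// generates when UseFMA is set; the asserts below re-check that.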
23750 
23751 instruct vfmaF_reg(vec a, vec b, vec c) %{
23752   match(Set c (FmaVF  c (Binary a b)));
23753   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23754   ins_cost(150);
23755   ins_encode %{
23756     assert(UseFMA, "not enabled");
23757     int vlen_enc = vector_length_encoding(this);
23758     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23759   %}
23760   ins_pipe( pipe_slow );
23761 %}
23762 
23763 instruct vfmaF_mem(vec a, memory b, vec c) %{
23764   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23765   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23766   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23767   ins_cost(150);
23768   ins_encode %{
23769     assert(UseFMA, "not enabled");
23770     int vlen_enc = vector_length_encoding(this);
23771     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23772   %}
23773   ins_pipe( pipe_slow );
23774 %}
23775 
23776 instruct vfmaD_reg(vec a, vec b, vec c) %{
23777   match(Set c (FmaVD  c (Binary a b)));
23778   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23779   ins_cost(150);
23780   ins_encode %{
23781     assert(UseFMA, "not enabled");
23782     int vlen_enc = vector_length_encoding(this);
23783     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23784   %}
23785   ins_pipe( pipe_slow );
23786 %}
23787 
23788 instruct vfmaD_mem(vec a, memory b, vec c) %{
23789   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23790   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23791   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23792   ins_cost(150);
23793   ins_encode %{
23794     assert(UseFMA, "not enabled");
23795     int vlen_enc = vector_length_encoding(this);
23796     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23797   %}
23798   ins_pipe( pipe_slow );
23799 %}
23800 
23801 // --------------------------------- Vector Multiply Add --------------------------------------
23802 
23803 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23804   predicate(UseAVX == 0);
23805   match(Set dst (MulAddVS2VI dst src1));
23806   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23807   ins_encode %{
23808     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23809   %}
23810   ins_pipe( pipe_slow );
23811 %}
23812 
23813 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23814   predicate(UseAVX > 0);
23815   match(Set dst (MulAddVS2VI src1 src2));
23816   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23817   ins_encode %{
23818     int vlen_enc = vector_length_encoding(this);
23819     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23820   %}
23821   ins_pipe( pipe_slow );
23822 %}
23823 
23824 // --------------------------------- Vector Multiply Add Add ----------------------------------
23825 
23826 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23827   predicate(VM_Version::supports_avx512_vnni());
23828   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23829   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23830   ins_encode %{
23831     assert(UseAVX > 2, "required");
23832     int vlen_enc = vector_length_encoding(this);
23833     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23834   %}
23835   ins_pipe( pipe_slow );
23836   ins_cost(10);
23837 %}
23838 
23839 // --------------------------------- PopCount --------------------------------------
23840 
23841 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23842   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23843   match(Set dst (PopCountVI src));
23844   match(Set dst (PopCountVL src));
23845   format %{ "vector_popcount_integral $dst, $src" %}
23846   ins_encode %{
23847     int opcode = this->ideal_Opcode();
23848     int vlen_enc = vector_length_encoding(this, $src);
23849     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23850     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23851   %}
23852   ins_pipe( pipe_slow );
23853 %}
23854 
23855 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23856   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23857   match(Set dst (PopCountVI src mask));
23858   match(Set dst (PopCountVL src mask));
23859   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23860   ins_encode %{
23861     int vlen_enc = vector_length_encoding(this, $src);
23862     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23863     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23864     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23865   %}
23866   ins_pipe( pipe_slow );
23867 %}
23868 
23869 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23870   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23871   match(Set dst (PopCountVI src));
23872   match(Set dst (PopCountVL src));
23873   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23874   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23875   ins_encode %{
23876     int opcode = this->ideal_Opcode();
23877     int vlen_enc = vector_length_encoding(this, $src);
23878     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23879     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23880                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23881   %}
23882   ins_pipe( pipe_slow );
23883 %}
23884 
23885 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23886 
23887 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23888   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23889                                               Matcher::vector_length_in_bytes(n->in(1))));
23890   match(Set dst (CountTrailingZerosV src));
23891   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23892   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23894   ins_encode %{
23895     int vlen_enc = vector_length_encoding(this, $src);
23896     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23897     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23898                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23899   %}
23900   ins_pipe( pipe_slow );
23901 %}
23902 
23903 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23904   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23905             VM_Version::supports_avx512cd() &&
23906             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23907   match(Set dst (CountTrailingZerosV src));
23908   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23909   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23911   ins_encode %{
23912     int vlen_enc = vector_length_encoding(this, $src);
23913     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23914     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23915                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23916   %}
23917   ins_pipe( pipe_slow );
23918 %}
23919 
23920 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23921   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23922   match(Set dst (CountTrailingZerosV src));
23923   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23924   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23926   ins_encode %{
23927     int vlen_enc = vector_length_encoding(this, $src);
23928     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23929     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23930                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23931                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23932   %}
23933   ins_pipe( pipe_slow );
23934 %}
23935 
23936 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23937   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23938   match(Set dst (CountTrailingZerosV src));
23939   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23940   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23941   ins_encode %{
23942     int vlen_enc = vector_length_encoding(this, $src);
23943     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23944     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23945                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23946   %}
23947   ins_pipe( pipe_slow );
23948 %}
23949 
23950 
23951 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23952 
23953 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23954   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23955   effect(TEMP dst);
23956   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23957   ins_encode %{
23958     int vector_len = vector_length_encoding(this);
23959     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23960   %}
23961   ins_pipe( pipe_slow );
23962 %}
23963 
23964 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23965   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23966   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23967   effect(TEMP dst);
23968   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23969   ins_encode %{
23970     int vector_len = vector_length_encoding(this);
23971     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23972   %}
23973   ins_pipe( pipe_slow );
23974 %}
23975 
23976 // --------------------------------- Rotation Operations ----------------------------------
23977 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23978   match(Set dst (RotateLeftV src shift));
23979   match(Set dst (RotateRightV src shift));
23980   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23981   ins_encode %{
23982     int opcode      = this->ideal_Opcode();
23983     int vector_len  = vector_length_encoding(this);
23984     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23985     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23986   %}
23987   ins_pipe( pipe_slow );
23988 %}
23989 
23990 instruct vprorate(vec dst, vec src, vec shift) %{
23991   match(Set dst (RotateLeftV src shift));
23992   match(Set dst (RotateRightV src shift));
23993   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23994   ins_encode %{
23995     int opcode      = this->ideal_Opcode();
23996     int vector_len  = vector_length_encoding(this);
23997     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23998     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23999   %}
24000   ins_pipe( pipe_slow );
24001 %}
24002 
24003 // ---------------------------------- Masked Operations ------------------------------------
24004 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
24005   predicate(!n->in(3)->bottom_type()->isa_vectmask());
24006   match(Set dst (LoadVectorMasked mem mask));
24007   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24008   ins_encode %{
24009     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
24010     int vlen_enc = vector_length_encoding(this);
24011     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
24012   %}
24013   ins_pipe( pipe_slow );
24014 %}
24015 
24016 
24017 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
24018   predicate(n->in(3)->bottom_type()->isa_vectmask());
24019   match(Set dst (LoadVectorMasked mem mask));
24020   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24021   ins_encode %{
24022     BasicType elmType =  this->bottom_type()->is_vect()->element_basic_type();
24023     int vector_len = vector_length_encoding(this);
24024     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
24025   %}
24026   ins_pipe( pipe_slow );
24027 %}
24028 
24029 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
24030   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
24031   match(Set mem (StoreVectorMasked mem (Binary src mask)));
24032   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24033   ins_encode %{
24034     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24035     int vlen_enc = vector_length_encoding(src_node);
24036     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
24037     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
24038   %}
24039   ins_pipe( pipe_slow );
24040 %}
24041 
24042 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
24043   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
24044   match(Set mem (StoreVectorMasked mem (Binary src mask)));
24045   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24046   ins_encode %{
24047     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24048     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
24049     int vlen_enc = vector_length_encoding(src_node);
24050     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
24051   %}
24052   ins_pipe( pipe_slow );
24053 %}
24054 
24055 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
24056   match(Set addr (VerifyVectorAlignment addr mask));
24057   effect(KILL cr);
24058   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
24059   ins_encode %{
24060     Label Lskip;
24061     // check if masked bits of addr are zero
24062     __ testq($addr$$Register, $mask$$constant);
24063     __ jccb(Assembler::equal, Lskip);
24064     __ stop("verify_vector_alignment found a misaligned vector memory access");
24065     __ bind(Lskip);
24066   %}
24067   ins_pipe(pipe_slow);
24068 %}
24069 
24070 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
24071   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
24072   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
24073   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
24074   ins_encode %{
24075     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
24076     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
24077 
24078     Label DONE;
24079     int vlen_enc = vector_length_encoding(this, $src1);
24080     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
24081 
24082     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
24083     __ mov64($dst$$Register, -1L);
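    // ktmp1 := per-lane equality under the mask; kortest with ~mask sets CF
    // exactly when every in-mask lane compared equal, leaving $dst at -1.
    // On a mismatch, $dst becomes tzcnt(~ktmp1), the index of the first lane
    // that differs (the mask is expected to be a prefix mask, see
    // VectorMaskGen below).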
24084     __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
24085     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
24086     __ jccb(Assembler::carrySet, DONE);
24087     __ kmovql($dst$$Register, $ktmp1$$KRegister);
24088     __ notq($dst$$Register);
24089     __ tzcntq($dst$$Register, $dst$$Register);
24090     __ bind(DONE);
24091   %}
24092   ins_pipe( pipe_slow );
24093 %}
24094 
24095 
24096 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
24097   match(Set dst (VectorMaskGen len));
24098   effect(TEMP temp, KILL cr);
24099   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
24100   ins_encode %{
24101     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
24102   %}
24103   ins_pipe( pipe_slow );
24104 %}
24105 
24106 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
24107   match(Set dst (VectorMaskGen len));
24108   format %{ "vector_mask_gen $len \t! vector mask generator" %}
24109   effect(TEMP temp);
24110   ins_encode %{
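    // Materialize ((1 << len) - 1) as a prefix mask; this assumes the
    // constant $len is in [1, 64] so that the shift amount stays in range.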
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
24112     __ kmovql($dst$$KRegister, $temp$$Register);
24113   %}
24114   ins_pipe( pipe_slow );
24115 %}
24116 
24117 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
24118   predicate(n->in(1)->bottom_type()->isa_vectmask());
24119   match(Set dst (VectorMaskToLong mask));
24120   effect(TEMP dst, KILL cr);
24121   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
24122   ins_encode %{
24123     int opcode = this->ideal_Opcode();
24124     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24125     int mask_len = Matcher::vector_length(this, $mask);
24126     int mask_size = mask_len * type2aelembytes(mbt);
24127     int vlen_enc = vector_length_encoding(this, $mask);
24128     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24129                              $dst$$Register, mask_len, mask_size, vlen_enc);
24130   %}
24131   ins_pipe( pipe_slow );
24132 %}
24133 
24134 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24135   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24136   match(Set dst (VectorMaskToLong mask));
24137   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24138   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24139   ins_encode %{
24140     int opcode = this->ideal_Opcode();
24141     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24142     int mask_len = Matcher::vector_length(this, $mask);
24143     int vlen_enc = vector_length_encoding(this, $mask);
24144     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24145                              $dst$$Register, mask_len, mbt, vlen_enc);
24146   %}
24147   ins_pipe( pipe_slow );
24148 %}
24149 
24150 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24151   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24152   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24153   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24154   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24155   ins_encode %{
24156     int opcode = this->ideal_Opcode();
24157     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24158     int mask_len = Matcher::vector_length(this, $mask);
24159     int vlen_enc = vector_length_encoding(this, $mask);
24160     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24161                              $dst$$Register, mask_len, mbt, vlen_enc);
24162   %}
24163   ins_pipe( pipe_slow );
24164 %}
24165 
24166 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24167   predicate(n->in(1)->bottom_type()->isa_vectmask());
24168   match(Set dst (VectorMaskTrueCount mask));
24169   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24170   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24171   ins_encode %{
24172     int opcode = this->ideal_Opcode();
24173     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24174     int mask_len = Matcher::vector_length(this, $mask);
24175     int mask_size = mask_len * type2aelembytes(mbt);
24176     int vlen_enc = vector_length_encoding(this, $mask);
24177     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24178                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24179   %}
24180   ins_pipe( pipe_slow );
24181 %}
24182 
24183 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24184   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24185   match(Set dst (VectorMaskTrueCount mask));
24186   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24187   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24188   ins_encode %{
24189     int opcode = this->ideal_Opcode();
24190     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24191     int mask_len = Matcher::vector_length(this, $mask);
24192     int vlen_enc = vector_length_encoding(this, $mask);
24193     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24194                              $tmp$$Register, mask_len, mbt, vlen_enc);
24195   %}
24196   ins_pipe( pipe_slow );
24197 %}
24198 
24199 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24200   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24201   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24202   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24203   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24204   ins_encode %{
24205     int opcode = this->ideal_Opcode();
24206     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24207     int mask_len = Matcher::vector_length(this, $mask);
24208     int vlen_enc = vector_length_encoding(this, $mask);
24209     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24210                              $tmp$$Register, mask_len, mbt, vlen_enc);
24211   %}
24212   ins_pipe( pipe_slow );
24213 %}
24214 
24215 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24216   predicate(n->in(1)->bottom_type()->isa_vectmask());
24217   match(Set dst (VectorMaskFirstTrue mask));
24218   match(Set dst (VectorMaskLastTrue mask));
24219   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24220   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24221   ins_encode %{
24222     int opcode = this->ideal_Opcode();
24223     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24224     int mask_len = Matcher::vector_length(this, $mask);
24225     int mask_size = mask_len * type2aelembytes(mbt);
24226     int vlen_enc = vector_length_encoding(this, $mask);
24227     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24228                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24229   %}
24230   ins_pipe( pipe_slow );
24231 %}
24232 
24233 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24234   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24235   match(Set dst (VectorMaskFirstTrue mask));
24236   match(Set dst (VectorMaskLastTrue mask));
24237   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24238   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24239   ins_encode %{
24240     int opcode = this->ideal_Opcode();
24241     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24242     int mask_len = Matcher::vector_length(this, $mask);
24243     int vlen_enc = vector_length_encoding(this, $mask);
24244     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24245                              $tmp$$Register, mask_len, mbt, vlen_enc);
24246   %}
24247   ins_pipe( pipe_slow );
24248 %}
24249 
24250 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24251   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24252   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24253   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24254   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24255   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24256   ins_encode %{
24257     int opcode = this->ideal_Opcode();
24258     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24259     int mask_len = Matcher::vector_length(this, $mask);
24260     int vlen_enc = vector_length_encoding(this, $mask);
24261     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24262                              $tmp$$Register, mask_len, mbt, vlen_enc);
24263   %}
24264   ins_pipe( pipe_slow );
24265 %}
24266 
24267 // --------------------------------- Compress/Expand Operations ---------------------------
24268 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24269   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24270   match(Set dst (CompressV src mask));
24271   match(Set dst (ExpandV src mask));
24272   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24274   ins_encode %{
24275     int opcode = this->ideal_Opcode();
24276     int vlen_enc = vector_length_encoding(this);
24277     BasicType bt  = Matcher::vector_element_basic_type(this);
24278     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24279                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24280   %}
24281   ins_pipe( pipe_slow );
24282 %}
24283 
24284 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24285   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24286   match(Set dst (CompressV src mask));
24287   match(Set dst (ExpandV src mask));
24288   format %{ "vector_compress_expand $dst, $src, $mask" %}
24289   ins_encode %{
24290     int opcode = this->ideal_Opcode();
24291     int vector_len = vector_length_encoding(this);
24292     BasicType bt  = Matcher::vector_element_basic_type(this);
24293     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24294   %}
24295   ins_pipe( pipe_slow );
24296 %}
24297 
24298 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24299   match(Set dst (CompressM mask));
24300   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24301   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24302   ins_encode %{
24303     assert(this->in(1)->bottom_type()->isa_vectmask(), "");
24304     int mask_len = Matcher::vector_length(this);
24305     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24306   %}
24307   ins_pipe( pipe_slow );
24308 %}
24309 
24310 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24311 
24312 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24313   predicate(!VM_Version::supports_gfni());
24314   match(Set dst (ReverseV src));
24315   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24316   format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24317   ins_encode %{
24318     int vec_enc = vector_length_encoding(this);
24319     BasicType bt = Matcher::vector_element_basic_type(this);
24320     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24321                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24322   %}
24323   ins_pipe( pipe_slow );
24324 %}
24325 
24326 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24327   predicate(VM_Version::supports_gfni());
24328   match(Set dst (ReverseV src));
24329   effect(TEMP dst, TEMP xtmp);
24330   format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
24331   ins_encode %{
24332     int vec_enc = vector_length_encoding(this);
24333     BasicType bt  = Matcher::vector_element_basic_type(this);
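    // 0x8040201008040201 encodes the 8x8 identity bit-matrix with reversed
    // row order; vgf2p8affineqb with this operand reverses the bit order of
    // every byte.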
24334     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24335     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24336                                $xtmp$$XMMRegister);
24337   %}
24338   ins_pipe( pipe_slow );
24339 %}
24340 
24341 instruct vreverse_byte_reg(vec dst, vec src) %{
24342   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24343   match(Set dst (ReverseBytesV src));
24344   effect(TEMP dst);
24345   format %{ "vector_reverse_byte $dst, $src" %}
24346   ins_encode %{
24347     int vec_enc = vector_length_encoding(this);
24348     BasicType bt = Matcher::vector_element_basic_type(this);
24349     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24350   %}
24351   ins_pipe( pipe_slow );
24352 %}
24353 
24354 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24355   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24356   match(Set dst (ReverseBytesV src));
24357   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24358   format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24359   ins_encode %{
24360     int vec_enc = vector_length_encoding(this);
24361     BasicType bt = Matcher::vector_element_basic_type(this);
24362     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24363                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24364   %}
24365   ins_pipe( pipe_slow );
24366 %}
24367 
24368 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
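//
// On AVX512CD targets the int/long flavors map directly to the
// vplzcnt{d,q} instructions; the subword flavors widen to int, count, and
// subtract the widening bias. The AVX fallback for T_INT derives the count
// from the biased exponent after an int-to-float conversion, and the
// remaining AVX flavors are emulated in the macro assembler.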
24369 
24370 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24371   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24372                                               Matcher::vector_length_in_bytes(n->in(1))));
24373   match(Set dst (CountLeadingZerosV src));
24374   format %{ "vector_count_leading_zeros $dst, $src" %}
24375   ins_encode %{
24376      int vlen_enc = vector_length_encoding(this, $src);
24377      BasicType bt = Matcher::vector_element_basic_type(this, $src);
24378      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24379                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24380   %}
24381   ins_pipe( pipe_slow );
24382 %}
24383 
24384 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24385   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24386                                               Matcher::vector_length_in_bytes(n->in(1))));
24387   match(Set dst (CountLeadingZerosV src mask));
24388   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24389   ins_encode %{
24390     int vlen_enc = vector_length_encoding(this, $src);
24391     BasicType bt = Matcher::vector_element_basic_type(this, $src);
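    // Copy src into dst first so that the merge-masked count below leaves
    // the lanes with a clear mask bit holding the original source values.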
24392     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24393     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24394                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24395   %}
24396   ins_pipe( pipe_slow );
24397 %}
24398 
24399 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24400   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24401             VM_Version::supports_avx512cd() &&
24402             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24403   match(Set dst (CountLeadingZerosV src));
24404   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24405   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
24406   ins_encode %{
24407     int vlen_enc = vector_length_encoding(this, $src);
24408     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24409     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24410                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24411   %}
24412   ins_pipe( pipe_slow );
24413 %}
24414 
24415 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24416   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24417   match(Set dst (CountLeadingZerosV src));
24418   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24419   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24420   ins_encode %{
24421     int vlen_enc = vector_length_encoding(this, $src);
24422     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24423     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24424                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24425                                        $rtmp$$Register, true, vlen_enc);
24426   %}
24427   ins_pipe( pipe_slow );
24428 %}
24429 
24430 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24431   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24432             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24433   match(Set dst (CountLeadingZerosV src));
24434   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24435   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24436   ins_encode %{
24437     int vlen_enc = vector_length_encoding(this, $src);
24438     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24439     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24440                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24441   %}
24442   ins_pipe( pipe_slow );
24443 %}
24444 
24445 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24446   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24447             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24448   match(Set dst (CountLeadingZerosV src));
24449   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24450   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24451   ins_encode %{
24452     int vlen_enc = vector_length_encoding(this, $src);
24453     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24454     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24455                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24456   %}
24457   ins_pipe( pipe_slow );
24458 %}
24459 
24460 // ---------------------------------- Vector Masked Operations ------------------------------------
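//
// These patterns implement the predicated vector operations. The destination
// register doubles as first source and merge target: lanes whose mask bit is
// set receive the new result, lanes whose mask bit is clear keep their
// previous contents (merge masking, selected by the 'true' merge argument
// passed to evmasked_op). Per lane i, a merge-masked add computes:
//
//   dst[i] = mask[i] ? dst[i] + src2[i] : dst[i]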
24461 
24462 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24463   match(Set dst (AddVB (Binary dst src2) mask));
24464   match(Set dst (AddVS (Binary dst src2) mask));
24465   match(Set dst (AddVI (Binary dst src2) mask));
24466   match(Set dst (AddVL (Binary dst src2) mask));
24467   match(Set dst (AddVF (Binary dst src2) mask));
24468   match(Set dst (AddVD (Binary dst src2) mask));
24469   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24470   ins_encode %{
24471     int vlen_enc = vector_length_encoding(this);
24472     BasicType bt = Matcher::vector_element_basic_type(this);
24473     int opc = this->ideal_Opcode();
24474     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24475                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24476   %}
24477   ins_pipe( pipe_slow );
24478 %}
24479 
24480 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24481   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24482   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24483   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24484   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24485   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24486   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24487   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24488   ins_encode %{
24489     int vlen_enc = vector_length_encoding(this);
24490     BasicType bt = Matcher::vector_element_basic_type(this);
24491     int opc = this->ideal_Opcode();
24492     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24493                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24494   %}
24495   ins_pipe( pipe_slow );
24496 %}
24497 
24498 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24499   match(Set dst (XorV (Binary dst src2) mask));
24500   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24501   ins_encode %{
24502     int vlen_enc = vector_length_encoding(this);
24503     BasicType bt = Matcher::vector_element_basic_type(this);
24504     int opc = this->ideal_Opcode();
24505     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24506                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24507   %}
24508   ins_pipe( pipe_slow );
24509 %}
24510 
24511 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24512   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24513   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24514   ins_encode %{
24515     int vlen_enc = vector_length_encoding(this);
24516     BasicType bt = Matcher::vector_element_basic_type(this);
24517     int opc = this->ideal_Opcode();
24518     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24519                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24520   %}
24521   ins_pipe( pipe_slow );
24522 %}
24523 
24524 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24525   match(Set dst (OrV (Binary dst src2) mask));
24526   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24527   ins_encode %{
24528     int vlen_enc = vector_length_encoding(this);
24529     BasicType bt = Matcher::vector_element_basic_type(this);
24530     int opc = this->ideal_Opcode();
24531     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24532                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24533   %}
24534   ins_pipe( pipe_slow );
24535 %}
24536 
24537 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24538   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24539   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24540   ins_encode %{
24541     int vlen_enc = vector_length_encoding(this);
24542     BasicType bt = Matcher::vector_element_basic_type(this);
24543     int opc = this->ideal_Opcode();
24544     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24545                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24546   %}
24547   ins_pipe( pipe_slow );
24548 %}
24549 
24550 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24551   match(Set dst (AndV (Binary dst src2) mask));
24552   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24553   ins_encode %{
24554     int vlen_enc = vector_length_encoding(this);
24555     BasicType bt = Matcher::vector_element_basic_type(this);
24556     int opc = this->ideal_Opcode();
24557     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24558                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24559   %}
24560   ins_pipe( pipe_slow );
24561 %}
24562 
24563 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24564   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24565   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24566   ins_encode %{
24567     int vlen_enc = vector_length_encoding(this);
24568     BasicType bt = Matcher::vector_element_basic_type(this);
24569     int opc = this->ideal_Opcode();
24570     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24571                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24572   %}
24573   ins_pipe( pipe_slow );
24574 %}
24575 
24576 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24577   match(Set dst (SubVB (Binary dst src2) mask));
24578   match(Set dst (SubVS (Binary dst src2) mask));
24579   match(Set dst (SubVI (Binary dst src2) mask));
24580   match(Set dst (SubVL (Binary dst src2) mask));
24581   match(Set dst (SubVF (Binary dst src2) mask));
24582   match(Set dst (SubVD (Binary dst src2) mask));
24583   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24584   ins_encode %{
24585     int vlen_enc = vector_length_encoding(this);
24586     BasicType bt = Matcher::vector_element_basic_type(this);
24587     int opc = this->ideal_Opcode();
24588     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24589                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24590   %}
24591   ins_pipe( pipe_slow );
24592 %}
24593 
24594 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24595   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24596   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24597   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24598   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24599   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24600   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24601   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24602   ins_encode %{
24603     int vlen_enc = vector_length_encoding(this);
24604     BasicType bt = Matcher::vector_element_basic_type(this);
24605     int opc = this->ideal_Opcode();
24606     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24607                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24608   %}
24609   ins_pipe( pipe_slow );
24610 %}
24611 
24612 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24613   match(Set dst (MulVS (Binary dst src2) mask));
24614   match(Set dst (MulVI (Binary dst src2) mask));
24615   match(Set dst (MulVL (Binary dst src2) mask));
24616   match(Set dst (MulVF (Binary dst src2) mask));
24617   match(Set dst (MulVD (Binary dst src2) mask));
24618   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24619   ins_encode %{
24620     int vlen_enc = vector_length_encoding(this);
24621     BasicType bt = Matcher::vector_element_basic_type(this);
24622     int opc = this->ideal_Opcode();
24623     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24624                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24625   %}
24626   ins_pipe( pipe_slow );
24627 %}
24628 
24629 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24630   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24631   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24632   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24633   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24634   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24635   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24636   ins_encode %{
24637     int vlen_enc = vector_length_encoding(this);
24638     BasicType bt = Matcher::vector_element_basic_type(this);
24639     int opc = this->ideal_Opcode();
24640     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24641                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24642   %}
24643   ins_pipe( pipe_slow );
24644 %}
24645 
24646 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24647   match(Set dst (SqrtVF dst mask));
24648   match(Set dst (SqrtVD dst mask));
24649   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24650   ins_encode %{
24651     int vlen_enc = vector_length_encoding(this);
24652     BasicType bt = Matcher::vector_element_basic_type(this);
24653     int opc = this->ideal_Opcode();
24654     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24655                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24656   %}
24657   ins_pipe( pipe_slow );
24658 %}
24659 
24660 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24661   match(Set dst (DivVF (Binary dst src2) mask));
24662   match(Set dst (DivVD (Binary dst src2) mask));
24663   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24664   ins_encode %{
24665     int vlen_enc = vector_length_encoding(this);
24666     BasicType bt = Matcher::vector_element_basic_type(this);
24667     int opc = this->ideal_Opcode();
24668     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24669                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24670   %}
24671   ins_pipe( pipe_slow );
24672 %}
24673 
24674 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24675   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24676   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24677   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24678   ins_encode %{
24679     int vlen_enc = vector_length_encoding(this);
24680     BasicType bt = Matcher::vector_element_basic_type(this);
24681     int opc = this->ideal_Opcode();
24682     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24683                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24684   %}
24685   ins_pipe( pipe_slow );
24686 %}
24687 
24688 
24689 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24690   match(Set dst (RotateLeftV (Binary dst shift) mask));
24691   match(Set dst (RotateRightV (Binary dst shift) mask));
24692   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24693   ins_encode %{
24694     int vlen_enc = vector_length_encoding(this);
24695     BasicType bt = Matcher::vector_element_basic_type(this);
24696     int opc = this->ideal_Opcode();
24697     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24698                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24699   %}
24700   ins_pipe( pipe_slow );
24701 %}
24702 
24703 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24704   match(Set dst (RotateLeftV (Binary dst src2) mask));
24705   match(Set dst (RotateRightV (Binary dst src2) mask));
24706   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24707   ins_encode %{
24708     int vlen_enc = vector_length_encoding(this);
24709     BasicType bt = Matcher::vector_element_basic_type(this);
24710     int opc = this->ideal_Opcode();
24711     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24712                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24713   %}
24714   ins_pipe( pipe_slow );
24715 %}
24716 
24717 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24718   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24719   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24720   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24721   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24722   ins_encode %{
24723     int vlen_enc = vector_length_encoding(this);
24724     BasicType bt = Matcher::vector_element_basic_type(this);
24725     int opc = this->ideal_Opcode();
24726     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24727                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24728   %}
24729   ins_pipe( pipe_slow );
24730 %}
24731 
24732 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24733   predicate(!n->as_ShiftV()->is_var_shift());
24734   match(Set dst (LShiftVS (Binary dst src2) mask));
24735   match(Set dst (LShiftVI (Binary dst src2) mask));
24736   match(Set dst (LShiftVL (Binary dst src2) mask));
24737   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24738   ins_encode %{
24739     int vlen_enc = vector_length_encoding(this);
24740     BasicType bt = Matcher::vector_element_basic_type(this);
24741     int opc = this->ideal_Opcode();
24742     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24743                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24744   %}
24745   ins_pipe( pipe_slow );
24746 %}
24747 
24748 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24749   predicate(n->as_ShiftV()->is_var_shift());
24750   match(Set dst (LShiftVS (Binary dst src2) mask));
24751   match(Set dst (LShiftVI (Binary dst src2) mask));
24752   match(Set dst (LShiftVL (Binary dst src2) mask));
24753   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24754   ins_encode %{
24755     int vlen_enc = vector_length_encoding(this);
24756     BasicType bt = Matcher::vector_element_basic_type(this);
24757     int opc = this->ideal_Opcode();
24758     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24759                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24760   %}
24761   ins_pipe( pipe_slow );
24762 %}
24763 
24764 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24765   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24766   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24767   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24768   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24769   ins_encode %{
24770     int vlen_enc = vector_length_encoding(this);
24771     BasicType bt = Matcher::vector_element_basic_type(this);
24772     int opc = this->ideal_Opcode();
24773     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24774                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24775   %}
24776   ins_pipe( pipe_slow );
24777 %}
24778 
24779 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24780   predicate(!n->as_ShiftV()->is_var_shift());
24781   match(Set dst (RShiftVS (Binary dst src2) mask));
24782   match(Set dst (RShiftVI (Binary dst src2) mask));
24783   match(Set dst (RShiftVL (Binary dst src2) mask));
24784   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24785   ins_encode %{
24786     int vlen_enc = vector_length_encoding(this);
24787     BasicType bt = Matcher::vector_element_basic_type(this);
24788     int opc = this->ideal_Opcode();
24789     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24790                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24791   %}
24792   ins_pipe( pipe_slow );
24793 %}
24794 
24795 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24796   predicate(n->as_ShiftV()->is_var_shift());
24797   match(Set dst (RShiftVS (Binary dst src2) mask));
24798   match(Set dst (RShiftVI (Binary dst src2) mask));
24799   match(Set dst (RShiftVL (Binary dst src2) mask));
24800   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24801   ins_encode %{
24802     int vlen_enc = vector_length_encoding(this);
24803     BasicType bt = Matcher::vector_element_basic_type(this);
24804     int opc = this->ideal_Opcode();
24805     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24806                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24807   %}
24808   ins_pipe( pipe_slow );
24809 %}
24810 
24811 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24812   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24813   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24814   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24815   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24816   ins_encode %{
24817     int vlen_enc = vector_length_encoding(this);
24818     BasicType bt = Matcher::vector_element_basic_type(this);
24819     int opc = this->ideal_Opcode();
24820     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24821                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24822   %}
24823   ins_pipe( pipe_slow );
24824 %}
24825 
24826 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24827   predicate(!n->as_ShiftV()->is_var_shift());
24828   match(Set dst (URShiftVS (Binary dst src2) mask));
24829   match(Set dst (URShiftVI (Binary dst src2) mask));
24830   match(Set dst (URShiftVL (Binary dst src2) mask));
24831   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24832   ins_encode %{
24833     int vlen_enc = vector_length_encoding(this);
24834     BasicType bt = Matcher::vector_element_basic_type(this);
24835     int opc = this->ideal_Opcode();
24836     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24837                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24838   %}
24839   ins_pipe( pipe_slow );
24840 %}
24841 
24842 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24843   predicate(n->as_ShiftV()->is_var_shift());
24844   match(Set dst (URShiftVS (Binary dst src2) mask));
24845   match(Set dst (URShiftVI (Binary dst src2) mask));
24846   match(Set dst (URShiftVL (Binary dst src2) mask));
24847   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24848   ins_encode %{
24849     int vlen_enc = vector_length_encoding(this);
24850     BasicType bt = Matcher::vector_element_basic_type(this);
24851     int opc = this->ideal_Opcode();
24852     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24853                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24854   %}
24855   ins_pipe( pipe_slow );
24856 %}
24857 
24858 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24859   match(Set dst (MaxV (Binary dst src2) mask));
24860   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24861   ins_encode %{
24862     int vlen_enc = vector_length_encoding(this);
24863     BasicType bt = Matcher::vector_element_basic_type(this);
24864     int opc = this->ideal_Opcode();
24865     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24866                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24867   %}
24868   ins_pipe( pipe_slow );
24869 %}
24870 
24871 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24872   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24873   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24874   ins_encode %{
24875     int vlen_enc = vector_length_encoding(this);
24876     BasicType bt = Matcher::vector_element_basic_type(this);
24877     int opc = this->ideal_Opcode();
24878     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24879                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24880   %}
24881   ins_pipe( pipe_slow );
24882 %}
24883 
24884 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24885   match(Set dst (MinV (Binary dst src2) mask));
24886   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24887   ins_encode %{
24888     int vlen_enc = vector_length_encoding(this);
24889     BasicType bt = Matcher::vector_element_basic_type(this);
24890     int opc = this->ideal_Opcode();
24891     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24892                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24893   %}
24894   ins_pipe( pipe_slow );
24895 %}
24896 
24897 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24898   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24899   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24900   ins_encode %{
24901     int vlen_enc = vector_length_encoding(this);
24902     BasicType bt = Matcher::vector_element_basic_type(this);
24903     int opc = this->ideal_Opcode();
24904     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24905                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24906   %}
24907   ins_pipe( pipe_slow );
24908 %}
24909 
24910 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24911   match(Set dst (VectorRearrange (Binary dst src2) mask));
24912   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24913   ins_encode %{
24914     int vlen_enc = vector_length_encoding(this);
24915     BasicType bt = Matcher::vector_element_basic_type(this);
24916     int opc = this->ideal_Opcode();
24917     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24918                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24919   %}
24920   ins_pipe( pipe_slow );
24921 %}
24922 
24923 instruct vabs_masked(vec dst, kReg mask) %{
24924   match(Set dst (AbsVB dst mask));
24925   match(Set dst (AbsVS dst mask));
24926   match(Set dst (AbsVI dst mask));
24927   match(Set dst (AbsVL dst mask));
24928   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24929   ins_encode %{
24930     int vlen_enc = vector_length_encoding(this);
24931     BasicType bt = Matcher::vector_element_basic_type(this);
24932     int opc = this->ideal_Opcode();
24933     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24934                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24935   %}
24936   ins_pipe( pipe_slow );
24937 %}
24938 
24939 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24940   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24941   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24942   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24943   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24945     int vlen_enc = vector_length_encoding(this);
24946     BasicType bt = Matcher::vector_element_basic_type(this);
24947     int opc = this->ideal_Opcode();
24948     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24949                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24950   %}
24951   ins_pipe( pipe_slow );
24952 %}
24953 
24954 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24955   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24956   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24957   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24958   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24960     int vlen_enc = vector_length_encoding(this);
24961     BasicType bt = Matcher::vector_element_basic_type(this);
24962     int opc = this->ideal_Opcode();
24963     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24964                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24965   %}
24966   ins_pipe( pipe_slow );
24967 %}
24968 
24969 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24970   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24971   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24972   ins_encode %{
24973     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24974     int vlen_enc = vector_length_encoding(this, $src1);
24975     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24976 
    // Dispatch on the comparison element type: integral elements use the
    // masked EVEX integer compares, floating point elements the FP compares.
24978     switch (src1_elem_bt) {
24979       case T_BYTE: {
24980         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24981         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24982         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24983         break;
24984       }
24985       case T_SHORT: {
24986         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24987         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24988         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24989         break;
24990       }
24991       case T_INT: {
24992         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24993         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24994         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24995         break;
24996       }
24997       case T_LONG: {
24998         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24999         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25000         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25001         break;
25002       }
25003       case T_FLOAT: {
25004         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25005         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25006         break;
25007       }
25008       case T_DOUBLE: {
25009         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25010         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25011         break;
25012       }
25013       default: assert(false, "%s", type2name(src1_elem_bt)); break;
25014     }
25015   %}
25016   ins_pipe( pipe_slow );
25017 %}
25018 
25019 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
25020   predicate(Matcher::vector_length(n) <= 32);
25021   match(Set dst (MaskAll src));
25022   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
25023   ins_encode %{
25024     int mask_len = Matcher::vector_length(this);
25025     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
25026   %}
25027   ins_pipe( pipe_slow );
25028 %}
25029 
25030 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
25031   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
25032   match(Set dst (XorVMask src (MaskAll cnt)));
25033   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
25034   format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
25035   ins_encode %{
25036     uint masklen = Matcher::vector_length(this);
25037     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
25038   %}
25039   ins_pipe( pipe_slow );
25040 %}
25041 
25042 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
25043   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
25044             (Matcher::vector_length(n) == 16) ||
25045             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
25046   match(Set dst (XorVMask src (MaskAll cnt)));
25047   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
25048   ins_encode %{
25049     uint masklen = Matcher::vector_length(this);
25050     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
25051   %}
25052   ins_pipe( pipe_slow );
25053 %}
25054 
25055 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
25056   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
25057   match(Set dst (VectorLongToMask src));
25058   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
25059   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
25060   ins_encode %{
25061     int mask_len = Matcher::vector_length(this);
25062     int vec_enc  = vector_length_encoding(mask_len);
25063     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25064                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
25065   %}
25066   ins_pipe( pipe_slow );
25067 %}
25068 
25069 
25070 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
25071   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
25072   match(Set dst (VectorLongToMask src));
25073   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
25074   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
25075   ins_encode %{
25076     int mask_len = Matcher::vector_length(this);
25077     assert(mask_len <= 32, "invalid mask length");
25078     int vec_enc  = vector_length_encoding(mask_len);
25079     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25080                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
25081   %}
25082   ins_pipe( pipe_slow );
25083 %}
25084 
25085 instruct long_to_mask_evex(kReg dst, rRegL src) %{
25086   predicate(n->bottom_type()->isa_vectmask());
25087   match(Set dst (VectorLongToMask src));
25088   format %{ "long_to_mask_evex $dst, $src\t!" %}
25089   ins_encode %{
25090     __ kmov($dst$$KRegister, $src$$Register);
25091   %}
25092   ins_pipe( pipe_slow );
25093 %}
25094 
25095 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
25096   match(Set dst (AndVMask src1 src2));
25097   match(Set dst (OrVMask src1 src2));
25098   match(Set dst (XorVMask src1 src2));
25099   effect(TEMP kscratch);
25100   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
25101   ins_encode %{
25102     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
25103     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
25104     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
25105     uint masklen = Matcher::vector_length(this);
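    // Mask instructions with byte granularity (kandb/korb/kxorb) require
    // AVX512DQ; without it, operate on at least 16 mask bits.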
25106     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
25107     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
25108   %}
25109   ins_pipe( pipe_slow );
25110 %}
25111 
25112 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
25113   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25114   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25115   ins_encode %{
25116     int vlen_enc = vector_length_encoding(this);
25117     BasicType bt = Matcher::vector_element_basic_type(this);
25118     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25119                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
25120   %}
25121   ins_pipe( pipe_slow );
25122 %}
25123 
25124 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
25125   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25126   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25127   ins_encode %{
25128     int vlen_enc = vector_length_encoding(this);
25129     BasicType bt = Matcher::vector_element_basic_type(this);
25130     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25131                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25132   %}
25133   ins_pipe( pipe_slow );
25134 %}
25135 
25136 instruct castMM(kReg dst)
25137 %{
25138   match(Set dst (CastVV dst));
25139 
25140   size(0);
25141   format %{ "# castVV of $dst" %}
25142   ins_encode(/* empty encoding */);
25143   ins_cost(0);
25144   ins_pipe(empty);
25145 %}
25146 
25147 instruct castVV(vec dst)
25148 %{
25149   match(Set dst (CastVV dst));
25150 
25151   size(0);
25152   format %{ "# castVV of $dst" %}
25153   ins_encode(/* empty encoding */);
25154   ins_cost(0);
25155   ins_pipe(empty);
25156 %}
25157 
25158 instruct castVVLeg(legVec dst)
25159 %{
25160   match(Set dst (CastVV dst));
25161 
25162   size(0);
25163   format %{ "# castVV of $dst" %}
25164   ins_encode(/* empty encoding */);
25165   ins_cost(0);
25166   ins_pipe(empty);
25167 %}
25168 
25169 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25170 %{
25171   match(Set dst (IsInfiniteF src));
25172   effect(TEMP ktmp, KILL cr);
25173   format %{ "float_class_check $dst, $src" %}
25174   ins_encode %{
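    // vfpclassss imm8 0x18 tests the +infinity (0x08) and -infinity (0x10)
    // classes.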
25175     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25176     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25177   %}
25178   ins_pipe(pipe_slow);
25179 %}
25180 
25181 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25182 %{
25183   match(Set dst (IsInfiniteD src));
25184   effect(TEMP ktmp, KILL cr);
25185   format %{ "double_class_check $dst, $src" %}
25186   ins_encode %{
25187     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25188     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25189   %}
25190   ins_pipe(pipe_slow);
25191 %}
25192 
25193 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25194 %{
25195   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25196             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25197   match(Set dst (SaturatingAddV src1 src2));
25198   match(Set dst (SaturatingSubV src1 src2));
25199   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25200   ins_encode %{
25201     int vlen_enc = vector_length_encoding(this);
25202     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25203     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25204                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25205   %}
25206   ins_pipe(pipe_slow);
25207 %}
25208 
25209 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25210 %{
25211   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25212             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25213   match(Set dst (SaturatingAddV src1 src2));
25214   match(Set dst (SaturatingSubV src1 src2));
25215   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25216   ins_encode %{
25217     int vlen_enc = vector_length_encoding(this);
25218     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25219     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25220                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25221   %}
25222   ins_pipe(pipe_slow);
25223 %}
25224 
25225 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25226 %{
25227   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25228             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25229             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25230   match(Set dst (SaturatingAddV src1 src2));
25231   match(Set dst (SaturatingSubV src1 src2));
25232   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25233   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25234   ins_encode %{
25235     int vlen_enc = vector_length_encoding(this);
25236     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25237     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25238                                         $src1$$XMMRegister, $src2$$XMMRegister,
25239                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25240                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25241   %}
25242   ins_pipe(pipe_slow);
25243 %}
25244 
25245 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25246 %{
25247   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25248             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25249             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25250   match(Set dst (SaturatingAddV src1 src2));
25251   match(Set dst (SaturatingSubV src1 src2));
25252   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25253   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25254   ins_encode %{
25255     int vlen_enc = vector_length_encoding(this);
25256     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25257     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25258                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25259                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25260   %}
25261   ins_pipe(pipe_slow);
25262 %}
25263 
25264 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25265 %{
25266   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25267             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25268             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25269   match(Set dst (SaturatingAddV src1 src2));
25270   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25271   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25272   ins_encode %{
25273     int vlen_enc = vector_length_encoding(this);
25274     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25275     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25276                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25277   %}
25278   ins_pipe(pipe_slow);
25279 %}
25280 
25281 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25282 %{
25283   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25284             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25285             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25286   match(Set dst (SaturatingAddV src1 src2));
25287   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25288   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25289   ins_encode %{
25290     int vlen_enc = vector_length_encoding(this);
25291     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25292     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25293                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25294   %}
25295   ins_pipe(pipe_slow);
25296 %}
25297 
25298 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25299 %{
25300   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25301             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25302             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25303   match(Set dst (SaturatingSubV src1 src2));
25304   effect(TEMP ktmp);
25305   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25306   ins_encode %{
25307     int vlen_enc = vector_length_encoding(this);
25308     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25309     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25310                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25311   %}
25312   ins_pipe(pipe_slow);
25313 %}
25314 
25315 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25316 %{
25317   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25318             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25319             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25320   match(Set dst (SaturatingSubV src1 src2));
25321   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25322   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25323   ins_encode %{
25324     int vlen_enc = vector_length_encoding(this);
25325     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25326     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25327                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25328   %}
25329   ins_pipe(pipe_slow);
25330 %}
25331 
25332 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25333 %{
25334   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25335             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25336   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25337   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25338   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25339   ins_encode %{
25340     int vlen_enc = vector_length_encoding(this);
25341     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25342     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25343                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25344   %}
25345   ins_pipe(pipe_slow);
25346 %}
25347 
25348 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25349 %{
25350   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25351             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25352   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25353   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25354   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25355   ins_encode %{
25356     int vlen_enc = vector_length_encoding(this);
25357     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25358     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25359                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25360   %}
25361   ins_pipe(pipe_slow);
25362 %}
25363 
25364 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25365   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25366             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25367   match(Set dst (SaturatingAddV (Binary dst src) mask));
25368   match(Set dst (SaturatingSubV (Binary dst src) mask));
25369   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25370   ins_encode %{
25371     int vlen_enc = vector_length_encoding(this);
25372     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25373     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25374                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25375   %}
25376   ins_pipe( pipe_slow );
25377 %}
25378 
25379 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25380   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25381             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25382   match(Set dst (SaturatingAddV (Binary dst src) mask));
25383   match(Set dst (SaturatingSubV (Binary dst src) mask));
25384   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25385   ins_encode %{
25386     int vlen_enc = vector_length_encoding(this);
25387     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25388     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25389                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25390   %}
25391   ins_pipe( pipe_slow );
25392 %}
25393 
25394 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25395   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25396             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25397   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25398   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25399   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25400   ins_encode %{
25401     int vlen_enc = vector_length_encoding(this);
25402     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25403     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25404                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25405   %}
25406   ins_pipe( pipe_slow );
25407 %}
25408 
25409 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25410   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25411             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25412   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25413   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25414   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25415   ins_encode %{
25416     int vlen_enc = vector_length_encoding(this);
25417     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25418     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25419                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25420   %}
25421   ins_pipe( pipe_slow );
25422 %}
25423 
25424 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25425 %{
25426   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25427   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25428   ins_encode %{
25429     int vlen_enc = vector_length_encoding(this);
25430     BasicType bt = Matcher::vector_element_basic_type(this);
25431     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25432   %}
25433   ins_pipe(pipe_slow);
25434 %}
25435 
25436 instruct reinterpretS2HF(regF dst, rRegI src)
25437 %{
25438   match(Set dst (ReinterpretS2HF src));
25439   format %{ "vmovw $dst, $src" %}
25440   ins_encode %{
25441     __ vmovw($dst$$XMMRegister, $src$$Register);
25442   %}
25443   ins_pipe(pipe_slow);
25444 %}
25445 
25446 instruct reinterpretHF2S(rRegI dst, regF src)
25447 %{
25448   match(Set dst (ReinterpretHF2S src));
25449   format %{ "vmovw $dst, $src" %}
25450   ins_encode %{
25451     __ vmovw($dst$$Register, $src$$XMMRegister);
25452   %}
25453   ins_pipe(pipe_slow);
25454 %}
25455 
25456 instruct convF2HFAndS2HF(regF dst, regF src)
25457 %{
25458   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25459   format %{ "convF2HFAndS2HF $dst, $src" %}
25460   ins_encode %{
25461     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25462   %}
25463   ins_pipe(pipe_slow);
25464 %}
25465 
25466 instruct convHF2SAndHF2F(regF dst, regF src)
25467 %{
25468   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25469   format %{ "convHF2SAndHF2F $dst, $src" %}
25470   ins_encode %{
25471     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25472   %}
25473   ins_pipe(pipe_slow);
25474 %}
25475 
25476 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25477 %{
25478   match(Set dst (SqrtHF src));
25479   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25480   ins_encode %{
25481     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25482   %}
25483   ins_pipe(pipe_slow);
25484 %}
25485 
25486 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25487 %{
25488   match(Set dst (AddHF src1 src2));
25489   match(Set dst (DivHF src1 src2));
25490   match(Set dst (MulHF src1 src2));
25491   match(Set dst (SubHF src1 src2));
25492   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25493   ins_encode %{
25494     int opcode = this->ideal_Opcode();
25495     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25496   %}
25497   ins_pipe(pipe_slow);
25498 %}
25499 
25500 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25501 %{
25502   predicate(VM_Version::supports_avx10_2());
25503   match(Set dst (MaxHF src1 src2));
25504   match(Set dst (MinHF src1 src2));
25505   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25506   ins_encode %{
25507     int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25508     __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25509   %}
25510   ins_pipe( pipe_slow );
25511 %}
25512 
25513 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25514 %{
25515   predicate(!VM_Version::supports_avx10_2());
25516   match(Set dst (MaxHF src1 src2));
25517   match(Set dst (MinHF src1 src2));
25518   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25519   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25520   ins_encode %{
25521     int opcode = this->ideal_Opcode();
25522     __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25523                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25524   %}
25525   ins_pipe( pipe_slow );
25526 %}
25527 
25528 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25529 %{
25530   match(Set dst (FmaHF  src2 (Binary dst src1)));
25531   effect(DEF dst);
25532   format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25533   ins_encode %{
25534     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25535   %}
25536   ins_pipe( pipe_slow );
25537 %}
25538 
25539 
25540 instruct vector_sqrt_HF_reg(vec dst, vec src)
25541 %{
25542   match(Set dst (SqrtVHF src));
25543   format %{ "vector_sqrt_fp16 $dst, $src" %}
25544   ins_encode %{
25545     int vlen_enc = vector_length_encoding(this);
25546     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25547   %}
25548   ins_pipe(pipe_slow);
25549 %}
25550 
25551 instruct vector_sqrt_HF_mem(vec dst, memory src)
25552 %{
25553   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25554   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25555   ins_encode %{
25556     int vlen_enc = vector_length_encoding(this);
25557     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25558   %}
25559   ins_pipe(pipe_slow);
25560 %}
25561 
25562 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25563 %{
25564   match(Set dst (AddVHF src1 src2));
25565   match(Set dst (DivVHF src1 src2));
25566   match(Set dst (MulVHF src1 src2));
25567   match(Set dst (SubVHF src1 src2));
25568   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25569   ins_encode %{
25570     int vlen_enc = vector_length_encoding(this);
25571     int opcode = this->ideal_Opcode();
25572     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25573   %}
25574   ins_pipe(pipe_slow);
25575 %}
25576 
25577 
25578 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25579 %{
25580   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25581   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25582   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25583   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25584   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25585   ins_encode %{
25586     int vlen_enc = vector_length_encoding(this);
25587     int opcode = this->ideal_Opcode();
25588     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25589   %}
25590   ins_pipe(pipe_slow);
25591 %}
25592 
25593 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25594 %{
25595   match(Set dst (FmaVHF src2 (Binary dst src1)));
25596   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25597   ins_encode %{
25598     int vlen_enc = vector_length_encoding(this);
25599     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25600   %}
25601   ins_pipe( pipe_slow );
25602 %}
25603 
25604 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25605 %{
25606   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25607   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25608   ins_encode %{
25609     int vlen_enc = vector_length_encoding(this);
25610     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25611   %}
25612   ins_pipe( pipe_slow );
25613 %}
25614 
25615 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25616 %{
25617   predicate(VM_Version::supports_avx10_2());
25618   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25619   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25620   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25621   ins_encode %{
25622     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25624     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25625   %}
25626   ins_pipe( pipe_slow );
25627 %}
25628 
25629 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25630 %{
25631   predicate(VM_Version::supports_avx10_2());
25632   match(Set dst (MinVHF src1 src2));
25633   match(Set dst (MaxVHF src1 src2));
25634   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25635   ins_encode %{
25636     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25638     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25639   %}
25640   ins_pipe( pipe_slow );
25641 %}
25642 
25643 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25644 %{
25645   predicate(!VM_Version::supports_avx10_2());
25646   match(Set dst (MinVHF src1 src2));
25647   match(Set dst (MaxVHF src1 src2));
25648   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25649   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25650   ins_encode %{
25651     int vlen_enc = vector_length_encoding(this);
25652     int opcode = this->ideal_Opcode();
25653     __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25654                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25655   %}
25656   ins_pipe( pipe_slow );
25657 %}
25658 
25659 //----------PEEPHOLE RULES-----------------------------------------------------
25660 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
25662 //
25663 // peeppredicate ( rule_predicate );
// // the rule is ignored unless this predicate evaluates to true
25665 //
25666 // peepmatch ( root_instr_name [preceding_instruction]* );
25667 //
25668 // peepprocedure ( procedure_name );
// // provide the name of a procedure that performs the optimization; the
// // procedure should reside in the architecture-dependent peephole file and
// // have the signature bool (Block*, int, PhaseRegAlloc*, MachNode* (*)(), int...),
// // with the arguments being the basic block, the current node index inside the
// // block, the register allocator, functions that, when invoked, return a new
// // node defined in peepreplace, and the rules of the nodes appearing in the
// // corresponding peepmatch; the procedure returns true if the transformation
// // was performed, else false
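// //
// // as an illustrative sketch (the rule name below is hypothetical; the
// // authoritative signatures live in the architecture-dependent peephole
// // file), such a procedure could look like:
// //
// //   bool Peephole::my_rule(Block* block, int block_index, PhaseRegAlloc* ra_,
// //                          MachNode* (*new_root)(), uint inst0_rule) {
// //     // inspect block->get_node(block_index) and its neighbours, check the
// //     // constraints, then build the replacement via new_root(), patch it
// //     // into the block, and return true; otherwise return false to leave
// //     // the block untouched
// //     return false;
// //   }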
25677 //
// peepconstraint ( instruction_number.operand_name relational_op
//                  instruction_number.operand_name [, ...] );
25681 // // instruction numbers are zero-based using left to right order in peepmatch
25682 //
25683 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25684 // // provide an instruction_number.operand_name for each operand that appears
25685 // // in the replacement instruction's match rule
25686 //
25687 // ---------VM FLAGS---------------------------------------------------------
25688 //
25689 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25690 //
25691 // Each peephole rule is given an identifying number starting with zero and
25692 // increasing by one in the order seen by the parser.  An individual peephole
25693 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25694 // on the command-line.
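//
// For example (the rule number 3 below is illustrative):
//
//   -XX:-OptoPeephole        disables all peephole rules
//   -XX:OptoPeepholeAt=3     enables only peephole rule 3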
25695 //
25696 // ---------CURRENT LIMITATIONS----------------------------------------------
25697 //
// Only transformations inside a basic block are supported (do we need more for peephole?)
25699 //
25700 // ---------EXAMPLE----------------------------------------------------------
25701 //
25702 // // pertinent parts of existing instructions in architecture description
25703 // instruct movI(rRegI dst, rRegI src)
25704 // %{
25705 //   match(Set dst (CopyI src));
25706 // %}
25707 //
25708 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25709 // %{
25710 //   match(Set dst (AddI dst src));
25711 //   effect(KILL cr);
25712 // %}
25713 //
25714 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25715 // %{
25716 //   match(Set dst (AddI dst src));
25717 // %}
25718 //
25719 // 1. Simple replacement
25720 // - Only match adjacent instructions in same basic block
25721 // - Only equality constraints
25722 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25723 // - Only one replacement instruction
25724 //
25725 // // Change (inc mov) to lea
25726 // peephole %{
25727 //   // lea should only be emitted when beneficial
25728 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25729 //   // increment preceded by register-register move
25730 //   peepmatch ( incI_rReg movI );
25731 //   // require that the destination register of the increment
25732 //   // match the destination register of the move
25733 //   peepconstraint ( 0.dst == 1.dst );
25734 //   // construct a replacement instruction that sets
25735 //   // the destination to ( move's source register + one )
25736 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25737 // %}
25738 //
25739 // 2. Procedural replacement
// - More flexible in finding relevant nodes
// - More flexible constraints
// - More flexible transformations
// - May utilise the architecture-dependent API more effectively
// - Currently only one replacement instruction due to adlc parsing limitations
25745 //
25746 // // Change (inc mov) to lea
25747 // peephole %{
25748 //   // lea should only be emitted when beneficial
25749 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of the matched nodes are passed into the procedure below
//   peepmatch ( incI_rReg movI );
//   // the procedure responsible for performing the transformation
//   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that, when invoked, creates
//   // this node is passed into the procedure above
//   peepreplace ( leaI_rReg_immI() );
25757 // %}
25758 
// These instructions are not matched by the matcher but are used by the peephole rules
25760 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25761 %{
25762   predicate(false);
25763   match(Set dst (AddI src1 src2));
25764   format %{ "leal    $dst, [$src1 + $src2]" %}
25765   ins_encode %{
25766     Register dst = $dst$$Register;
25767     Register src1 = $src1$$Register;
25768     Register src2 = $src2$$Register;
25769     if (src1 != rbp && src1 != r13) {
25770       __ leal(dst, Address(src1, src2, Address::times_1));
25771     } else {
25772       assert(src2 != rbp && src2 != r13, "");
25773       __ leal(dst, Address(src2, src1, Address::times_1));
25774     }
25775   %}
25776   ins_pipe(ialu_reg_reg);
25777 %}
25778 
25779 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25780 %{
25781   predicate(false);
25782   match(Set dst (AddI src1 src2));
25783   format %{ "leal    $dst, [$src1 + $src2]" %}
25784   ins_encode %{
25785     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25786   %}
25787   ins_pipe(ialu_reg_reg);
25788 %}
25789 
25790 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25791 %{
25792   predicate(false);
25793   match(Set dst (LShiftI src shift));
25794   format %{ "leal    $dst, [$src << $shift]" %}
25795   ins_encode %{
25796     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25797     Register src = $src$$Register;
25798     if (scale == Address::times_2 && src != rbp && src != r13) {
25799       __ leal($dst$$Register, Address(src, src, Address::times_1));
25800     } else {
25801       __ leal($dst$$Register, Address(noreg, src, scale));
25802     }
25803   %}
25804   ins_pipe(ialu_reg_reg);
25805 %}
25806 
25807 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25808 %{
25809   predicate(false);
25810   match(Set dst (AddL src1 src2));
25811   format %{ "leaq    $dst, [$src1 + $src2]" %}
25812   ins_encode %{
25813     Register dst = $dst$$Register;
25814     Register src1 = $src1$$Register;
25815     Register src2 = $src2$$Register;
25816     if (src1 != rbp && src1 != r13) {
25817       __ leaq(dst, Address(src1, src2, Address::times_1));
25818     } else {
25819       assert(src2 != rbp && src2 != r13, "");
25820       __ leaq(dst, Address(src2, src1, Address::times_1));
25821     }
25822   %}
25823   ins_pipe(ialu_reg_reg);
25824 %}
25825 
25826 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25827 %{
25828   predicate(false);
25829   match(Set dst (AddL src1 src2));
25830   format %{ "leaq    $dst, [$src1 + $src2]" %}
25831   ins_encode %{
25832     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25833   %}
25834   ins_pipe(ialu_reg_reg);
25835 %}
25836 
25837 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25838 %{
25839   predicate(false);
25840   match(Set dst (LShiftL src shift));
25841   format %{ "leaq    $dst, [$src << $shift]" %}
25842   ins_encode %{
25843     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25844     Register src = $src$$Register;
25845     if (scale == Address::times_2 && src != rbp && src != r13) {
25846       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25847     } else {
25848       __ leaq($dst$$Register, Address(noreg, src, scale));
25849     }
25850   %}
25851   ins_pipe(ialu_reg_reg);
25852 %}
25853 
// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (VM_Version::supports_fast_2op_lea()), whereas the {inc, dec} rules are
// generally only beneficial on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
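//
// As an illustrative example (register choice hypothetical), the coalescing
// rules rewrite the pair
//
//   movl  eax, ebx
//   addl  eax, ecx
//
// into the single instruction
//
//   leal  eax, [ebx + ecx]
//
// which computes the same sum without first copying ebx into eax.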
25860 
25861 peephole
25862 %{
25863   peeppredicate(VM_Version::supports_fast_2op_lea());
25864   peepmatch (addI_rReg);
25865   peepprocedure (lea_coalesce_reg);
25866   peepreplace (leaI_rReg_rReg_peep());
25867 %}
25868 
25869 peephole
25870 %{
25871   peeppredicate(VM_Version::supports_fast_2op_lea());
25872   peepmatch (addI_rReg_imm);
25873   peepprocedure (lea_coalesce_imm);
25874   peepreplace (leaI_rReg_immI_peep());
25875 %}
25876 
25877 peephole
25878 %{
25879   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25880                 VM_Version::is_intel_cascade_lake());
25881   peepmatch (incI_rReg);
25882   peepprocedure (lea_coalesce_imm);
25883   peepreplace (leaI_rReg_immI_peep());
25884 %}
25885 
25886 peephole
25887 %{
25888   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25889                 VM_Version::is_intel_cascade_lake());
25890   peepmatch (decI_rReg);
25891   peepprocedure (lea_coalesce_imm);
25892   peepreplace (leaI_rReg_immI_peep());
25893 %}
25894 
25895 peephole
25896 %{
25897   peeppredicate(VM_Version::supports_fast_2op_lea());
25898   peepmatch (salI_rReg_immI2);
25899   peepprocedure (lea_coalesce_imm);
25900   peepreplace (leaI_rReg_immI2_peep());
25901 %}
25902 
25903 peephole
25904 %{
25905   peeppredicate(VM_Version::supports_fast_2op_lea());
25906   peepmatch (addL_rReg);
25907   peepprocedure (lea_coalesce_reg);
25908   peepreplace (leaL_rReg_rReg_peep());
25909 %}
25910 
25911 peephole
25912 %{
25913   peeppredicate(VM_Version::supports_fast_2op_lea());
25914   peepmatch (addL_rReg_imm);
25915   peepprocedure (lea_coalesce_imm);
25916   peepreplace (leaL_rReg_immL32_peep());
25917 %}
25918 
25919 peephole
25920 %{
25921   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25922                 VM_Version::is_intel_cascade_lake());
25923   peepmatch (incL_rReg);
25924   peepprocedure (lea_coalesce_imm);
25925   peepreplace (leaL_rReg_immL32_peep());
25926 %}
25927 
25928 peephole
25929 %{
25930   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25931                 VM_Version::is_intel_cascade_lake());
25932   peepmatch (decL_rReg);
25933   peepprocedure (lea_coalesce_imm);
25934   peepreplace (leaL_rReg_immL32_peep());
25935 %}
25936 
25937 peephole
25938 %{
25939   peeppredicate(VM_Version::supports_fast_2op_lea());
25940   peepmatch (salL_rReg_immI2);
25941   peepprocedure (lea_coalesce_imm);
25942   peepreplace (leaL_rReg_immI2_peep());
25943 %}
25944 
25945 peephole
25946 %{
25947   peepmatch (leaPCompressedOopOffset);
25948   peepprocedure (lea_remove_redundant);
25949 %}
25950 
25951 peephole
25952 %{
25953   peepmatch (leaP8Narrow);
25954   peepprocedure (lea_remove_redundant);
25955 %}
25956 
25957 peephole
25958 %{
25959   peepmatch (leaP32Narrow);
25960   peepprocedure (lea_remove_redundant);
25961 %}
25962 
// These peephole rules match instructions which set flags and are followed by a
// testI/L_reg. The test instruction is redundant when the downstream
// instructions (like JCC or CMOV) only use flags that are already set by the
// previous instruction.
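//
// As an illustrative example (registers hypothetical):
//
//   andl  eax, ebx     sets ZF/SF/PF from the result and clears CF/OF
//   testl eax, eax     redundant: recomputes exactly the same flags
//   je    done         could consume the flags produced by andl directly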
25965 
// int variant
25967 peephole
25968 %{
25969   peepmatch (testI_reg);
25970   peepprocedure (test_may_remove);
25971 %}
25972 
// long variant
25974 peephole
25975 %{
25976   peepmatch (testL_reg);
25977   peepprocedure (test_may_remove);
25978 %}
25979 
25980 
25981 //----------SMARTSPILL RULES---------------------------------------------------
25982 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.