//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

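// For example, the definition below
//   reg_def RAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// says that RAX is save-on-call under both the Java and C calling
// conventions, spills as an integer (Op_RegI), has hardware encoding 0,
// and is bound to the VM's descriptor for rax.
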
// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers)

// RBX, RSI, and RDI were previously save-on-entry for Java code, but SOE was
// turned off in Java code due to the frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are SOE registers again.

reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the
// prolog/epilog code.
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

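// The chunk0 ordering below follows the heuristic above: freely usable
// scratch registers such as R10 and R11 are preferred first, registers with
// fixed instruction roles (e.g. RAX and RDX for division) come later, and
// RSP, which is part of every calling sequence, comes last.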
alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   R16,         R16_H,
                   R17,         R17_H,
                   R18,         R18_H,
                   R19,         R19_H,
                   R20,         R20_H,
                   R21,         R21_H,
                   R22,         R22_H,
                   R23,         R23_H,
                   R24,         R24_H,
                   R25,         R25_H,
                   R26,         R26_H,
                   R27,         R27_H,
                   R28,         R28_H,
                   R29,         R29_H,
                   R30,         R30_H,
                   R31,         R31_H,
                   RSP,         RSP_H);

// XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 intrinsics, array copy stubs
// and superword operations (see UseSSE42Intrinsics, UseXMMForArrayCopy
// and UseSuperword flags).
// For pre-EVEX architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters
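
// Example: xmm0's 16 slots below run from XMM0 (word a) through XMM0p
// (word p); per the note above, a Float occupies XMM0 alone and a Double
// occupies the XMM0/XMM0b pair.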

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

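// Flags register, used as the output of compare instructions; it has no
// VMReg of its own (VMRegImpl::Bad()).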
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
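// K0 is omitted: in EVEX encodings an opmask value of 0 means "no masking",
// so k0 cannot be used as an allocatable predicate register.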
reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

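// The classes below are defined by C++ bodies instead of fixed register
// lists: each body returns a RegMask computed at VM startup (see
// reg_mask_init()), so the allocatable set can depend on runtime CPU
// features such as the APX extended GPRs.
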
// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
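
      // The singleton K-register classes above let an instruction pin a specific
      // opmask register. An operand tied to one of them would follow the usual
      // ADL operand pattern -- an illustrative sketch only, not a definition made
      // by this file:
      //
      //   operand kReg_K2() %{
      //     constraint(ALLOC_IN_RC(vectmask_reg_K2));
      //     match(RegVectMask);
      //     format %{ %}
      //     interface(REG_INTER);
      //   %}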
 1054 
 1055 // flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
 1061 // Class for pre evex float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
 1079 // Class for evex float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
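
      // reg_class_dynamic selects between its two static classes when the mask is
      // queried: the first (EVEX) class applies when the trailing predicate holds,
      // otherwise the second (legacy) class is used. The same pattern repeats for
      // the double and vector classes below.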
 1115 
 1116 // Class for pre evex double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
 1134 // Class for evex double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
 1171 // Class for pre evex 32bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
 1189 // Class for evex 32bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
 1226 // Class for pre evex 64bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
 1244 // Class for evex 64bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
 1281 // Class for pre evex 128bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
 1299 // Class for evex 128bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
 1336 // Class for pre evex 256bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
 1354 // Class for evex 256bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
 1391 // Class for evex 512bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
 1425 // Class for restricted 512bit vector registers (XMM0-XMM15 only)
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
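
      // Typical use is as an instruction predicate, e.g. (illustrative only):
      //
      //   predicate(castLL_is_imm32(n));
      //
      // so that CastLL nodes whose type bounds fit in signed 32-bit immediates
      // can be matched by forms that encode the bounds as imm32 operands.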
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
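
      // When compressed oops are enabled, R12 holds the heap base and must stay
      // out of every allocatable mask; PreserveFramePointer similarly reserves
      // RBP below.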
 1528 
 1529 void reg_mask_init() {
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class above.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
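
        // Each removal above drops both 32-bit halves of a 64-bit register. A
        // hypothetical helper (not part of this file) captures the pattern used
        // throughout reg_mask_init():
        //
        //   static void remove_reg64(RegMask& mask, Register r) {
        //     mask.remove(OptoReg::as_OptoReg(r->as_VMReg()));          // low half
        //     mask.remove(OptoReg::as_OptoReg(r->as_VMReg()->next()));  // high half
        //   }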
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
 1626 static bool generate_vzeroupper(Compile* C) {
 1627   return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());  // Generate vzeroupper
 1628 }
 1629 
 1630 static int clear_avx_size() {
 1631   return generate_vzeroupper(Compile::current()) ? 3 : 0;  // vzeroupper
 1632 }
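
      // The size 3 is the encoded length of vzeroupper (C5 F8 77).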
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
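
      // The 5 bytes above are the direct call encoding: opcode E8 plus a 4-byte
      // rel32 displacement.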
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
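
      // The 15 bytes above cover the 10-byte movq that loads the inline cache
      // value into rax plus the 5-byte direct call.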
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
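
      // The 13 bytes above are movq r10, #addr (REX.W B8+r with an 8-byte
      // immediate, 10 bytes) followed by callq r10 (41 FF D2, 3 bytes).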
 1662 
 1663 //
 1664 // Compute padding required for nodes which need alignment
 1665 //
 1666 
 1667 // The address of the call instruction needs to be 4-byte aligned to
 1668 // ensure that it does not span a cache line so that it can be patched.
 1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1670 {
 1671   current_offset += clear_avx_size(); // skip vzeroupper
 1672   current_offset += 1; // skip call opcode byte
 1673   return align_up(current_offset, alignment_required()) - current_offset;
 1674 }
 1675 
 1676 // The address of the call instruction needs to be 4-byte aligned to
 1677 // ensure that it does not span a cache line so that it can be patched.
 1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1679 {
 1680   current_offset += clear_avx_size(); // skip vzeroupper
 1681   current_offset += 11; // skip movq instruction + call opcode byte
 1682   return align_up(current_offset, alignment_required()) - current_offset;
 1683 }
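
      // Worked example (illustrative): at current_offset == 16 with vzeroupper
      // emitted (3 bytes), the displacement would start at 16 + 3 + 11 = 30, so
      // align_up(30, 4) - 30 == 2 bytes of padding are requested.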
 1684 
 1685 // This could be in MacroAssembler but it's fairly C2 specific
 1686 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1687   Label exit;
 1688   __ jccb(Assembler::noParity, exit);
 1689   __ pushf();
 1690   //
 1691   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1692   // zero OF,AF,SF for NaN values.
 1693   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1694   // values returns 'less than' result (CF is set).
 1695   // Leave the rest of flags unchanged.
 1696   //
 1697   //    7 6 5 4 3 2 1 0
 1698   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1699   //    0 0 1 0 1 0 1 1   (0x2B)
 1700   //
 1701   __ andq(Address(rsp, 0), 0xffffff2b);
 1702   __ popf();
 1703   __ bind(exit);
 1704 }
 1705 
 1706 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
 1707   Label done;
 1708   __ movl(dst, -1);
 1709   __ jcc(Assembler::parity, done);
 1710   __ jcc(Assembler::below, done);
 1711   __ setcc(Assembler::notEqual, dst);
 1712   __ bind(done);
 1713 }
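
      // emit_cmpfp3 leaves dst == -1 when the compare is below or unordered
      // (NaN), 0 when equal, and 1 when above, matching the fcmpl/dcmpl bytecode
      // convention of treating NaN as 'less than'.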
 1714 
 1715 // Math.min()    # Math.max()
 1716 // --------------------------
 1717 // ucomis[s/d]   #
 1718 // ja   -> b     # a
 1719 // jp   -> NaN   # NaN
 1720 // jb   -> a     # b
 1721 // je            #
 1722 // |-jz -> a | b # a & b
 1723 // |    -> a     #
 1724 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1725                             XMMRegister a, XMMRegister b,
 1726                             XMMRegister xmmt, Register rt,
 1727                             bool min, bool single) {
 1728 
 1729   Label nan, zero, below, above, done;
 1730 
 1731   if (single)
 1732     __ ucomiss(a, b);
 1733   else
 1734     __ ucomisd(a, b);
 1735 
 1736   if (dst->encoding() != (min ? b : a)->encoding())
 1737     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1738   else
 1739     __ jccb(Assembler::above, done);
 1740 
 1741   __ jccb(Assembler::parity, nan);  // PF=1
 1742   __ jccb(Assembler::below, below); // CF=1
 1743 
 1744   // equal
 1745   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1746   if (single) {
 1747     __ ucomiss(a, xmmt);
 1748     __ jccb(Assembler::equal, zero);
 1749 
 1750     __ movflt(dst, a);
 1751     __ jmp(done);
 1752   }
 1753   else {
 1754     __ ucomisd(a, xmmt);
 1755     __ jccb(Assembler::equal, zero);
 1756 
 1757     __ movdbl(dst, a);
 1758     __ jmp(done);
 1759   }
 1760 
 1761   __ bind(zero);
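        // Inputs compared equal and are +/-0.0: combine the raw bit patterns so
        // that min(+0.0, -0.0) yields -0.0 (OR sets the sign bit if either input
        // is negative) and max(-0.0, +0.0) yields +0.0 (AND clears it unless
        // both inputs are negative).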
 1762   if (min)
 1763     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1764   else
 1765     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1766 
 1767   __ jmp(done);
 1768 
 1769   __ bind(above);
 1770   if (single)
 1771     __ movflt(dst, min ? b : a);
 1772   else
 1773     __ movdbl(dst, min ? b : a);
 1774 
 1775   __ jmp(done);
 1776 
 1777   __ bind(nan);
 1778   if (single) {
 1779     __ movl(rt, 0x7fc00000); // Float.NaN
 1780     __ movdl(dst, rt);
 1781   }
 1782   else {
 1783     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1784     __ movdq(dst, rt);
 1785   }
 1786   __ jmp(done);
 1787 
 1788   __ bind(below);
 1789   if (single)
 1790     __ movflt(dst, min ? a : b);
 1791   else
 1792     __ movdbl(dst, min ? a : b);
 1793 
 1794   __ bind(done);
 1795 }
 1796 
 1797 //=============================================================================
 1798 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1799 
 1800 int ConstantTable::calculate_table_base_offset() const {
 1801   return 0;  // absolute addressing, no offset
 1802 }
 1803 
 1804 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1805 void MachConstantBaseNode::postalloc_expand(GrowableArray<Node*>* nodes, PhaseRegAlloc* ra_) {
 1806   ShouldNotReachHere();
 1807 }
 1808 
 1809 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1810   // Empty encoding
 1811 }
 1812 
 1813 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1814   return 0;
 1815 }
 1816 
 1817 #ifndef PRODUCT
 1818 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1819   st->print("# MachConstantBaseNode (empty encoding)");
 1820 }
 1821 #endif
 1822 
 1823 
 1824 //=============================================================================
 1825 #ifndef PRODUCT
 1826 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1827   Compile* C = ra_->C;
 1828 
 1829   int framesize = C->output()->frame_size_in_bytes();
 1830   int bangsize = C->output()->bang_size_in_bytes();
 1831   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1832   // Remove wordSize for return addr which is already pushed.
 1833   framesize -= wordSize;
 1834 
 1835   if (C->output()->need_stack_bang(bangsize)) {
 1836     framesize -= wordSize;
 1837     st->print("# stack bang (%d bytes)", bangsize);
 1838     st->print("\n\t");
 1839     st->print("pushq   rbp\t# Save rbp");
 1840     if (PreserveFramePointer) {
 1841         st->print("\n\t");
 1842         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1843     }
 1844     if (framesize) {
 1845       st->print("\n\t");
 1846       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1847     }
 1848   } else {
 1849     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1850     st->print("\n\t");
 1851     framesize -= wordSize;
 1852     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1853     if (PreserveFramePointer) {
 1854       st->print("\n\t");
 1855       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1856       if (framesize > 0) {
 1857         st->print("\n\t");
 1858         st->print("addq    rbp, #%d", framesize);
 1859       }
 1860     }
 1861   }
 1862 
 1863   if (VerifyStackAtCalls) {
 1864     st->print("\n\t");
 1865     framesize -= wordSize;
 1866     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1867 #ifdef ASSERT
 1868     st->print("\n\t");
 1869     st->print("# stack alignment check");
 1870 #endif
 1871   }
 1872   if (C->stub_function() != nullptr) {
 1873     st->print("\n\t");
 1874     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1875     st->print("\n\t");
 1876     st->print("je      fast_entry\t");
 1877     st->print("\n\t");
 1878     st->print("call    #nmethod_entry_barrier_stub\t");
 1879     st->print("\n\tfast_entry:");
 1880   }
 1881   st->cr();
 1882 }
 1883 #endif
 1884 
 1885 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1886   Compile* C = ra_->C;
 1887 
 1888   __ verified_entry(C);
 1889 
 1890   if (ra_->C->stub_function() == nullptr) {
 1891     __ entry_barrier();
 1892   }
 1893 
 1894   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1895     __ bind(*_verified_entry);
 1896   }
 1897 
 1898   C->output()->set_frame_complete(__ offset());
 1899 
 1900   if (C->has_mach_constant_base_node()) {
 1901     // NOTE: We set the table base offset here because constant table users
 1902     // might be emitted before MachConstantBaseNode.
 1903     ConstantTable& constant_table = C->output()->constant_table();
 1904     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1905   }
 1906 }
 1907 
 1908 
 1909 int MachPrologNode::reloc() const
 1910 {
 1911   return 0; // a large enough number
 1912 }
 1913 
 1914 //=============================================================================
 1915 #ifndef PRODUCT
 1916 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1917 {
 1918   Compile* C = ra_->C;
 1919   if (generate_vzeroupper(C)) {
 1920     st->print("vzeroupper");
 1921     st->cr(); st->print("\t");
 1922   }
 1923 
 1924   int framesize = C->output()->frame_size_in_bytes();
 1925   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1926   // Remove a word each for the return address (already pushed)
 1927   // and for RBP.
 1928   framesize -= 2*wordSize;
 1929 
 1930   if (framesize) {
 1931     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1932     st->print("\t");
 1933   }
 1934 
 1935   st->print_cr("popq    rbp");
 1936   if (do_polling() && C->is_method_compilation()) {
 1937     st->print("\t");
 1938     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1939                  "ja      #safepoint_stub\t"
 1940                  "# Safepoint: poll for GC");
 1941   }
 1942 }
 1943 #endif
 1944 
 1945 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1946 {
 1947   Compile* C = ra_->C;
 1948 
 1949   if (generate_vzeroupper(C)) {
 1950     // Clear upper bits of YMM registers when current compiled code uses
 1951     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1952     __ vzeroupper();
 1953   }
 1954 
 1955   // Subtract two words to account for return address and rbp
 1956   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
 1957   __ remove_frame(initial_framesize, C->needs_stack_repair());
 1958 
 1959   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1960     __ reserved_stack_check();
 1961   }
 1962 
 1963   if (do_polling() && C->is_method_compilation()) {
 1964     Label dummy_label;
 1965     Label* code_stub = &dummy_label;
 1966     if (!C->output()->in_scratch_emit_size()) {
 1967       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1968       C->output()->add_stub(stub);
 1969       code_stub = &stub->entry();
 1970     }
 1971     __ relocate(relocInfo::poll_return_type);
 1972     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1973   }
 1974 }
 1975 
 1976 int MachEpilogNode::reloc() const
 1977 {
 1978   return 2; // a large enough number
 1979 }
 1980 
 1981 const Pipeline* MachEpilogNode::pipeline() const
 1982 {
 1983   return MachNode::pipeline_class();
 1984 }
 1985 
 1986 //=============================================================================
 1987 
 1988 enum RC {
 1989   rc_bad,
 1990   rc_int,
 1991   rc_kreg,
 1992   rc_float,
 1993   rc_stack
 1994 };
 1995 
 1996 static enum RC rc_class(OptoReg::Name reg)
 1997 {
 1998   if (!OptoReg::is_valid(reg)) return rc_bad;
 1999 
 2000   if (OptoReg::is_stack(reg)) return rc_stack;
 2001 
 2002   VMReg r = OptoReg::as_VMReg(reg);
 2003 
 2004   if (r->is_Register()) return rc_int;
 2005 
 2006   if (r->is_KRegister()) return rc_kreg;
 2007 
 2008   assert(r->is_XMMRegister(), "must be");
 2009   return rc_float;
 2010 }
 2011 
 2012 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2013 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2014                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2015 
 2016 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2017                      int stack_offset, int reg, uint ireg, outputStream* st);
 2018 
 2019 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2020                                       int dst_offset, uint ireg, outputStream* st) {
 2021   if (masm) {
 2022     switch (ireg) {
 2023     case Op_VecS:
 2024       __ movq(Address(rsp, -8), rax);
 2025       __ movl(rax, Address(rsp, src_offset));
 2026       __ movl(Address(rsp, dst_offset), rax);
 2027       __ movq(rax, Address(rsp, -8));
 2028       break;
 2029     case Op_VecD:
 2030       __ pushq(Address(rsp, src_offset));
 2031       __ popq (Address(rsp, dst_offset));
 2032       break;
 2033     case Op_VecX:
 2034       __ pushq(Address(rsp, src_offset));
 2035       __ popq (Address(rsp, dst_offset));
 2036       __ pushq(Address(rsp, src_offset+8));
 2037       __ popq (Address(rsp, dst_offset+8));
 2038       break;
 2039     case Op_VecY:
 2040       __ vmovdqu(Address(rsp, -32), xmm0);
 2041       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2042       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2043       __ vmovdqu(xmm0, Address(rsp, -32));
 2044       break;
 2045     case Op_VecZ:
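            // The trailing 2 is the vector length argument (Assembler::AVX_512bit).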
 2046       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 2047       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 2048       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 2049       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 2050       break;
 2051     default:
 2052       ShouldNotReachHere();
 2053     }
 2054 #ifndef PRODUCT
 2055   } else {
 2056     switch (ireg) {
 2057     case Op_VecS:
 2058       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2059                 "movl    rax, [rsp + #%d]\n\t"
 2060                 "movl    [rsp + #%d], rax\n\t"
 2061                 "movq    rax, [rsp - #8]",
 2062                 src_offset, dst_offset);
 2063       break;
 2064     case Op_VecD:
 2065       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2066                 "popq    [rsp + #%d]",
 2067                 src_offset, dst_offset);
 2068       break;
 2069     case Op_VecX:
 2070       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2071                 "popq    [rsp + #%d]\n\t"
 2072                 "pushq   [rsp + #%d]\n\t"
 2073                 "popq    [rsp + #%d]",
 2074                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2075       break;
 2076     case Op_VecY:
 2077       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2078                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2079                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2080                 "vmovdqu xmm0, [rsp - #32]",
 2081                 src_offset, dst_offset);
 2082       break;
 2083     case Op_VecZ:
 2084       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2085                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2086                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2087                 "vmovdqu xmm0, [rsp - #64]",
 2088                 src_offset, dst_offset);
 2089       break;
 2090     default:
 2091       ShouldNotReachHere();
 2092     }
 2093 #endif
 2094   }
 2095 }
 2096 
 2097 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2098                                        PhaseRegAlloc* ra_,
 2099                                        bool do_size,
 2100                                        outputStream* st) const {
 2101   assert(masm != nullptr || st != nullptr, "sanity");
 2102   // Get registers to move
 2103   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2104   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2105   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2106   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2107 
 2108   enum RC src_second_rc = rc_class(src_second);
 2109   enum RC src_first_rc = rc_class(src_first);
 2110   enum RC dst_second_rc = rc_class(dst_second);
 2111   enum RC dst_first_rc = rc_class(dst_first);
 2112 
 2113   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2114          "must move at least 1 register" );
 2115 
 2116   if (src_first == dst_first && src_second == dst_second) {
 2117     // Self copy, no move
 2118     return 0;
 2119   }
 2120   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2121     uint ireg = ideal_reg();
 2122     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2123     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 2124     if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
 2125       // mem -> mem
 2126       int src_offset = ra_->reg2offset(src_first);
 2127       int dst_offset = ra_->reg2offset(dst_first);
 2128       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
 2129     } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
 2130       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
 2131     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
 2132       int stack_offset = ra_->reg2offset(dst_first);
 2133       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
 2134     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
 2135       int stack_offset = ra_->reg2offset(src_first);
 2136       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2137     } else {
 2138       ShouldNotReachHere();
 2139     }
 2140     return 0;
 2141   }
 2142   if (src_first_rc == rc_stack) {
 2143     // mem ->
 2144     if (dst_first_rc == rc_stack) {
 2145       // mem -> mem
 2146       assert(src_second != dst_first, "overlap");
 2147       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2148           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2149         // 64-bit
 2150         int src_offset = ra_->reg2offset(src_first);
 2151         int dst_offset = ra_->reg2offset(dst_first);
 2152         if (masm) {
 2153           __ pushq(Address(rsp, src_offset));
 2154           __ popq (Address(rsp, dst_offset));
 2155 #ifndef PRODUCT
 2156         } else {
 2157           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2158                     "popq    [rsp + #%d]",
 2159                      src_offset, dst_offset);
 2160 #endif
 2161         }
 2162       } else {
 2163         // 32-bit
 2164         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2165         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2166         // No pushl/popl, so:
 2167         int src_offset = ra_->reg2offset(src_first);
 2168         int dst_offset = ra_->reg2offset(dst_first);
 2169         if (masm) {
 2170           __ movq(Address(rsp, -8), rax);
 2171           __ movl(rax, Address(rsp, src_offset));
 2172           __ movl(Address(rsp, dst_offset), rax);
 2173           __ movq(rax, Address(rsp, -8));
 2174 #ifndef PRODUCT
 2175         } else {
 2176           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2177                     "movl    rax, [rsp + #%d]\n\t"
 2178                     "movl    [rsp + #%d], rax\n\t"
 2179                     "movq    rax, [rsp - #8]",
 2180                      src_offset, dst_offset);
 2181 #endif
 2182         }
 2183       }
 2184       return 0;
 2185     } else if (dst_first_rc == rc_int) {
 2186       // mem -> gpr
 2187       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2188           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2189         // 64-bit
 2190         int offset = ra_->reg2offset(src_first);
 2191         if (masm) {
 2192           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2193 #ifndef PRODUCT
 2194         } else {
 2195           st->print("movq    %s, [rsp + #%d]\t# spill",
 2196                      Matcher::regName[dst_first],
 2197                      offset);
 2198 #endif
 2199         }
 2200       } else {
 2201         // 32-bit
 2202         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2203         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2204         int offset = ra_->reg2offset(src_first);
 2205         if (masm) {
 2206           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2207 #ifndef PRODUCT
 2208         } else {
 2209           st->print("movl    %s, [rsp + #%d]\t# spill",
 2210                      Matcher::regName[dst_first],
 2211                      offset);
 2212 #endif
 2213         }
 2214       }
 2215       return 0;
 2216     } else if (dst_first_rc == rc_float) {
      // mem -> xmm
 2218       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2219           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2220         // 64-bit
 2221         int offset = ra_->reg2offset(src_first);
 2222         if (masm) {
 2223           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2224 #ifndef PRODUCT
 2225         } else {
 2226           st->print("%s  %s, [rsp + #%d]\t# spill",
 2227                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2228                      Matcher::regName[dst_first],
 2229                      offset);
 2230 #endif
 2231         }
 2232       } else {
 2233         // 32-bit
 2234         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2235         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2236         int offset = ra_->reg2offset(src_first);
 2237         if (masm) {
 2238           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2239 #ifndef PRODUCT
 2240         } else {
 2241           st->print("movss   %s, [rsp + #%d]\t# spill",
 2242                      Matcher::regName[dst_first],
 2243                      offset);
 2244 #endif
 2245         }
 2246       }
 2247       return 0;
 2248     } else if (dst_first_rc == rc_kreg) {
 2249       // mem -> kreg
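      // Only the 64-bit shape is expected for mask register spills (kmovq
      // moves the full 64-bit mask).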
 2250       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2251           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2252         // 64-bit
 2253         int offset = ra_->reg2offset(src_first);
 2254         if (masm) {
 2255           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2256 #ifndef PRODUCT
 2257         } else {
 2258           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2259                      Matcher::regName[dst_first],
 2260                      offset);
 2261 #endif
 2262         }
 2263       }
 2264       return 0;
 2265     }
 2266   } else if (src_first_rc == rc_int) {
 2267     // gpr ->
 2268     if (dst_first_rc == rc_stack) {
 2269       // gpr -> mem
 2270       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2271           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2272         // 64-bit
 2273         int offset = ra_->reg2offset(dst_first);
 2274         if (masm) {
 2275           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2276 #ifndef PRODUCT
 2277         } else {
 2278           st->print("movq    [rsp + #%d], %s\t# spill",
 2279                      offset,
 2280                      Matcher::regName[src_first]);
 2281 #endif
 2282         }
 2283       } else {
 2284         // 32-bit
 2285         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2286         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2287         int offset = ra_->reg2offset(dst_first);
 2288         if (masm) {
 2289           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2290 #ifndef PRODUCT
 2291         } else {
 2292           st->print("movl    [rsp + #%d], %s\t# spill",
 2293                      offset,
 2294                      Matcher::regName[src_first]);
 2295 #endif
 2296         }
 2297       }
 2298       return 0;
 2299     } else if (dst_first_rc == rc_int) {
 2300       // gpr -> gpr
 2301       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2302           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2303         // 64-bit
 2304         if (masm) {
 2305           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2306                   as_Register(Matcher::_regEncode[src_first]));
 2307 #ifndef PRODUCT
 2308         } else {
 2309           st->print("movq    %s, %s\t# spill",
 2310                      Matcher::regName[dst_first],
 2311                      Matcher::regName[src_first]);
 2312 #endif
 2313         }
 2314         return 0;
 2315       } else {
 2316         // 32-bit
 2317         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2318         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2319         if (masm) {
 2320           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2321                   as_Register(Matcher::_regEncode[src_first]));
 2322 #ifndef PRODUCT
 2323         } else {
 2324           st->print("movl    %s, %s\t# spill",
 2325                      Matcher::regName[dst_first],
 2326                      Matcher::regName[src_first]);
 2327 #endif
 2328         }
 2329         return 0;
 2330       }
 2331     } else if (dst_first_rc == rc_float) {
 2332       // gpr -> xmm
 2333       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2334           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2335         // 64-bit
 2336         if (masm) {
 2337           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2338 #ifndef PRODUCT
 2339         } else {
 2340           st->print("movdq   %s, %s\t# spill",
 2341                      Matcher::regName[dst_first],
 2342                      Matcher::regName[src_first]);
 2343 #endif
 2344         }
 2345       } else {
 2346         // 32-bit
 2347         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2348         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2349         if (masm) {
 2350           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2351 #ifndef PRODUCT
 2352         } else {
 2353           st->print("movdl   %s, %s\t# spill",
 2354                      Matcher::regName[dst_first],
 2355                      Matcher::regName[src_first]);
 2356 #endif
 2357         }
 2358       }
 2359       return 0;
 2360     } else if (dst_first_rc == rc_kreg) {
 2361       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2362           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2363         // 64-bit
 2364         if (masm) {
 2365           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
 2372         }
 2373       }
 2374       Unimplemented();
 2375       return 0;
 2376     }
 2377   } else if (src_first_rc == rc_float) {
 2378     // xmm ->
 2379     if (dst_first_rc == rc_stack) {
 2380       // xmm -> mem
 2381       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2382           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2383         // 64-bit
 2384         int offset = ra_->reg2offset(dst_first);
 2385         if (masm) {
 2386           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2387 #ifndef PRODUCT
 2388         } else {
 2389           st->print("movsd   [rsp + #%d], %s\t# spill",
 2390                      offset,
 2391                      Matcher::regName[src_first]);
 2392 #endif
 2393         }
 2394       } else {
 2395         // 32-bit
 2396         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2397         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2398         int offset = ra_->reg2offset(dst_first);
 2399         if (masm) {
 2400           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2401 #ifndef PRODUCT
 2402         } else {
 2403           st->print("movss   [rsp + #%d], %s\t# spill",
 2404                      offset,
 2405                      Matcher::regName[src_first]);
 2406 #endif
 2407         }
 2408       }
 2409       return 0;
 2410     } else if (dst_first_rc == rc_int) {
 2411       // xmm -> gpr
 2412       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2413           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2414         // 64-bit
 2415         if (masm) {
 2416           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2417 #ifndef PRODUCT
 2418         } else {
 2419           st->print("movdq   %s, %s\t# spill",
 2420                      Matcher::regName[dst_first],
 2421                      Matcher::regName[src_first]);
 2422 #endif
 2423         }
 2424       } else {
 2425         // 32-bit
 2426         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2427         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2428         if (masm) {
 2429           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2430 #ifndef PRODUCT
 2431         } else {
 2432           st->print("movdl   %s, %s\t# spill",
 2433                      Matcher::regName[dst_first],
 2434                      Matcher::regName[src_first]);
 2435 #endif
 2436         }
 2437       }
 2438       return 0;
 2439     } else if (dst_first_rc == rc_float) {
 2440       // xmm -> xmm
 2441       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2442           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2443         // 64-bit
 2444         if (masm) {
 2445           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2446 #ifndef PRODUCT
 2447         } else {
 2448           st->print("%s  %s, %s\t# spill",
 2449                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2450                      Matcher::regName[dst_first],
 2451                      Matcher::regName[src_first]);
 2452 #endif
 2453         }
 2454       } else {
 2455         // 32-bit
 2456         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2457         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2458         if (masm) {
 2459           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2460 #ifndef PRODUCT
 2461         } else {
 2462           st->print("%s  %s, %s\t# spill",
 2463                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2464                      Matcher::regName[dst_first],
 2465                      Matcher::regName[src_first]);
 2466 #endif
 2467         }
 2468       }
 2469       return 0;
 2470     } else if (dst_first_rc == rc_kreg) {
 2471       assert(false, "Illegal spilling");
 2472       return 0;
 2473     }
 2474   } else if (src_first_rc == rc_kreg) {
 2475     if (dst_first_rc == rc_stack) {
      // kreg -> mem
 2477       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2478           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2479         // 64-bit
 2480         int offset = ra_->reg2offset(dst_first);
 2481         if (masm) {
 2482           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2483 #ifndef PRODUCT
 2484         } else {
 2485           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 2486                      offset,
 2487                      Matcher::regName[src_first]);
 2488 #endif
 2489         }
 2490       }
 2491       return 0;
 2492     } else if (dst_first_rc == rc_int) {
 2493       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2494           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2495         // 64-bit
 2496         if (masm) {
 2497           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2498 #ifndef PRODUCT
 2499         } else {
 2500          st->print("kmovq   %s, %s\t# spill",
 2501                      Matcher::regName[dst_first],
 2502                      Matcher::regName[src_first]);
 2503 #endif
 2504         }
 2505       }
 2506       Unimplemented();
 2507       return 0;
 2508     } else if (dst_first_rc == rc_kreg) {
 2509       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2510           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2511         // 64-bit
 2512         if (masm) {
 2513           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2514 #ifndef PRODUCT
 2515         } else {
 2516          st->print("kmovq   %s, %s\t# spill",
 2517                      Matcher::regName[dst_first],
 2518                      Matcher::regName[src_first]);
 2519 #endif
 2520         }
 2521       }
 2522       return 0;
 2523     } else if (dst_first_rc == rc_float) {
 2524       assert(false, "Illegal spill");
 2525       return 0;
 2526     }
 2527   }
 2528 
  assert(false, "unhandled spill combination");
 2530   Unimplemented();
 2531   return 0;
 2532 }
 2533 
 2534 #ifndef PRODUCT
 2535 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2536   implementation(nullptr, ra_, false, st);
 2537 }
 2538 #endif
 2539 
 2540 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2541   implementation(masm, ra_, false, nullptr);
 2542 }
 2543 
 2544 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2545   return MachNode::size(ra_);
 2546 }
 2547 
 2548 //=============================================================================
 2549 #ifndef PRODUCT
 2550 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2551 {
 2552   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2553   int reg = ra_->get_reg_first(this);
 2554   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2555             Matcher::regName[reg], offset);
 2556 }
 2557 #endif
 2558 
 2559 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2560 {
 2561   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2562   int reg = ra_->get_encode(this);
 2563 
 2564   __ lea(as_Register(reg), Address(rsp, offset));
 2565 }
 2566 
 2567 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2568 {
 2569   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
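  // lea reg, [rsp + offset] encodes as prefix + opcode + ModRM + SIB plus a
  // 1-byte (offset < 0x80) or 4-byte displacement; a REX2 prefix adds one byte.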
 2570   if (ra_->get_encode(this) > 15) {
 2571     return (offset < 0x80) ? 6 : 9; // REX2
 2572   } else {
 2573     return (offset < 0x80) ? 5 : 8; // REX
 2574   }
 2575 }
 2576 
 2577 //=============================================================================
 2578 #ifndef PRODUCT
 2579 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2580 {
 2581   st->print_cr("MachVEPNode");
 2582 }
 2583 #endif
 2584 
 2585 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2586 {
 2587   CodeBuffer* cbuf = masm->code();
 2588   uint insts_size = cbuf->insts_size();
 2589   if (!_verified) {
 2590     __ ic_check(1);
 2591   } else {
 2592     // TODO 8284443 Avoid creation of temporary frame
 2593     if (ra_->C->stub_function() == nullptr) {
 2594       __ verified_entry(ra_->C, 0);
 2595       __ entry_barrier();
 2596       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2597       __ remove_frame(initial_framesize, false);
 2598     }
 2599     // Unpack inline type args passed as oop and then jump to
 2600     // the verified entry point (skipping the unverified entry).
 2601     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2602     // Emit code for verified entry and save increment for stack repair on return
 2603     __ verified_entry(ra_->C, sp_inc);
 2604     if (Compile::current()->output()->in_scratch_emit_size()) {
 2605       Label dummy_verified_entry;
 2606       __ jmp(dummy_verified_entry);
 2607     } else {
 2608       __ jmp(*_verified_entry);
 2609     }
 2610   }
  /* WARNING: these NOPs are critical so that the verified entry point is
     properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
 2613   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 2614   nops_cnt &= 0x3; // Do not add nops if code is aligned.
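  // e.g. 7 bytes emitted so far: nops_cnt = (4 - (7 & 0x3)) & 0x3 = 1 nop;
  // 8 bytes emitted: 0 nops.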
 2615   if (nops_cnt > 0) {
 2616     __ nop(nops_cnt);
 2617   }
 2618 }
 2619 
 2620 //=============================================================================
 2621 #ifndef PRODUCT
 2622 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2623 {
 2624   if (UseCompressedClassPointers) {
 2625     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2626     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2627   } else {
 2628     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2629     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2630   }
 2631   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2632 }
 2633 #endif
 2634 
 2635 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2636 {
 2637   __ ic_check(InteriorEntryAlignment);
 2638 }
 2639 
 2640 
 2641 //=============================================================================
 2642 
 2643 bool Matcher::supports_vector_calling_convention(void) {
 2644   return EnableVectorSupport;
 2645 }
 2646 
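// Vector values are returned in XMM0. The pair's second half names the last
// 32-bit slot covered by the vector (XMM0b/d/h/p for 64/128/256/512 bits).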
 2647 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2648   assert(EnableVectorSupport, "sanity");
 2649   int lo = XMM0_num;
 2650   int hi = XMM0b_num;
 2651   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2652   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2653   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2654   return OptoRegPair(hi, lo);
 2655 }
 2656 
 2657 // Is this branch offset short enough that a short branch can be used?
 2658 //
 2659 // NOTE: If the platform does not provide any short branch variants, then
 2660 //       this method should return false for offset 0.
 2661 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86, a branch displacement is calculated relative to the address
  // of the next instruction.
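  // e.g. a 2-byte short branch with offset 100 has displacement 100 - 2 = 98,
  // which fits the signed 8-bit range checked below.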
 2665   offset -= br_size;
 2666 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
 2669   if (rule == jmpConUCF2_rule)
 2670     return (-126 <= offset && offset <= 125);
 2671   return (-128 <= offset && offset <= 127);
 2672 }
 2673 
 2674 // Return whether or not this register is ever used as an argument.
 2675 // This function is used on startup to build the trampoline stubs in
 2676 // generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
// available to the callee.
 2679 bool Matcher::can_be_java_arg(int reg)
 2680 {
 2681   return
 2682     reg ==  RDI_num || reg == RDI_H_num ||
 2683     reg ==  RSI_num || reg == RSI_H_num ||
 2684     reg ==  RDX_num || reg == RDX_H_num ||
 2685     reg ==  RCX_num || reg == RCX_H_num ||
 2686     reg ==   R8_num || reg ==  R8_H_num ||
 2687     reg ==   R9_num || reg ==  R9_H_num ||
 2688     reg ==  R12_num || reg == R12_H_num ||
 2689     reg == XMM0_num || reg == XMM0b_num ||
 2690     reg == XMM1_num || reg == XMM1b_num ||
 2691     reg == XMM2_num || reg == XMM2b_num ||
 2692     reg == XMM3_num || reg == XMM3b_num ||
 2693     reg == XMM4_num || reg == XMM4b_num ||
 2694     reg == XMM5_num || reg == XMM5b_num ||
 2695     reg == XMM6_num || reg == XMM6b_num ||
 2696     reg == XMM7_num || reg == XMM7b_num;
 2697 }
 2698 
 2699 bool Matcher::is_spillable_arg(int reg)
 2700 {
 2701   return can_be_java_arg(reg);
 2702 }
 2703 
 2704 uint Matcher::int_pressure_limit()
 2705 {
 2706   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2707 }
 2708 
 2709 uint Matcher::float_pressure_limit()
 2710 {
  // After experimenting with different values, the following default threshold
 2712   // works best for LCM's register pressure scheduling on x64.
 2713   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2714   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2715   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2716 }
 2717 
 2718 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply when the
  // divisor is constant is faster than the hardware
  // DIV instruction (it uses MulHiL).
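  // For example, n / 3 can be computed as the high 64 bits of
  // n * 0x5555555555555556 plus a sign fix-up, avoiding DIV entirely.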
 2722   return false;
 2723 }
 2724 
 2725 // Register for DIVI projection of divmodI
 2726 const RegMask& Matcher::divI_proj_mask() {
 2727   return INT_RAX_REG_mask();
 2728 }
 2729 
 2730 // Register for MODI projection of divmodI
 2731 const RegMask& Matcher::modI_proj_mask() {
 2732   return INT_RDX_REG_mask();
 2733 }
 2734 
 2735 // Register for DIVL projection of divmodL
 2736 const RegMask& Matcher::divL_proj_mask() {
 2737   return LONG_RAX_REG_mask();
 2738 }
 2739 
 2740 // Register for MODL projection of divmodL
 2741 const RegMask& Matcher::modL_proj_mask() {
 2742   return LONG_RDX_REG_mask();
 2743 }
 2744 
 2745 %}
 2746 
 2747 source_hpp %{
 2748 // Header information of the source block.
 2749 // Method declarations/definitions which are used outside
 2750 // the ad-scope can conveniently be defined here.
 2751 //
 2752 // To keep related declarations/definitions/uses close together,
 2753 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 2754 
 2755 #include "runtime/vm_version.hpp"
 2756 
 2757 class NativeJump;
 2758 
 2759 class CallStubImpl {
 2760 
 2761   //--------------------------------------------------------------
 2762   //---<  Used for optimization in Compile::shorten_branches  >---
 2763   //--------------------------------------------------------------
 2764 
 2765  public:
 2766   // Size of call trampoline stub.
 2767   static uint size_call_trampoline() {
 2768     return 0; // no call trampolines on this platform
 2769   }
 2770 
 2771   // number of relocations needed by a call trampoline stub
 2772   static uint reloc_call_trampoline() {
 2773     return 0; // no call trampolines on this platform
 2774   }
 2775 };
 2776 
 2777 class HandlerImpl {
 2778 
 2779  public:
 2780 
 2781   static int emit_exception_handler(C2_MacroAssembler *masm);
 2782   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2783 
 2784   static uint size_exception_handler() {
 2785     // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization.  (4932387)
 2788     // Note that this value is also credited (in output.cpp) to
 2789     // the size of the code section.
 2790     return NativeJump::instruction_size;
 2791   }
 2792 
 2793   static uint size_deopt_handler() {
    // Three 5-byte instructions plus one move for the unreachable address.
 2795     return 15+3;
 2796   }
 2797 };
 2798 
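// Map a vector width in bytes to its AVX length encoding; 4- and 8-byte
// vectors are emitted with 128-bit (XMM) encodings.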
 2799 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
 2801     case  4: // fall-through
 2802     case  8: // fall-through
 2803     case 16: return Assembler::AVX_128bit;
 2804     case 32: return Assembler::AVX_256bit;
 2805     case 64: return Assembler::AVX_512bit;
 2806 
 2807     default: {
 2808       ShouldNotReachHere();
 2809       return Assembler::AVX_NoVec;
 2810     }
 2811   }
 2812 }
 2813 
 2814 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2815   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2816 }
 2817 
 2818 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2819   uint def_idx = use->operand_index(opnd);
 2820   Node* def = use->in(def_idx);
 2821   return vector_length_encoding(def);
 2822 }
 2823 
 2824 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2825   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2826          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2827 }
 2828 
 2829 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2830   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2831            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2832 }
 2833 
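// Platform-dependent node flags. Instruction patterns use them to record the
// Intel JCC erratum tag and which EFLAGS condition bits a node sets or clears.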
 2834 class Node::PD {
 2835 public:
 2836   enum NodeFlags {
 2837     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2838     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2839     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2840     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2841     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2842     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2843     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2844     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2845     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2846     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2847     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2848     _last_flag                = Flag_clears_sign_flag
 2849   };
 2850 };
 2851 
 2852 %} // end source_hpp
 2853 
 2854 source %{
 2855 
 2856 #include "opto/addnode.hpp"
 2857 #include "c2_intelJccErratum_x86.hpp"
 2858 
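// Reserve extra code-buffer space for any padding the Intel JCC erratum
// mitigation may insert in front of affected branches.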
 2859 void PhaseOutput::pd_perform_mach_node_analysis() {
 2860   if (VM_Version::has_intel_jcc_erratum()) {
 2861     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2862     _buf_sizes._code += extra_padding;
 2863   }
 2864 }
 2865 
 2866 int MachNode::pd_alignment_required() const {
 2867   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2868     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2869     return IntelJccErratum::largest_jcc_size() + 1;
 2870   } else {
 2871     return 1;
 2872   }
 2873 }
 2874 
 2875 int MachNode::compute_padding(int current_offset) const {
 2876   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2877     Compile* C = Compile::current();
 2878     PhaseOutput* output = C->output();
 2879     Block* block = output->block();
 2880     int index = output->index();
 2881     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2882   } else {
 2883     return 0;
 2884   }
 2885 }
 2886 
 2887 // Emit exception handler code.
 2888 // Stuff framesize into a register and call a VM stub routine.
 2889 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) {
 2890 
 2891   // Note that the code buffer's insts_mark is always relative to insts.
 2892   // That's why we must use the macroassembler to generate a handler.
 2893   address base = __ start_a_stub(size_exception_handler());
 2894   if (base == nullptr) {
 2895     ciEnv::current()->record_failure("CodeCache is full");
 2896     return 0;  // CodeBuffer::expand failed
 2897   }
 2898   int offset = __ offset();
 2899   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2900   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
 2901   __ end_a_stub();
 2902   return offset;
 2903 }
 2904 
 2905 // Emit deopt handler code.
 2906 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2907 
 2908   // Note that the code buffer's insts_mark is always relative to insts.
 2909   // That's why we must use the macroassembler to generate a handler.
 2910   address base = __ start_a_stub(size_deopt_handler());
 2911   if (base == nullptr) {
 2912     ciEnv::current()->record_failure("CodeCache is full");
 2913     return 0;  // CodeBuffer::expand failed
 2914   }
 2915   int offset = __ offset();
 2916 
 2917   address the_pc = (address) __ pc();
 2918   Label next;
 2919   // push a "the_pc" on the stack without destroying any registers
 2920   // as they all may be live.
 2921 
 2922   // push address of "next"
 2923   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
 2924   __ bind(next);
 2925   // adjust it so it matches "the_pc"
 2926   __ subptr(Address(rsp, 0), __ offset() - offset);
 2927 
 2928   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2929   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2930   __ end_a_stub();
 2931   return offset;
 2932 }
 2933 
 2934 static Assembler::Width widthForType(BasicType bt) {
 2935   if (bt == T_BYTE) {
 2936     return Assembler::B;
 2937   } else if (bt == T_SHORT) {
 2938     return Assembler::W;
 2939   } else if (bt == T_INT) {
 2940     return Assembler::D;
 2941   } else {
 2942     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2943     return Assembler::Q;
 2944   }
 2945 }
 2946 
 2947 //=============================================================================
 2948 
 2949   // Float masks come from different places depending on platform.
 2950   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
 2951   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
 2952   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
 2953   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 2954   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
 2955   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
 2956   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 2957   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 2958   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
 2959   static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 2960   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 2961   static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 2962   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
 2963   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
 2964   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
 2965   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 2966   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
 2967   static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
 2968   static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
 2969 
 2970 //=============================================================================
 2971 bool Matcher::match_rule_supported(int opcode) {
 2972   if (!has_match_rule(opcode)) {
 2973     return false; // no match rule present
 2974   }
 2975   switch (opcode) {
 2976     case Op_AbsVL:
 2977     case Op_StoreVectorScatter:
 2978       if (UseAVX < 3) {
 2979         return false;
 2980       }
 2981       break;
 2982     case Op_PopCountI:
 2983     case Op_PopCountL:
 2984       if (!UsePopCountInstruction) {
 2985         return false;
 2986       }
 2987       break;
 2988     case Op_PopCountVI:
 2989       if (UseAVX < 2) {
 2990         return false;
 2991       }
 2992       break;
 2993     case Op_CompressV:
 2994     case Op_ExpandV:
 2995     case Op_PopCountVL:
 2996       if (UseAVX < 2) {
 2997         return false;
 2998       }
 2999       break;
 3000     case Op_MulVI:
 3001       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3002         return false;
 3003       }
 3004       break;
 3005     case Op_MulVL:
 3006       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3007         return false;
 3008       }
 3009       break;
 3010     case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
 3012         return false;
 3013       }
 3014       break;
 3015     case Op_AbsVB:
 3016     case Op_AbsVS:
 3017     case Op_AbsVI:
 3018     case Op_AddReductionVI:
 3019     case Op_AndReductionV:
 3020     case Op_OrReductionV:
 3021     case Op_XorReductionV:
 3022       if (UseSSE < 3) { // requires at least SSSE3
 3023         return false;
 3024       }
 3025       break;
 3026     case Op_MaxHF:
 3027     case Op_MinHF:
 3028       if (!VM_Version::supports_avx512vlbw()) {
 3029         return false;
 3030       }  // fallthrough
 3031     case Op_AddHF:
 3032     case Op_DivHF:
 3033     case Op_FmaHF:
 3034     case Op_MulHF:
 3035     case Op_ReinterpretS2HF:
 3036     case Op_ReinterpretHF2S:
 3037     case Op_SubHF:
 3038     case Op_SqrtHF:
 3039       if (!VM_Version::supports_avx512_fp16()) {
 3040         return false;
 3041       }
 3042       break;
 3043     case Op_VectorLoadShuffle:
 3044     case Op_VectorRearrange:
 3045     case Op_MulReductionVI:
 3046       if (UseSSE < 4) { // requires at least SSE4
 3047         return false;
 3048       }
 3049       break;
 3050     case Op_IsInfiniteF:
 3051     case Op_IsInfiniteD:
 3052       if (!VM_Version::supports_avx512dq()) {
 3053         return false;
 3054       }
 3055       break;
 3056     case Op_SqrtVD:
 3057     case Op_SqrtVF:
 3058     case Op_VectorMaskCmp:
 3059     case Op_VectorCastB2X:
 3060     case Op_VectorCastS2X:
 3061     case Op_VectorCastI2X:
 3062     case Op_VectorCastL2X:
 3063     case Op_VectorCastF2X:
 3064     case Op_VectorCastD2X:
 3065     case Op_VectorUCastB2X:
 3066     case Op_VectorUCastS2X:
 3067     case Op_VectorUCastI2X:
 3068     case Op_VectorMaskCast:
 3069       if (UseAVX < 1) { // enabled for AVX only
 3070         return false;
 3071       }
 3072       break;
 3073     case Op_PopulateIndex:
 3074       if (UseAVX < 2) {
 3075         return false;
 3076       }
 3077       break;
 3078     case Op_RoundVF:
 3079       if (UseAVX < 2) { // enabled for AVX2 only
 3080         return false;
 3081       }
 3082       break;
 3083     case Op_RoundVD:
 3084       if (UseAVX < 3) {
 3085         return false;  // enabled for AVX3 only
 3086       }
 3087       break;
 3088     case Op_CompareAndSwapL:
 3089     case Op_CompareAndSwapP:
 3090       break;
 3091     case Op_StrIndexOf:
 3092       if (!UseSSE42Intrinsics) {
 3093         return false;
 3094       }
 3095       break;
 3096     case Op_StrIndexOfChar:
 3097       if (!UseSSE42Intrinsics) {
 3098         return false;
 3099       }
 3100       break;
 3101     case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
 3103         return false;
 3104       }
 3105       break;
 3106     case Op_MulVB:
 3107     case Op_LShiftVB:
 3108     case Op_RShiftVB:
 3109     case Op_URShiftVB:
 3110     case Op_VectorInsert:
 3111     case Op_VectorLoadMask:
 3112     case Op_VectorStoreMask:
 3113     case Op_VectorBlend:
 3114       if (UseSSE < 4) {
 3115         return false;
 3116       }
 3117       break;
 3118     case Op_MaxD:
 3119     case Op_MaxF:
 3120     case Op_MinD:
 3121     case Op_MinF:
 3122       if (UseAVX < 1) { // enabled for AVX only
 3123         return false;
 3124       }
 3125       break;
 3126     case Op_CacheWB:
 3127     case Op_CacheWBPreSync:
 3128     case Op_CacheWBPostSync:
 3129       if (!VM_Version::supports_data_cache_line_flush()) {
 3130         return false;
 3131       }
 3132       break;
 3133     case Op_ExtractB:
 3134     case Op_ExtractL:
 3135     case Op_ExtractI:
 3136     case Op_RoundDoubleMode:
 3137       if (UseSSE < 4) {
 3138         return false;
 3139       }
 3140       break;
 3141     case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
 3143         return false; // 128bit vroundpd is not available
 3144       }
 3145       break;
 3146     case Op_LoadVectorGather:
 3147     case Op_LoadVectorGatherMasked:
 3148       if (UseAVX < 2) {
 3149         return false;
 3150       }
 3151       break;
 3152     case Op_FmaF:
 3153     case Op_FmaD:
 3154     case Op_FmaVD:
 3155     case Op_FmaVF:
 3156       if (!UseFMA) {
 3157         return false;
 3158       }
 3159       break;
 3160     case Op_MacroLogicV:
 3161       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3162         return false;
 3163       }
 3164       break;
 3165 
 3166     case Op_VectorCmpMasked:
 3167     case Op_VectorMaskGen:
 3168       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3169         return false;
 3170       }
 3171       break;
 3172     case Op_VectorMaskFirstTrue:
 3173     case Op_VectorMaskLastTrue:
 3174     case Op_VectorMaskTrueCount:
 3175     case Op_VectorMaskToLong:
 3176       if (UseAVX < 1) {
 3177          return false;
 3178       }
 3179       break;
 3180     case Op_RoundF:
 3181     case Op_RoundD:
 3182       break;
 3183     case Op_CopySignD:
 3184     case Op_CopySignF:
 3185       if (UseAVX < 3)  {
 3186         return false;
 3187       }
 3188       if (!VM_Version::supports_avx512vl()) {
 3189         return false;
 3190       }
 3191       break;
 3192     case Op_CompressBits:
 3193     case Op_ExpandBits:
 3194       if (!VM_Version::supports_bmi2()) {
 3195         return false;
 3196       }
 3197       break;
 3198     case Op_CompressM:
 3199       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3200         return false;
 3201       }
 3202       break;
 3203     case Op_ConvF2HF:
 3204     case Op_ConvHF2F:
 3205       if (!VM_Version::supports_float16()) {
 3206         return false;
 3207       }
 3208       break;
 3209     case Op_VectorCastF2HF:
 3210     case Op_VectorCastHF2F:
 3211       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3212         return false;
 3213       }
 3214       break;
 3215   }
 3216   return true;  // Match rules are supported by default.
 3217 }
 3218 
 3219 //------------------------------------------------------------------------
 3220 
 3221 static inline bool is_pop_count_instr_target(BasicType bt) {
 3222   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3223          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3224 }
 3225 
 3226 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3227   return match_rule_supported_vector(opcode, vlen, bt);
 3228 }
 3229 
// Identify extra cases in which we might want to provide match rules for vector nodes
// and other intrinsics guarded with vector length (vlen) and element type (bt).
 3232 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3233   if (!match_rule_supported(opcode)) {
 3234     return false;
 3235   }
 3236   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3237   //   * SSE2 supports 128bit vectors for all types;
 3238   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3239   //   * AVX2 supports 256bit vectors for all types;
 3240   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3241   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3242   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3243   // And MaxVectorSize is taken into account as well.
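  // For example, a 32-element BYTE vector (256 bits) is rejected on AVX1 but
  // accepted on AVX2, per the rules above.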
 3244   if (!vector_size_supported(bt, vlen)) {
 3245     return false;
 3246   }
 3247   // Special cases which require vector length follow:
 3248   //   * implementation limitations
 3249   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3250   //   * 128bit vroundpd instruction is present only in AVX1
 3251   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
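  // e.g. vlen == 8 elements of T_INT: 8 * 4 * 8 == 256 bits.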
 3252   switch (opcode) {
 3253     case Op_MaxVHF:
 3254     case Op_MinVHF:
 3255       if (!VM_Version::supports_avx512bw()) {
 3256         return false;
 3257       }
 3258     case Op_AddVHF:
 3259     case Op_DivVHF:
 3260     case Op_FmaVHF:
 3261     case Op_MulVHF:
 3262     case Op_SubVHF:
 3263     case Op_SqrtVHF:
 3264       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3265         return false;
 3266       }
 3267       if (!VM_Version::supports_avx512_fp16()) {
 3268         return false;
 3269       }
 3270       break;
 3271     case Op_AbsVF:
 3272     case Op_NegVF:
      if ((vlen == 16) && !VM_Version::supports_avx512dq()) {
 3274         return false; // 512bit vandps and vxorps are not available
 3275       }
 3276       break;
 3277     case Op_AbsVD:
 3278     case Op_NegVD:
      if ((vlen == 8) && !VM_Version::supports_avx512dq()) {
 3280         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3281       }
 3282       break;
 3283     case Op_RotateRightV:
 3284     case Op_RotateLeftV:
 3285       if (bt != T_INT && bt != T_LONG) {
 3286         return false;
 3287       } // fallthrough
 3288     case Op_MacroLogicV:
 3289       if (!VM_Version::supports_evex() ||
 3290           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3291         return false;
 3292       }
 3293       break;
 3294     case Op_ClearArray:
 3295     case Op_VectorMaskGen:
 3296     case Op_VectorCmpMasked:
 3297       if (!VM_Version::supports_avx512bw()) {
 3298         return false;
 3299       }
 3300       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3301         return false;
 3302       }
 3303       break;
 3304     case Op_LoadVectorMasked:
 3305     case Op_StoreVectorMasked:
 3306       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3307         return false;
 3308       }
 3309       break;
 3310     case Op_UMinV:
 3311     case Op_UMaxV:
 3312       if (UseAVX == 0) {
 3313         return false;
 3314       }
 3315       break;
 3316     case Op_MaxV:
 3317     case Op_MinV:
 3318       if (UseSSE < 4 && is_integral_type(bt)) {
 3319         return false;
 3320       }
      if (bt == T_FLOAT || bt == T_DOUBLE) {
        // Float/Double intrinsics are enabled for the AVX family currently.
        if (UseAVX == 0) {
          return false;
        }
        // 512-bit Float/Double intrinsics need AVX512DQ
        if (UseAVX > 2 && !VM_Version::supports_avx512dq() && size_in_bits == 512) {
          return false;
        }
      }
 3330       break;
 3331     case Op_CallLeafVector:
 3332       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3333         return false;
 3334       }
 3335       break;
 3336     case Op_AddReductionVI:
 3337       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3338         return false;
 3339       }
 3340       // fallthrough
 3341     case Op_AndReductionV:
 3342     case Op_OrReductionV:
 3343     case Op_XorReductionV:
 3344       if (is_subword_type(bt) && (UseSSE < 4)) {
 3345         return false;
 3346       }
 3347       break;
 3348     case Op_MinReductionV:
 3349     case Op_MaxReductionV:
 3350       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3351         return false;
 3352       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3353         return false;
 3354       }
 3355       // Float/Double intrinsics enabled for AVX family.
 3356       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3357         return false;
 3358       }
 3359       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3360         return false;
 3361       }
 3362       break;
 3363     case Op_VectorTest:
 3364       if (UseSSE < 4) {
 3365         return false; // Implementation limitation
 3366       } else if (size_in_bits < 32) {
 3367         return false; // Implementation limitation
 3368       }
 3369       break;
 3370     case Op_VectorLoadShuffle:
 3371     case Op_VectorRearrange:
      if (vlen == 2) {
 3373         return false; // Implementation limitation due to how shuffle is loaded
 3374       } else if (size_in_bits == 256 && UseAVX < 2) {
 3375         return false; // Implementation limitation
 3376       }
 3377       break;
 3378     case Op_VectorLoadMask:
 3379     case Op_VectorMaskCast:
 3380       if (size_in_bits == 256 && UseAVX < 2) {
 3381         return false; // Implementation limitation
 3382       }
 3383       // fallthrough
 3384     case Op_VectorStoreMask:
 3385       if (vlen == 2) {
 3386         return false; // Implementation limitation
 3387       }
 3388       break;
 3389     case Op_PopulateIndex:
 3390       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3391         return false;
 3392       }
 3393       break;
 3394     case Op_VectorCastB2X:
 3395     case Op_VectorCastS2X:
 3396     case Op_VectorCastI2X:
 3397       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3398         return false;
 3399       }
 3400       break;
 3401     case Op_VectorCastL2X:
 3402       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3403         return false;
 3404       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3405         return false;
 3406       }
 3407       break;
 3408     case Op_VectorCastF2X: {
        // As per JLS section 5.1.3, narrowing conversions to sub-word types
        // happen after an intermediate conversion to integer, and the special
        // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
 3412         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3413         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3414           return false;
 3415         }
 3416       }
 3417       // fallthrough
 3418     case Op_VectorCastD2X:
 3419       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3420         return false;
 3421       }
 3422       break;
 3423     case Op_VectorCastF2HF:
 3424     case Op_VectorCastHF2F:
 3425       if (!VM_Version::supports_f16c() &&
 3426          ((!VM_Version::supports_evex() ||
 3427          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3428         return false;
 3429       }
 3430       break;
 3431     case Op_RoundVD:
 3432       if (!VM_Version::supports_avx512dq()) {
 3433         return false;
 3434       }
 3435       break;
 3436     case Op_MulReductionVI:
 3437       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3438         return false;
 3439       }
 3440       break;
 3441     case Op_LoadVectorGatherMasked:
 3442       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3443         return false;
 3444       }
 3445       if (is_subword_type(bt) &&
 3446          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3447           (size_in_bits < 64)                                      ||
 3448           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3449         return false;
 3450       }
 3451       break;
 3452     case Op_StoreVectorScatterMasked:
 3453     case Op_StoreVectorScatter:
 3454       if (is_subword_type(bt)) {
 3455         return false;
 3456       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3457         return false;
 3458       }
 3459       // fallthrough
 3460     case Op_LoadVectorGather:
 3461       if (!is_subword_type(bt) && size_in_bits == 64) {
 3462         return false;
 3463       }
 3464       if (is_subword_type(bt) && size_in_bits < 64) {
 3465         return false;
 3466       }
 3467       break;
 3468     case Op_SaturatingAddV:
 3469     case Op_SaturatingSubV:
 3470       if (UseAVX < 1) {
 3471         return false; // Implementation limitation
 3472       }
 3473       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3474         return false;
 3475       }
 3476       break;
 3477     case Op_SelectFromTwoVector:
      if (size_in_bits < 128) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false;
      }
      if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
        return false;
      }
      break;
 3494     case Op_MaskAll:
 3495       if (!VM_Version::supports_evex()) {
 3496         return false;
 3497       }
 3498       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3499         return false;
 3500       }
 3501       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3502         return false;
 3503       }
 3504       break;
 3505     case Op_VectorMaskCmp:
 3506       if (vlen < 2 || size_in_bits < 32) {
 3507         return false;
 3508       }
 3509       break;
 3510     case Op_CompressM:
 3511       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3512         return false;
 3513       }
 3514       break;
 3515     case Op_CompressV:
 3516     case Op_ExpandV:
 3517       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3518         return false;
 3519       }
      if (size_in_bits < 128) {
        return false;
      }
      // fallthrough
 3523     case Op_VectorLongToMask:
 3524       if (UseAVX < 1) {
 3525         return false;
 3526       }
 3527       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3528         return false;
 3529       }
 3530       break;
 3531     case Op_SignumVD:
 3532     case Op_SignumVF:
 3533       if (UseAVX < 1) {
 3534         return false;
 3535       }
 3536       break;
 3537     case Op_PopCountVI:
 3538     case Op_PopCountVL: {
 3539         if (!is_pop_count_instr_target(bt) &&
 3540             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3541           return false;
 3542         }
 3543       }
 3544       break;
 3545     case Op_ReverseV:
 3546     case Op_ReverseBytesV:
 3547       if (UseAVX < 2) {
 3548         return false;
 3549       }
 3550       break;
 3551     case Op_CountTrailingZerosV:
 3552     case Op_CountLeadingZerosV:
 3553       if (UseAVX < 2) {
 3554         return false;
 3555       }
 3556       break;
 3557   }
 3558   return true;  // Per default match rules are supported.
 3559 }
 3560 
 3561 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most of the unary/binary/ternary masked
  // operations share the IR nodes of their non-masked counterparts, with the
  // mask edge being the differentiator. This routine does a strict check on the
  // existence of masked operation patterns by returning false for all opcodes
  // apart from the ones whose masked instruction patterns are defined in this file.
 3568   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3569     return false;
 3570   }
 3571 
 3572   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3573   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3574     return false;
 3575   }
  switch (opcode) {
 3577     // Unary masked operations
 3578     case Op_AbsVB:
 3579     case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      } // fallthrough
 3583     case Op_AbsVI:
 3584     case Op_AbsVL:
 3585       return true;
 3586 
 3587     // Ternary masked operations
 3588     case Op_FmaVF:
 3589     case Op_FmaVD:
 3590       return true;
 3591 
 3592     case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
 3594         return false;
 3595       }
 3596       return true;
 3597 
 3598     // Binary masked operations
 3599     case Op_AddVB:
 3600     case Op_AddVS:
 3601     case Op_SubVB:
 3602     case Op_SubVS:
 3603     case Op_MulVS:
 3604     case Op_LShiftVS:
 3605     case Op_RShiftVS:
 3606     case Op_URShiftVS:
 3607       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3608       if (!VM_Version::supports_avx512bw()) {
 3609         return false;  // Implementation limitation
 3610       }
 3611       return true;
 3612 
 3613     case Op_MulVL:
 3614       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3615       if (!VM_Version::supports_avx512dq()) {
 3616         return false;  // Implementation limitation
 3617       }
 3618       return true;
 3619 
 3620     case Op_AndV:
 3621     case Op_OrV:
 3622     case Op_XorV:
 3623     case Op_RotateRightV:
 3624     case Op_RotateLeftV:
 3625       if (bt != T_INT && bt != T_LONG) {
 3626         return false; // Implementation limitation
 3627       }
 3628       return true;
 3629 
 3630     case Op_VectorLoadMask:
 3631       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3632       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3633         return false;
 3634       }
 3635       return true;
 3636 
 3637     case Op_AddVI:
 3638     case Op_AddVL:
 3639     case Op_AddVF:
 3640     case Op_AddVD:
 3641     case Op_SubVI:
 3642     case Op_SubVL:
 3643     case Op_SubVF:
 3644     case Op_SubVD:
 3645     case Op_MulVI:
 3646     case Op_MulVF:
 3647     case Op_MulVD:
 3648     case Op_DivVF:
 3649     case Op_DivVD:
 3650     case Op_SqrtVF:
 3651     case Op_SqrtVD:
 3652     case Op_LShiftVI:
 3653     case Op_LShiftVL:
 3654     case Op_RShiftVI:
 3655     case Op_RShiftVL:
 3656     case Op_URShiftVI:
 3657     case Op_URShiftVL:
 3658     case Op_LoadVectorMasked:
 3659     case Op_StoreVectorMasked:
 3660     case Op_LoadVectorGatherMasked:
 3661     case Op_StoreVectorScatterMasked:
 3662       return true;
 3663 
 3664     case Op_UMinV:
 3665     case Op_UMaxV:
 3666       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3667         return false;
 3668       } // fallthrough
 3669     case Op_MaxV:
 3670     case Op_MinV:
 3671       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3672         return false; // Implementation limitation
 3673       }
 3674       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3675         return false; // Implementation limitation
 3676       }
 3677       return true;
 3678     case Op_SaturatingAddV:
 3679     case Op_SaturatingSubV:
 3680       if (!is_subword_type(bt)) {
 3681         return false;
 3682       }
 3683       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3684         return false; // Implementation limitation
 3685       }
 3686       return true;
 3687 
 3688     case Op_VectorMaskCmp:
 3689       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3690         return false; // Implementation limitation
 3691       }
 3692       return true;
 3693 
 3694     case Op_VectorRearrange:
 3695       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3696         return false; // Implementation limitation
 3697       }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false; // Implementation limitation
      }
      if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
        return false; // Implementation limitation
      }
 3703       return true;
 3704 
 3705     // Binary Logical operations
 3706     case Op_AndVMask:
 3707     case Op_OrVMask:
 3708     case Op_XorVMask:
 3709       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3710         return false; // Implementation limitation
 3711       }
 3712       return true;
 3713 
 3714     case Op_PopCountVI:
 3715     case Op_PopCountVL:
 3716       if (!is_pop_count_instr_target(bt)) {
 3717         return false;
 3718       }
 3719       return true;
 3720 
 3721     case Op_MaskAll:
 3722       return true;
 3723 
 3724     case Op_CountLeadingZerosV:
 3725       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3726         return true;
 3727       }
 3728     default:
 3729       return false;
 3730   }
 3731 }
 3732 
 3733 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3734   return false;
 3735 }
 3736 
 3737 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3738 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3739   switch (elem_bt) {
 3740     case T_BYTE:  return false;
 3741     case T_SHORT: return !VM_Version::supports_avx512bw();
 3742     case T_INT:   return !VM_Version::supports_avx();
 3743     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3744     default:
 3745       ShouldNotReachHere();
 3746       return false;
 3747   }
 3748 }
 3749 
 3750 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3751   // Prefer predicate if the mask type is "TypeVectMask".
 3752   return vt->isa_vectmask() != nullptr;
 3753 }
 3754 
 3755 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3756   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3757   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3758   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3759       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3760     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3761     return new legVecZOper();
 3762   }
 3763   if (legacy) {
 3764     switch (ideal_reg) {
 3765       case Op_VecS: return new legVecSOper();
 3766       case Op_VecD: return new legVecDOper();
 3767       case Op_VecX: return new legVecXOper();
 3768       case Op_VecY: return new legVecYOper();
 3769       case Op_VecZ: return new legVecZOper();
 3770     }
 3771   } else {
 3772     switch (ideal_reg) {
 3773       case Op_VecS: return new vecSOper();
 3774       case Op_VecD: return new vecDOper();
 3775       case Op_VecX: return new vecXOper();
 3776       case Op_VecY: return new vecYOper();
 3777       case Op_VecZ: return new vecZOper();
 3778     }
 3779   }
 3780   ShouldNotReachHere();
 3781   return nullptr;
 3782 }
 3783 
 3784 bool Matcher::is_reg2reg_move(MachNode* m) {
 3785   switch (m->rule()) {
 3786     case MoveVec2Leg_rule:
 3787     case MoveLeg2Vec_rule:
 3788     case MoveF2VL_rule:
 3789     case MoveF2LEG_rule:
 3790     case MoveVL2F_rule:
 3791     case MoveLEG2F_rule:
 3792     case MoveD2VL_rule:
 3793     case MoveD2LEG_rule:
 3794     case MoveVL2D_rule:
 3795     case MoveLEG2D_rule:
 3796       return true;
 3797     default:
 3798       return false;
 3799   }
 3800 }
 3801 
 3802 bool Matcher::is_generic_vector(MachOper* opnd) {
 3803   switch (opnd->opcode()) {
 3804     case VEC:
 3805     case LEGVEC:
 3806       return true;
 3807     default:
 3808       return false;
 3809   }
 3810 }
 3811 
 3812 //------------------------------------------------------------------------
 3813 
 3814 const RegMask* Matcher::predicate_reg_mask(void) {
 3815   return &_VECTMASK_REG_mask;
 3816 }
 3817 
 3818 // Max vector size in bytes. 0 if not supported.
 3819 int Matcher::vector_width_in_bytes(BasicType bt) {
 3820   assert(is_java_primitive(bt), "only primitive type vectors");
 3821   // SSE2 supports 128bit vectors for all types.
 3822   // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types.
 3824   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
 3825   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3826   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3827     size = (UseAVX > 2) ? 64 : 32;
 3828   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3829     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3830   // Use flag to limit vector size.
 3831   size = MIN2(size,(int)MaxVectorSize);
 3832   // Minimum 2 values in vector (or 4 for bytes).
 3833   switch (bt) {
 3834   case T_DOUBLE:
 3835   case T_LONG:
 3836     if (size < 16) return 0;
 3837     break;
 3838   case T_FLOAT:
 3839   case T_INT:
 3840     if (size < 8) return 0;
 3841     break;
 3842   case T_BOOLEAN:
 3843     if (size < 4) return 0;
 3844     break;
 3845   case T_CHAR:
 3846     if (size < 4) return 0;
 3847     break;
 3848   case T_BYTE:
 3849     if (size < 4) return 0;
 3850     break;
 3851   case T_SHORT:
 3852     if (size < 4) return 0;
 3853     break;
 3854   default:
 3855     ShouldNotReachHere();
 3856   }
 3857   return size;
 3858 }
 3859 
 3860 // Limits on vector size (number of elements) loaded into vector.
 3861 int Matcher::max_vector_size(const BasicType bt) {
 3862   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3863 }
 3864 int Matcher::min_vector_size(const BasicType bt) {
 3865   int max_size = max_vector_size(bt);
 3866   // Min size which can be loaded into vector is 4 bytes.
 3867   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Support calling SVML stubs on single-element (double64) vectors.
 3869   if (bt == T_DOUBLE) {
 3870     size = 1;
 3871   }
 3872   return MIN2(size,max_size);
 3873 }
 3874 
 3875 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3876   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3877   // by default on Cascade Lake
 3878   if (VM_Version::is_default_intel_cascade_lake()) {
 3879     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3880   }
 3881   return Matcher::max_vector_size(bt);
 3882 }
 3883 
 3884 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3885   return -1;
 3886 }
 3887 
 3888 // Vector ideal reg corresponding to specified size in bytes
 3889 uint Matcher::vector_ideal_reg(int size) {
 3890   assert(MaxVectorSize >= size, "");
  switch (size) {
 3892     case  4: return Op_VecS;
 3893     case  8: return Op_VecD;
 3894     case 16: return Op_VecX;
 3895     case 32: return Op_VecY;
 3896     case 64: return Op_VecZ;
 3897   }
 3898   ShouldNotReachHere();
 3899   return 0;
 3900 }
 3901 
 3902 // Check for shift by small constant as well
 3903 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3904   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3905       shift->in(2)->get_int() <= 3 &&
 3906       // Are there other uses besides address expressions?
 3907       !matcher->is_visited(shift)) {
 3908     address_visited.set(shift->_idx); // Flag as address_visited
 3909     mstack.push(shift->in(2), Matcher::Visit);
 3910     Node *conv = shift->in(1);
    // Allow the Matcher to match the rule that bypasses the
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
 3914     if (conv->Opcode() == Op_ConvI2L &&
 3915         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3916         // Are there other uses besides address expressions?
 3917         !matcher->is_visited(conv)) {
 3918       address_visited.set(conv->_idx); // Flag as address_visited
 3919       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3920     } else {
 3921       mstack.push(conv, Matcher::Pre_Visit);
 3922     }
 3923     return true;
 3924   }
 3925   return false;
 3926 }
 3927 
 3928 // This function identifies sub-graphs in which a 'load' node is
 3929 // input to two different nodes, and such that it can be matched
 3930 // with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
 3932 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3933 // refers to the same node.
 3934 //
 3935 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3936 // This is a temporary solution until we make DAGs expressible in ADL.
 3937 template<typename ConType>
 3938 class FusedPatternMatcher {
 3939   Node* _op1_node;
 3940   Node* _mop_node;
 3941   int _con_op;
 3942 
 3943   static int match_next(Node* n, int next_op, int next_op_idx) {
 3944     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3945       return -1;
 3946     }
 3947 
 3948     if (next_op_idx == -1) { // n is commutative, try rotations
 3949       if (n->in(1)->Opcode() == next_op) {
 3950         return 1;
 3951       } else if (n->in(2)->Opcode() == next_op) {
 3952         return 2;
 3953       }
 3954     } else {
 3955       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 3956       if (n->in(next_op_idx)->Opcode() == next_op) {
 3957         return next_op_idx;
 3958       }
 3959     }
 3960     return -1;
 3961   }
 3962 
 3963  public:
 3964   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 3965     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 3966 
 3967   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 3968              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 3969              typename ConType::NativeType con_value) {
 3970     if (_op1_node->Opcode() != op1) {
 3971       return false;
 3972     }
 3973     if (_mop_node->outcnt() > 2) {
 3974       return false;
 3975     }
 3976     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 3977     if (op1_op2_idx == -1) {
 3978       return false;
 3979     }
 3980     // Memory operation must be the other edge
 3981     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 3982 
 3983     // Check that the mop node is really what we want
 3984     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 3985       Node* op2_node = _op1_node->in(op1_op2_idx);
 3986       if (op2_node->outcnt() > 1) {
 3987         return false;
 3988       }
 3989       assert(op2_node->Opcode() == op2, "Should be");
 3990       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 3991       if (op2_con_idx == -1) {
 3992         return false;
 3993       }
 3994       // Memory operation must be the other edge
 3995       int op2_mop_idx = (op2_con_idx & 1) + 1;
 3996       // Check that the memory operation is the same node
 3997       if (op2_node->in(op2_mop_idx) == _mop_node) {
 3998         // Now check the constant
 3999         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4000         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4001           return true;
 4002         }
 4003       }
 4004     }
 4005     return false;
 4006   }
 4007 };
 4008 
 4009 static bool is_bmi_pattern(Node* n, Node* m) {
 4010   assert(UseBMI1Instructions, "sanity");
 4011   if (n != nullptr && m != nullptr) {
 4012     if (m->Opcode() == Op_LoadI) {
 4013       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4014       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4015              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4016              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4017     } else if (m->Opcode() == Op_LoadL) {
 4018       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4019       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4020              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4021              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4022     }
 4023   }
 4024   return false;
 4025 }
 4026 
 4027 // Should the matcher clone input 'm' of node 'n'?
 4028 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
  // If 'n' and 'm' are part of a graph for a BMI instruction, clone the input 'm'.
 4030   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
 4031     mstack.push(m, Visit);
 4032     return true;
 4033   }
 4034   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4035     mstack.push(m, Visit);           // m = ShiftCntV
 4036     return true;
 4037   }
 4038   if (is_encode_and_store_pattern(n, m)) {
 4039     mstack.push(m, Visit);
 4040     return true;
 4041   }
 4042   return false;
 4043 }
 4044 
 4045 // Should the Matcher clone shifts on addressing modes, expecting them
 4046 // to be subsumed into complex addressing expressions or compute them
 4047 // into registers?
 4048 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4049   Node *off = m->in(AddPNode::Offset);
 4050   if (off->is_Con()) {
 4051     address_visited.test_set(m->_idx); // Flag as address_visited
 4052     Node *adr = m->in(AddPNode::Address);
 4053 
 4054     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4055     // AtomicAdd is not an addressing expression.
 4056     // Cheap to find it by looking for screwy base.
 4057     if (adr->is_AddP() &&
 4058         !adr->in(AddPNode::Base)->is_top() &&
 4059         !adr->in(AddPNode::Offset)->is_Con() &&
 4060         off->get_long() == (int) (off->get_long()) && // immL32
 4061         // Are there other uses besides address expressions?
 4062         !is_visited(adr)) {
 4063       address_visited.set(adr->_idx); // Flag as address_visited
 4064       Node *shift = adr->in(AddPNode::Offset);
 4065       if (!clone_shift(shift, this, mstack, address_visited)) {
 4066         mstack.push(shift, Pre_Visit);
 4067       }
 4068       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4069       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4070     } else {
 4071       mstack.push(adr, Pre_Visit);
 4072     }
 4073 
 4074     // Clone X+offset as it also folds into most addressing expressions
 4075     mstack.push(off, Visit);
 4076     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4077     return true;
 4078   } else if (clone_shift(off, this, mstack, address_visited)) {
 4079     address_visited.test_set(m->_idx); // Flag as address_visited
 4080     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4081     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4082     return true;
 4083   }
 4084   return false;
 4085 }
 4086 
 4087 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4088   switch (bt) {
 4089     case BoolTest::eq:
 4090       return Assembler::eq;
 4091     case BoolTest::ne:
 4092       return Assembler::neq;
 4093     case BoolTest::le:
 4094     case BoolTest::ule:
 4095       return Assembler::le;
 4096     case BoolTest::ge:
 4097     case BoolTest::uge:
 4098       return Assembler::nlt;
 4099     case BoolTest::lt:
 4100     case BoolTest::ult:
 4101       return Assembler::lt;
 4102     case BoolTest::gt:
 4103     case BoolTest::ugt:
 4104       return Assembler::nle;
 4105     default : ShouldNotReachHere(); return Assembler::_false;
 4106   }
 4107 }
 4108 
 4109 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4110   switch (bt) {
 4111   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4112   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4113   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4114   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4115   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4116   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4117   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4118   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4119   }
 4120 }
 4121 
 4122 // Helper methods for MachSpillCopyNode::implementation().
 4123 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4124                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4125   assert(ireg == Op_VecS || // 32bit vector
 4126          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4127           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4128          "no non-adjacent vector moves" );
 4129   if (masm) {
 4130     switch (ireg) {
 4131     case Op_VecS: // copy whole register
 4132     case Op_VecD:
 4133     case Op_VecX:
 4134       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4135         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4136       } else {
 4137         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4138      }
 4139       break;
 4140     case Op_VecY:
 4141       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4142         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4143       } else {
 4144         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4145      }
 4146       break;
 4147     case Op_VecZ:
 4148       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4149       break;
 4150     default:
 4151       ShouldNotReachHere();
 4152     }
 4153 #ifndef PRODUCT
 4154   } else {
 4155     switch (ireg) {
 4156     case Op_VecS:
 4157     case Op_VecD:
 4158     case Op_VecX:
 4159       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4160       break;
 4161     case Op_VecY:
 4162     case Op_VecZ:
 4163       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4164       break;
 4165     default:
 4166       ShouldNotReachHere();
 4167     }
 4168 #endif
 4169   }
 4170 }
 4171 
 4172 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4173                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4174   if (masm) {
 4175     if (is_load) {
 4176       switch (ireg) {
 4177       case Op_VecS:
 4178         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4179         break;
 4180       case Op_VecD:
 4181         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4182         break;
 4183       case Op_VecX:
 4184         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4185           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4186         } else {
 4187           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4189         }
 4190         break;
 4191       case Op_VecY:
 4192         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4193           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4194         } else {
 4195           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4197         }
 4198         break;
 4199       case Op_VecZ:
 4200         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4201         break;
 4202       default:
 4203         ShouldNotReachHere();
 4204       }
 4205     } else { // store
 4206       switch (ireg) {
 4207       case Op_VecS:
 4208         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4209         break;
 4210       case Op_VecD:
 4211         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4212         break;
 4213       case Op_VecX:
 4214         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4215           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4216         }
 4217         else {
 4218           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4219         }
 4220         break;
 4221       case Op_VecY:
 4222         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4223           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4224         }
 4225         else {
 4226           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4227         }
 4228         break;
 4229       case Op_VecZ:
 4230         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4231         break;
 4232       default:
 4233         ShouldNotReachHere();
 4234       }
 4235     }
 4236 #ifndef PRODUCT
 4237   } else {
 4238     if (is_load) {
 4239       switch (ireg) {
 4240       case Op_VecS:
 4241         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4242         break;
 4243       case Op_VecD:
 4244         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4245         break;
 4246        case Op_VecX:
 4247         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4248         break;
 4249       case Op_VecY:
 4250       case Op_VecZ:
 4251         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4252         break;
 4253       default:
 4254         ShouldNotReachHere();
 4255       }
 4256     } else { // store
 4257       switch (ireg) {
 4258       case Op_VecS:
 4259         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4260         break;
 4261       case Op_VecD:
 4262         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4263         break;
 4264        case Op_VecX:
 4265         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4266         break;
 4267       case Op_VecY:
 4268       case Op_VecZ:
 4269         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4270         break;
 4271       default:
 4272         ShouldNotReachHere();
 4273       }
 4274     }
 4275 #endif
 4276   }
 4277 }
 4278 
 4279 template <class T>
 4280 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4281   int size = type2aelembytes(bt) * len;
 4282   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4283   for (int i = 0; i < len; i++) {
 4284     int offset = i * type2aelembytes(bt);
 4285     switch (bt) {
 4286       case T_BYTE: val->at(i) = con; break;
 4287       case T_SHORT: {
 4288         jshort c = con;
 4289         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4290         break;
 4291       }
 4292       case T_INT: {
 4293         jint c = con;
 4294         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4295         break;
 4296       }
 4297       case T_LONG: {
 4298         jlong c = con;
 4299         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4300         break;
 4301       }
 4302       case T_FLOAT: {
 4303         jfloat c = con;
 4304         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4305         break;
 4306       }
 4307       case T_DOUBLE: {
 4308         jdouble c = con;
 4309         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4310         break;
 4311       }
 4312       default: assert(false, "%s", type2name(bt));
 4313     }
 4314   }
 4315   return val;
 4316 }
 4317 
 4318 static inline jlong high_bit_set(BasicType bt) {
 4319   switch (bt) {
 4320     case T_BYTE:  return 0x8080808080808080;
 4321     case T_SHORT: return 0x8000800080008000;
 4322     case T_INT:   return 0x8000000080000000;
 4323     case T_LONG:  return 0x8000000000000000;
 4324     default:
 4325       ShouldNotReachHere();
 4326       return 0;
 4327   }
 4328 }
 4329 
 4330 #ifndef PRODUCT
 4331   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4332     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4333   }
 4334 #endif
 4335 
 4336   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4337     __ nop(_count);
 4338   }
 4339 
 4340   uint MachNopNode::size(PhaseRegAlloc*) const {
 4341     return _count;
 4342   }
 4343 
 4344 #ifndef PRODUCT
 4345   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4346     st->print("# breakpoint");
 4347   }
 4348 #endif
 4349 
 4350   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4351     __ int3();
 4352   }
 4353 
 4354   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4355     return MachNode::size(ra_);
 4356   }
 4357 
 4358 %}
 4359 
 4360 //----------ENCODING BLOCK-----------------------------------------------------
 4361 // This block specifies the encoding classes used by the compiler to
 4362 // output byte streams.  Encoding classes are parameterized macros
 4363 // used by Machine Instruction Nodes in order to generate the bit
 4364 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
 4368 // which returns its register number when queried.  CONST_INTER causes
 4369 // an operand to generate a function which returns the value of the
 4370 // constant when queried.  MEMORY_INTER causes an operand to generate
 4371 // four functions which return the Base Register, the Index Register,
 4372 // the Scale Value, and the Offset Value of the operand when queried.
 4373 // COND_INTER causes an operand to generate six functions which return
 4374 // the encoding code (ie - encoding bits for the instruction)
 4375 // associated with each basic boolean condition for a conditional
 4376 // instruction.
 4377 //
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding classes (which must be
// a sequence of enc_class names, and their parameters, specified in
// the encoding block), and they use the opcode keyword to specify, in
// order, their primary, secondary, and tertiary opcode.  Only the
// opcode sections which a particular instruction needs for encoding
// need to be specified.  A function is also available to check
// whether a constant displacement is an oop.
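//
// A minimal hypothetical example of this shape (enc_class and operand
// names are illustrative only):
//
//   enc_class my_enc(rRegI dst) %{
//     __ incrementl($dst$$Register);
//   %}
//
// An instruct rule would then reference it via ins_encode(my_enc(dst)).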
 4386 encode %{
 4387   enc_class cdql_enc(no_rax_rdx_RegI div)
 4388   %{
    // Full implementation of Java idiv and irem; checks for the
    // special case as described in the JVM spec., p.243 & p.271.
 4391     //
 4392     //         normal case                           special case
 4393     //
 4394     // input : rax: dividend                         min_int
 4395     //         reg: divisor                          -1
 4396     //
 4397     // output: rax: quotient  (= rax idiv reg)       min_int
 4398     //         rdx: remainder (= rax irem reg)       0
 4399     //
    //  Code sequence:
 4401     //
 4402     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4403     //    5:   75 07/08                jne    e <normal>
 4404     //    7:   33 d2                   xor    %edx,%edx
 4405     //  [div >= 8 -> offset + 1]
 4406     //  [REX_B]
 4407     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4408     //    c:   74 03/04                je     11 <done>
 4409     // 000000000000000e <normal>:
 4410     //    e:   99                      cltd
 4411     //  [div >= 8 -> offset + 1]
 4412     //  [REX_B]
 4413     //    f:   f7 f9                   idiv   $div
 4414     // 0000000000000011 <done>:
 4415     Label normal;
 4416     Label done;
 4417 
 4418     // cmp    $0x80000000,%eax
 4419     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4420 
 4421     // jne    e <normal>
 4422     __ jccb(Assembler::notEqual, normal);
 4423 
 4424     // xor    %edx,%edx
 4425     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4426 
    // cmp    $0xffffffffffffffff,$div
 4428     __ cmpl($div$$Register, -1);
 4429 
 4430     // je     11 <done>
 4431     __ jccb(Assembler::equal, done);
 4432 
 4433     // <normal>
 4434     // cltd
 4435     __ bind(normal);
 4436     __ cdql();
 4437 
 4438     // idivl
 4439     // <done>
 4440     __ idivl($div$$Register);
 4441     __ bind(done);
 4442   %}
 4443 
 4444   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4445   %{
    // Full implementation of Java ldiv and lrem; checks for the
    // special case as described in the JVM spec., p.243 & p.271.
 4448     //
 4449     //         normal case                           special case
 4450     //
 4451     // input : rax: dividend                         min_long
 4452     //         reg: divisor                          -1
 4453     //
 4454     // output: rax: quotient  (= rax idiv reg)       min_long
 4455     //         rdx: remainder (= rax irem reg)       0
 4456     //
    //  Code sequence:
 4458     //
 4459     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4460     //    7:   00 00 80
 4461     //    a:   48 39 d0                cmp    %rdx,%rax
 4462     //    d:   75 08                   jne    17 <normal>
 4463     //    f:   33 d2                   xor    %edx,%edx
 4464     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4465     //   15:   74 05                   je     1c <done>
 4466     // 0000000000000017 <normal>:
 4467     //   17:   48 99                   cqto
 4468     //   19:   48 f7 f9                idiv   $div
 4469     // 000000000000001c <done>:
 4470     Label normal;
 4471     Label done;
 4472 
 4473     // mov    $0x8000000000000000,%rdx
 4474     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4475 
 4476     // cmp    %rdx,%rax
 4477     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4478 
 4479     // jne    17 <normal>
 4480     __ jccb(Assembler::notEqual, normal);
 4481 
 4482     // xor    %edx,%edx
 4483     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4484 
 4485     // cmp    $0xffffffffffffffff,$div
 4486     __ cmpq($div$$Register, -1);
 4487 
    // je     1c <done>
 4489     __ jccb(Assembler::equal, done);
 4490 
 4491     // <normal>
 4492     // cqto
 4493     __ bind(normal);
 4494     __ cdqq();
 4495 
    // idivq
 4497     // <done>
 4498     __ idivq($div$$Register);
 4499     __ bind(done);
 4500   %}
 4501 
 4502   enc_class clear_avx %{
 4503     DEBUG_ONLY(int off0 = __ offset());
 4504     if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
 4508       __ vzeroupper();
 4509     }
 4510     DEBUG_ONLY(int off1 = __ offset());
 4511     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4512   %}
 4513 
 4514   enc_class Java_To_Runtime(method meth) %{
 4515     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4516     __ call(r10);
 4517     __ post_call_nop();
 4518   %}
 4519 
 4520   enc_class Java_Static_Call(method meth)
 4521   %{
 4522     // JAVA STATIC CALL
 4523     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4524     // determine who we intended to call.
 4525     if (!_method) {
 4526       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4527     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4528       // The NOP here is purely to ensure that eliding a call to
 4529       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4530       __ addr_nop_5();
 4531       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4532     } else {
 4533       int method_index = resolved_method_index(masm);
 4534       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4535                                                   : static_call_Relocation::spec(method_index);
 4536       address mark = __ pc();
 4537       int call_offset = __ offset();
 4538       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4539       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4540         // Calls of the same statically bound method can share
 4541         // a stub to the interpreter.
 4542         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4543       } else {
 4544         // Emit stubs for static call.
 4545         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4546         __ clear_inst_mark();
 4547         if (stub == nullptr) {
 4548           ciEnv::current()->record_failure("CodeCache is full");
 4549           return;
 4550         }
 4551       }
 4552     }
 4553     __ post_call_nop();
 4554   %}
 4555 
 4556   enc_class Java_Dynamic_Call(method meth) %{
 4557     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4558     __ post_call_nop();
 4559   %}
 4560 
 4561   enc_class call_epilog %{
 4562     if (VerifyStackAtCalls) {
 4563       // Check that stack depth is unchanged: find majik cookie on stack
 4564       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4565       Label L;
 4566       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4567       __ jccb(Assembler::equal, L);
 4568       // Die if stack mismatch
 4569       __ int3();
 4570       __ bind(L);
 4571     }
 4572     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
      // The last return value is not set by the callee but is used to pass the null marker to compiled code.
      // Search for the corresponding projection, get the register, and emit code that initializes it.
 4575       uint con = (tf()->range_cc()->cnt() - 1);
 4576       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4577         ProjNode* proj = fast_out(i)->as_Proj();
 4578         if (proj->_con == con) {
 4579           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4580           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4581           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
 4582           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 4583           __ testq(rax, rax);
 4584           __ setb(Assembler::notZero, toReg);
 4585           __ movzbl(toReg, toReg);
 4586           if (reg->is_stack()) {
 4587             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4588             __ movq(Address(rsp, st_off), toReg);
 4589           }
 4590           break;
 4591         }
 4592       }
 4593       if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // rax contains either an oop (if the inline type is buffered) or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
        // if the lowest bit is set, to allow C2 to use the oop after null checking.
        // rax &= (rax & 1) - 1
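        // Worked example of the masking trick: if rax holds a tagged InlineKlass
        // pointer, (rax & 1) - 1 == 0 and the and clears rax; if rax holds a
        // buffered oop (lowest bit 0), (rax & 1) - 1 == -1 and rax is unchanged.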
 4599         __ movptr(rscratch1, rax);
 4600         __ andptr(rscratch1, 0x1);
 4601         __ subptr(rscratch1, 0x1);
 4602         __ andptr(rax, rscratch1);
 4603       }
 4604     }
 4605   %}
 4606 
 4607 %}
 4608 
 4609 //----------FRAME--------------------------------------------------------------
 4610 // Definition of frame structure and management information.
 4611 //
 4612 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4613 //                             |   (to get allocators register number
 4614 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4615 //  r   CALLER     |        |
 4616 //  o     |        +--------+      pad to even-align allocators stack-slot
 4617 //  w     V        |  pad0  |        numbers; owned by CALLER
 4618 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4619 //  h     ^        |   in   |  5
 4620 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4621 //  |     |        |        |  3
 4622 //  |     |        +--------+
 4623 //  V     |        | old out|      Empty on Intel, window on Sparc
 4624 //        |    old |preserve|      Must be even aligned.
 4625 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4626 //        |        |   in   |  3   area for Intel ret address
 4627 //     Owned by    |preserve|      Empty on Sparc.
 4628 //       SELF      +--------+
 4629 //        |        |  pad2  |  2   pad to align old SP
 4630 //        |        +--------+  1
 4631 //        |        | locks  |  0
 4632 //        |        +--------+----> OptoReg::stack0(), even aligned
 4633 //        |        |  pad1  | 11   pad to align new SP
 4634 //        |        +--------+
 4635 //        |        |        | 10
 4636 //        |        | spills |  9   spills
 4637 //        V        |        |  8   (pad0 slot for callee)
 4638 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4639 //        ^        |  out   |  7
 4640 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4641 //     Owned by    +--------+
 4642 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4643 //        |    new |preserve|      Must be even-aligned.
 4644 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4645 //        |        |        |
 4646 //
 4647 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4648 //         known from SELF's arguments and the Java calling convention.
 4649 //         Region 6-7 is determined per call site.
 4650 // Note 2: If the calling convention leaves holes in the incoming argument
 4651 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4652 //         are owned by the CALLEE.  Holes should not be necessary in the
 4653 //         incoming area, as the Java calling convention is completely under
 4654 //         the control of the AD file.  Doubles can be sorted and packed to
 4655 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4656 //         varargs C calling conventions.
 4657 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4658 //         even aligned with pad0 as needed.
 4659 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4660 //         region 6-11 is even aligned; it may be padded out more so that
 4661 //         the region from SP to FP meets the minimum stack alignment.
 4662 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4663 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4664 //         SP meets the minimum alignment.
 4665 
 4666 frame
 4667 %{
 4668   // These three registers define part of the calling convention
 4669   // between compiled code and the interpreter.
 4670   inline_cache_reg(RAX);                // Inline Cache Register
 4671 
 4672   // Optional: name the operand used by cisc-spilling to access
 4673   // [stack_pointer + offset]
 4674   cisc_spilling_operand_name(indOffset32);
 4675 
 4676   // Number of stack slots consumed by locking an object
 4677   sync_stack_slots(2);
 4678 
 4679   // Compiled code's Frame Pointer
 4680   frame_pointer(RSP);
 4681 
 4682   // Interpreter stores its frame pointer in a register which is
 4683   // stored to the stack by I2CAdaptors.
 4684   // I2CAdaptors convert from interpreted java to compiled java.
 4685   interpreter_frame_pointer(RBP);
 4686 
 4687   // Stack alignment requirement
 4688   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4689 
 4690   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4691   // for calls to C.  Supports the var-args backing area for register parms.
 4692   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4693 
 4694   // The after-PROLOG location of the return address.  Location of
 4695   // return address specifies a type (REG or STACK) and a number
 4696   // representing the register number (i.e. - use a register name) or
 4697   // stack slot.
 4698   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4699   // Otherwise, it is above the locks and verification slot and alignment word
 4700   return_addr(STACK - 2 +
 4701               align_up((Compile::current()->in_preserve_stack_slots() +
 4702                         Compile::current()->fixed_slots()),
 4703                        stack_alignment_in_slots()));
 4704 
 4705   // Location of compiled Java return values.  Same as C for now.
 4706   return_value
 4707   %{
 4708     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4709            "only return normal values");
 4710 
 4711     static const int lo[Op_RegL + 1] = {
 4712       0,
 4713       0,
 4714       RAX_num,  // Op_RegN
 4715       RAX_num,  // Op_RegI
 4716       RAX_num,  // Op_RegP
 4717       XMM0_num, // Op_RegF
 4718       XMM0_num, // Op_RegD
 4719       RAX_num   // Op_RegL
 4720     };
 4721     static const int hi[Op_RegL + 1] = {
 4722       0,
 4723       0,
 4724       OptoReg::Bad, // Op_RegN
 4725       OptoReg::Bad, // Op_RegI
 4726       RAX_H_num,    // Op_RegP
 4727       OptoReg::Bad, // Op_RegF
 4728       XMM0b_num,    // Op_RegD
 4729       RAX_H_num     // Op_RegL
 4730     };
 4731     // Excluded flags and vector registers.
 4732     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4733     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4734   %}
 4735 %}
 4736 
 4737 //----------ATTRIBUTES---------------------------------------------------------
 4738 //----------Operand Attributes-------------------------------------------------
 4739 op_attrib op_cost(0);        // Required cost attribute
 4740 
 4741 //----------Instruction Attributes---------------------------------------------
 4742 ins_attrib ins_cost(100);       // Required cost attribute
 4743 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4744 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4745                                 // a non-matching short branch variant
 4746                                 // of some long branch?
 4747 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4748                                 // be a power of 2) specifies the
 4749                                 // alignment that some part of the
 4750                                 // instruction (not necessarily the
 4751                                 // start) requires.  If > 1, a
 4752                                 // compute_padding() function must be
 4753                                 // provided for the instruction
 4754 
 4755 // Whether this node is expanded during code emission into a sequence of
 4756 // instructions and the first instruction can perform an implicit null check.
 4757 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4758 
 4759 //----------OPERANDS-----------------------------------------------------------
 4760 // Operand definitions must precede instruction definitions for correct parsing
 4761 // in the ADLC because operands constitute user defined types which are used in
 4762 // instruction definitions.
 4763 
 4764 //----------Simple Operands----------------------------------------------------
 4765 // Immediate Operands
 4766 // Integer Immediate
 4767 operand immI()
 4768 %{
 4769   match(ConI);
 4770 
 4771   op_cost(10);
 4772   format %{ %}
 4773   interface(CONST_INTER);
 4774 %}
 4775 
 4776 // Constant for test vs zero
 4777 operand immI_0()
 4778 %{
 4779   predicate(n->get_int() == 0);
 4780   match(ConI);
 4781 
 4782   op_cost(0);
 4783   format %{ %}
 4784   interface(CONST_INTER);
 4785 %}
 4786 
 4787 // Constant for increment
 4788 operand immI_1()
 4789 %{
 4790   predicate(n->get_int() == 1);
 4791   match(ConI);
 4792 
 4793   op_cost(0);
 4794   format %{ %}
 4795   interface(CONST_INTER);
 4796 %}
 4797 
 4798 // Constant for decrement
 4799 operand immI_M1()
 4800 %{
 4801   predicate(n->get_int() == -1);
 4802   match(ConI);
 4803 
 4804   op_cost(0);
 4805   format %{ %}
 4806   interface(CONST_INTER);
 4807 %}
 4808 
 4809 operand immI_2()
 4810 %{
 4811   predicate(n->get_int() == 2);
 4812   match(ConI);
 4813 
 4814   op_cost(0);
 4815   format %{ %}
 4816   interface(CONST_INTER);
 4817 %}
 4818 
 4819 operand immI_4()
 4820 %{
 4821   predicate(n->get_int() == 4);
 4822   match(ConI);
 4823 
 4824   op_cost(0);
 4825   format %{ %}
 4826   interface(CONST_INTER);
 4827 %}
 4828 
 4829 operand immI_8()
 4830 %{
 4831   predicate(n->get_int() == 8);
 4832   match(ConI);
 4833 
 4834   op_cost(0);
 4835   format %{ %}
 4836   interface(CONST_INTER);
 4837 %}
 4838 
 4839 // Valid scale values for addressing modes
 4840 operand immI2()
 4841 %{
 4842   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4843   match(ConI);
 4844 
 4845   format %{ %}
 4846   interface(CONST_INTER);
 4847 %}
 4848 
 4849 operand immU7()
 4850 %{
 4851   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4852   match(ConI);
 4853 
 4854   op_cost(5);
 4855   format %{ %}
 4856   interface(CONST_INTER);
 4857 %}
 4858 
 4859 operand immI8()
 4860 %{
 4861   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4862   match(ConI);
 4863 
 4864   op_cost(5);
 4865   format %{ %}
 4866   interface(CONST_INTER);
 4867 %}
 4868 
 4869 operand immU8()
 4870 %{
 4871   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4872   match(ConI);
 4873 
 4874   op_cost(5);
 4875   format %{ %}
 4876   interface(CONST_INTER);
 4877 %}
 4878 
 4879 operand immI16()
 4880 %{
 4881   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4882   match(ConI);
 4883 
 4884   op_cost(10);
 4885   format %{ %}
 4886   interface(CONST_INTER);
 4887 %}
 4888 
 4889 // Int Immediate non-negative
 4890 operand immU31()
 4891 %{
 4892   predicate(n->get_int() >= 0);
 4893   match(ConI);
 4894 
 4895   op_cost(0);
 4896   format %{ %}
 4897   interface(CONST_INTER);
 4898 %}
 4899 
 4900 // Pointer Immediate
 4901 operand immP()
 4902 %{
 4903   match(ConP);
 4904 
 4905   op_cost(10);
 4906   format %{ %}
 4907   interface(CONST_INTER);
 4908 %}
 4909 
 4910 // Null Pointer Immediate
 4911 operand immP0()
 4912 %{
 4913   predicate(n->get_ptr() == 0);
 4914   match(ConP);
 4915 
 4916   op_cost(5);
 4917   format %{ %}
 4918   interface(CONST_INTER);
 4919 %}
 4920 
 4921 // Pointer Immediate
 4922 operand immN() %{
 4923   match(ConN);
 4924 
 4925   op_cost(10);
 4926   format %{ %}
 4927   interface(CONST_INTER);
 4928 %}
 4929 
 4930 operand immNKlass() %{
 4931   match(ConNKlass);
 4932 
 4933   op_cost(10);
 4934   format %{ %}
 4935   interface(CONST_INTER);
 4936 %}
 4937 
 4938 // Null Pointer Immediate
 4939 operand immN0() %{
 4940   predicate(n->get_narrowcon() == 0);
 4941   match(ConN);
 4942 
 4943   op_cost(5);
 4944   format %{ %}
 4945   interface(CONST_INTER);
 4946 %}
 4947 
 4948 operand immP31()
 4949 %{
 4950   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 4951             && (n->get_ptr() >> 31) == 0);
 4952   match(ConP);
 4953 
 4954   op_cost(5);
 4955   format %{ %}
 4956   interface(CONST_INTER);
 4957 %}
 4958 
 4959 
 4960 // Long Immediate
 4961 operand immL()
 4962 %{
 4963   match(ConL);
 4964 
 4965   op_cost(20);
 4966   format %{ %}
 4967   interface(CONST_INTER);
 4968 %}
 4969 
 4970 // Long Immediate 8-bit
 4971 operand immL8()
 4972 %{
 4973   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 4974   match(ConL);
 4975 
 4976   op_cost(5);
 4977   format %{ %}
 4978   interface(CONST_INTER);
 4979 %}
 4980 
 4981 // Long Immediate 32-bit unsigned
 4982 operand immUL32()
 4983 %{
 4984   predicate(n->get_long() == (unsigned int) (n->get_long()));
 4985   match(ConL);
 4986 
 4987   op_cost(10);
 4988   format %{ %}
 4989   interface(CONST_INTER);
 4990 %}
 4991 
 4992 // Long Immediate 32-bit signed
 4993 operand immL32()
 4994 %{
 4995   predicate(n->get_long() == (int) (n->get_long()));
 4996   match(ConL);
 4997 
 4998   op_cost(15);
 4999   format %{ %}
 5000   interface(CONST_INTER);
 5001 %}
 5002 
 5003 operand immL_Pow2()
 5004 %{
 5005   predicate(is_power_of_2((julong)n->get_long()));
 5006   match(ConL);
 5007 
 5008   op_cost(15);
 5009   format %{ %}
 5010   interface(CONST_INTER);
 5011 %}
 5012 
 5013 operand immL_NotPow2()
 5014 %{
 5015   predicate(is_power_of_2((julong)~n->get_long()));
 5016   match(ConL);
 5017 
 5018   op_cost(15);
 5019   format %{ %}
 5020   interface(CONST_INTER);
 5021 %}
 5022 
 5023 // Long Immediate zero
 5024 operand immL0()
 5025 %{
 5026   predicate(n->get_long() == 0L);
 5027   match(ConL);
 5028 
 5029   op_cost(10);
 5030   format %{ %}
 5031   interface(CONST_INTER);
 5032 %}
 5033 
 5034 // Constant for increment
 5035 operand immL1()
 5036 %{
 5037   predicate(n->get_long() == 1);
 5038   match(ConL);
 5039 
 5040   format %{ %}
 5041   interface(CONST_INTER);
 5042 %}
 5043 
 5044 // Constant for decrement
 5045 operand immL_M1()
 5046 %{
 5047   predicate(n->get_long() == -1);
 5048   match(ConL);
 5049 
 5050   format %{ %}
 5051   interface(CONST_INTER);
 5052 %}
 5053 
 5054 // Long Immediate: low 32-bit mask
 5055 operand immL_32bits()
 5056 %{
 5057   predicate(n->get_long() == 0xFFFFFFFFL);
 5058   match(ConL);
 5059   op_cost(20);
 5060 
 5061   format %{ %}
 5062   interface(CONST_INTER);
 5063 %}
 5064 
 5065 // Int Immediate: 2^n-1, positive
 5066 operand immI_Pow2M1()
 5067 %{
 5068   predicate((n->get_int() > 0)
 5069             && is_power_of_2((juint)n->get_int() + 1));
 5070   match(ConI);
 5071 
 5072   op_cost(20);
 5073   format %{ %}
 5074   interface(CONST_INTER);
 5075 %}
 5076 
 5077 // Float Immediate zero
 5078 operand immF0()
 5079 %{
 5080   predicate(jint_cast(n->getf()) == 0);
 5081   match(ConF);
 5082 
 5083   op_cost(5);
 5084   format %{ %}
 5085   interface(CONST_INTER);
 5086 %}
 5087 
 5088 // Float Immediate
 5089 operand immF()
 5090 %{
 5091   match(ConF);
 5092 
 5093   op_cost(15);
 5094   format %{ %}
 5095   interface(CONST_INTER);
 5096 %}
 5097 
 5098 // Half Float Immediate
 5099 operand immH()
 5100 %{
 5101   match(ConH);
 5102 
 5103   op_cost(15);
 5104   format %{ %}
 5105   interface(CONST_INTER);
 5106 %}
 5107 
 5108 // Double Immediate zero
 5109 operand immD0()
 5110 %{
 5111   predicate(jlong_cast(n->getd()) == 0);
 5112   match(ConD);
 5113 
 5114   op_cost(5);
 5115   format %{ %}
 5116   interface(CONST_INTER);
 5117 %}
 5118 
 5119 // Double Immediate
 5120 operand immD()
 5121 %{
 5122   match(ConD);
 5123 
 5124   op_cost(15);
 5125   format %{ %}
 5126   interface(CONST_INTER);
 5127 %}
 5128 
 5129 // Immediates for special shifts (sign extend)
 5130 
// Shift counts for sign extension
 5132 operand immI_16()
 5133 %{
 5134   predicate(n->get_int() == 16);
 5135   match(ConI);
 5136 
 5137   format %{ %}
 5138   interface(CONST_INTER);
 5139 %}
 5140 
 5141 operand immI_24()
 5142 %{
 5143   predicate(n->get_int() == 24);
 5144   match(ConI);
 5145 
 5146   format %{ %}
 5147   interface(CONST_INTER);
 5148 %}
 5149 
 5150 // Constant for byte-wide masking
 5151 operand immI_255()
 5152 %{
 5153   predicate(n->get_int() == 255);
 5154   match(ConI);
 5155 
 5156   format %{ %}
 5157   interface(CONST_INTER);
 5158 %}
 5159 
 5160 // Constant for short-wide masking
 5161 operand immI_65535()
 5162 %{
 5163   predicate(n->get_int() == 65535);
 5164   match(ConI);
 5165 
 5166   format %{ %}
 5167   interface(CONST_INTER);
 5168 %}
 5169 
 5170 // Constant for byte-wide masking
 5171 operand immL_255()
 5172 %{
 5173   predicate(n->get_long() == 255);
 5174   match(ConL);
 5175 
 5176   format %{ %}
 5177   interface(CONST_INTER);
 5178 %}
 5179 
 5180 // Constant for short-wide masking
 5181 operand immL_65535()
 5182 %{
 5183   predicate(n->get_long() == 65535);
 5184   match(ConL);
 5185 
 5186   format %{ %}
 5187   interface(CONST_INTER);
 5188 %}
 5189 
 5190 operand kReg()
 5191 %{
 5192   constraint(ALLOC_IN_RC(vectmask_reg));
 5193   match(RegVectMask);
  format %{ %}
 5195   interface(REG_INTER);
 5196 %}
 5197 
 5198 // Register Operands
 5199 // Integer Register
 5200 operand rRegI()
 5201 %{
 5202   constraint(ALLOC_IN_RC(int_reg));
 5203   match(RegI);
 5204 
 5205   match(rax_RegI);
 5206   match(rbx_RegI);
 5207   match(rcx_RegI);
 5208   match(rdx_RegI);
 5209   match(rdi_RegI);
 5210 
 5211   format %{ %}
 5212   interface(REG_INTER);
 5213 %}
 5214 
 5215 // Special Registers
 5216 operand rax_RegI()
 5217 %{
 5218   constraint(ALLOC_IN_RC(int_rax_reg));
 5219   match(RegI);
 5220   match(rRegI);
 5221 
 5222   format %{ "RAX" %}
 5223   interface(REG_INTER);
 5224 %}
 5225 
 5226 // Special Registers
 5227 operand rbx_RegI()
 5228 %{
 5229   constraint(ALLOC_IN_RC(int_rbx_reg));
 5230   match(RegI);
 5231   match(rRegI);
 5232 
 5233   format %{ "RBX" %}
 5234   interface(REG_INTER);
 5235 %}
 5236 
 5237 operand rcx_RegI()
 5238 %{
 5239   constraint(ALLOC_IN_RC(int_rcx_reg));
 5240   match(RegI);
 5241   match(rRegI);
 5242 
 5243   format %{ "RCX" %}
 5244   interface(REG_INTER);
 5245 %}
 5246 
 5247 operand rdx_RegI()
 5248 %{
 5249   constraint(ALLOC_IN_RC(int_rdx_reg));
 5250   match(RegI);
 5251   match(rRegI);
 5252 
 5253   format %{ "RDX" %}
 5254   interface(REG_INTER);
 5255 %}
 5256 
 5257 operand rdi_RegI()
 5258 %{
 5259   constraint(ALLOC_IN_RC(int_rdi_reg));
 5260   match(RegI);
 5261   match(rRegI);
 5262 
 5263   format %{ "RDI" %}
 5264   interface(REG_INTER);
 5265 %}
 5266 
 5267 operand no_rax_rdx_RegI()
 5268 %{
 5269   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5270   match(RegI);
 5271   match(rbx_RegI);
 5272   match(rcx_RegI);
 5273   match(rdi_RegI);
 5274 
 5275   format %{ %}
 5276   interface(REG_INTER);
 5277 %}
 5278 
 5279 operand no_rbp_r13_RegI()
 5280 %{
 5281   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5282   match(RegI);
 5283   match(rRegI);
 5284   match(rax_RegI);
 5285   match(rbx_RegI);
 5286   match(rcx_RegI);
 5287   match(rdx_RegI);
 5288   match(rdi_RegI);
 5289 
 5290   format %{ %}
 5291   interface(REG_INTER);
 5292 %}
 5293 
 5294 // Pointer Register
 5295 operand any_RegP()
 5296 %{
 5297   constraint(ALLOC_IN_RC(any_reg));
 5298   match(RegP);
 5299   match(rax_RegP);
 5300   match(rbx_RegP);
 5301   match(rdi_RegP);
 5302   match(rsi_RegP);
 5303   match(rbp_RegP);
 5304   match(r15_RegP);
 5305   match(rRegP);
 5306 
 5307   format %{ %}
 5308   interface(REG_INTER);
 5309 %}
 5310 
 5311 operand rRegP()
 5312 %{
 5313   constraint(ALLOC_IN_RC(ptr_reg));
 5314   match(RegP);
 5315   match(rax_RegP);
 5316   match(rbx_RegP);
 5317   match(rdi_RegP);
 5318   match(rsi_RegP);
 5319   match(rbp_RegP);  // See Q&A below about
 5320   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5321 
 5322   format %{ %}
 5323   interface(REG_INTER);
 5324 %}
 5325 
 5326 operand rRegN() %{
 5327   constraint(ALLOC_IN_RC(int_reg));
 5328   match(RegN);
 5329 
 5330   format %{ %}
 5331   interface(REG_INTER);
 5332 %}
 5333 
 5334 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 5335 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 5336 // It's fine for an instruction input that expects rRegP to match an r15_RegP.
 5337 // The output of an instruction is controlled by the allocator, which respects
 5338 // register class masks, not match rules.  Unless an instruction mentions
 5339 // r15_RegP or any_RegP explicitly as its output, r15 will never be chosen
 5340 // by the allocator for the result.
 5341 // The same logic applies to rbp_RegP being a match for rRegP: if
 5342 // PreserveFramePointer == true, RBP is used as a proper frame pointer and is
 5343 // not included in ptr_reg, so RBP is never allocated as an instruction's output either.
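      //
      // As a hedged, illustrative sketch (operand and rule names here are
      // examples only, not rules defined at this point): a rule must name
      // r15_RegP as its output for the allocator to ever bind a result to R15:
      //
      // instruct tlsLoadP_example(r15_RegP dst)
      // %{
      //   match(Set dst (ThreadLocal));
      //   ins_cost(0);
      //   format %{ "# TLS is in R15, no code emitted" %}
      //   ins_encode();
      //   ins_pipe(ialu_reg_reg);
      // %}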
 5344 
 5345 // This operand is not allowed to use RBP even if
 5346 // RBP is not used to hold the frame pointer.
 5347 operand no_rbp_RegP()
 5348 %{
 5349   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5350   match(RegP);
 5351   match(rbx_RegP);
 5352   match(rsi_RegP);
 5353   match(rdi_RegP);
 5354 
 5355   format %{ %}
 5356   interface(REG_INTER);
 5357 %}
 5358 
 5359 // Special Registers
 5360 // Return a pointer value
 5361 operand rax_RegP()
 5362 %{
 5363   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5364   match(RegP);
 5365   match(rRegP);
 5366 
 5367   format %{ %}
 5368   interface(REG_INTER);
 5369 %}
 5370 
 5371 // Special Registers
 5372 // Return a compressed pointer value
 5373 operand rax_RegN()
 5374 %{
 5375   constraint(ALLOC_IN_RC(int_rax_reg));
 5376   match(RegN);
 5377   match(rRegN);
 5378 
 5379   format %{ %}
 5380   interface(REG_INTER);
 5381 %}
 5382 
 5383 // Used in AtomicAdd
 5384 operand rbx_RegP()
 5385 %{
 5386   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5387   match(RegP);
 5388   match(rRegP);
 5389 
 5390   format %{ %}
 5391   interface(REG_INTER);
 5392 %}
 5393 
 5394 operand rsi_RegP()
 5395 %{
 5396   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5397   match(RegP);
 5398   match(rRegP);
 5399 
 5400   format %{ %}
 5401   interface(REG_INTER);
 5402 %}
 5403 
 5404 operand rbp_RegP()
 5405 %{
 5406   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5407   match(RegP);
 5408   match(rRegP);
 5409 
 5410   format %{ %}
 5411   interface(REG_INTER);
 5412 %}
 5413 
 5414 // Used in rep stosq
 5415 operand rdi_RegP()
 5416 %{
 5417   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5418   match(RegP);
 5419   match(rRegP);
 5420 
 5421   format %{ %}
 5422   interface(REG_INTER);
 5423 %}
 5424 
 5425 operand r15_RegP()
 5426 %{
 5427   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5428   match(RegP);
 5429   match(rRegP);
 5430 
 5431   format %{ %}
 5432   interface(REG_INTER);
 5433 %}
 5434 
 5435 operand rRegL()
 5436 %{
 5437   constraint(ALLOC_IN_RC(long_reg));
 5438   match(RegL);
 5439   match(rax_RegL);
 5440   match(rdx_RegL);
 5441 
 5442   format %{ %}
 5443   interface(REG_INTER);
 5444 %}
 5445 
 5446 // Special Registers
 5447 operand no_rax_rdx_RegL()
 5448 %{
 5449   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5450   match(RegL);
 5451   match(rRegL);
 5452 
 5453   format %{ %}
 5454   interface(REG_INTER);
 5455 %}
 5456 
 5457 operand rax_RegL()
 5458 %{
 5459   constraint(ALLOC_IN_RC(long_rax_reg));
 5460   match(RegL);
 5461   match(rRegL);
 5462 
 5463   format %{ "RAX" %}
 5464   interface(REG_INTER);
 5465 %}
 5466 
 5467 operand rcx_RegL()
 5468 %{
 5469   constraint(ALLOC_IN_RC(long_rcx_reg));
 5470   match(RegL);
 5471   match(rRegL);
 5472 
 5473   format %{ %}
 5474   interface(REG_INTER);
 5475 %}
 5476 
 5477 operand rdx_RegL()
 5478 %{
 5479   constraint(ALLOC_IN_RC(long_rdx_reg));
 5480   match(RegL);
 5481   match(rRegL);
 5482 
 5483   format %{ %}
 5484   interface(REG_INTER);
 5485 %}
 5486 
 5487 operand r11_RegL()
 5488 %{
 5489   constraint(ALLOC_IN_RC(long_r11_reg));
 5490   match(RegL);
 5491   match(rRegL);
 5492 
 5493   format %{ %}
 5494   interface(REG_INTER);
 5495 %}
 5496 
 5497 operand no_rbp_r13_RegL()
 5498 %{
 5499   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5500   match(RegL);
 5501   match(rRegL);
 5502   match(rax_RegL);
 5503   match(rcx_RegL);
 5504   match(rdx_RegL);
 5505 
 5506   format %{ %}
 5507   interface(REG_INTER);
 5508 %}
 5509 
 5510 // Flags register, used as output of compare instructions
 5511 operand rFlagsReg()
 5512 %{
 5513   constraint(ALLOC_IN_RC(int_flags));
 5514   match(RegFlags);
 5515 
 5516   format %{ "RFLAGS" %}
 5517   interface(REG_INTER);
 5518 %}
 5519 
 5520 // Flags register, used as output of FLOATING POINT compare instructions
 5521 operand rFlagsRegU()
 5522 %{
 5523   constraint(ALLOC_IN_RC(int_flags));
 5524   match(RegFlags);
 5525 
 5526   format %{ "RFLAGS_U" %}
 5527   interface(REG_INTER);
 5528 %}
 5529 
 5530 operand rFlagsRegUCF() %{
 5531   constraint(ALLOC_IN_RC(int_flags));
 5532   match(RegFlags);
 5533   predicate(false);
 5534 
 5535   format %{ "RFLAGS_U_CF" %}
 5536   interface(REG_INTER);
 5537 %}
 5538 
 5539 // Float register operands
 5540 operand regF() %{
 5541    constraint(ALLOC_IN_RC(float_reg));
 5542    match(RegF);
 5543 
 5544    format %{ %}
 5545    interface(REG_INTER);
 5546 %}
 5547 
 5548 // Float register operands
 5549 operand legRegF() %{
 5550    constraint(ALLOC_IN_RC(float_reg_legacy));
 5551    match(RegF);
 5552 
 5553    format %{ %}
 5554    interface(REG_INTER);
 5555 %}
 5556 
 5557 // Float register operands
 5558 operand vlRegF() %{
 5559    constraint(ALLOC_IN_RC(float_reg_vl));
 5560    match(RegF);
 5561 
 5562    format %{ %}
 5563    interface(REG_INTER);
 5564 %}
 5565 
 5566 // Double register operands
 5567 operand regD() %{
 5568    constraint(ALLOC_IN_RC(double_reg));
 5569    match(RegD);
 5570 
 5571    format %{ %}
 5572    interface(REG_INTER);
 5573 %}
 5574 
 5575 // Double register operands
 5576 operand legRegD() %{
 5577    constraint(ALLOC_IN_RC(double_reg_legacy));
 5578    match(RegD);
 5579 
 5580    format %{ %}
 5581    interface(REG_INTER);
 5582 %}
 5583 
 5584 // Double register operands
 5585 operand vlRegD() %{
 5586    constraint(ALLOC_IN_RC(double_reg_vl));
 5587    match(RegD);
 5588 
 5589    format %{ %}
 5590    interface(REG_INTER);
 5591 %}
 5592 
 5593 //----------Memory Operands----------------------------------------------------
 5594 // Direct Memory Operand
 5595 // operand direct(immP addr)
 5596 // %{
 5597 //   match(addr);
 5598 
 5599 //   format %{ "[$addr]" %}
 5600 //   interface(MEMORY_INTER) %{
 5601 //     base(0xFFFFFFFF);
 5602 //     index(0x4);
 5603 //     scale(0x0);
 5604 //     disp($addr);
 5605 //   %}
 5606 // %}
 5607 
 5608 // Indirect Memory Operand
 5609 operand indirect(any_RegP reg)
 5610 %{
 5611   constraint(ALLOC_IN_RC(ptr_reg));
 5612   match(reg);
 5613 
 5614   format %{ "[$reg]" %}
 5615   interface(MEMORY_INTER) %{
 5616     base($reg);
 5617     index(0x4);
 5618     scale(0x0);
 5619     disp(0x0);
 5620   %}
 5621 %}
 5622 
 5623 // Indirect Memory Plus Short Offset Operand
 5624 operand indOffset8(any_RegP reg, immL8 off)
 5625 %{
 5626   constraint(ALLOC_IN_RC(ptr_reg));
 5627   match(AddP reg off);
 5628 
 5629   format %{ "[$reg + $off (8-bit)]" %}
 5630   interface(MEMORY_INTER) %{
 5631     base($reg);
 5632     index(0x4);
 5633     scale(0x0);
 5634     disp($off);
 5635   %}
 5636 %}
 5637 
 5638 // Indirect Memory Plus Long Offset Operand
 5639 operand indOffset32(any_RegP reg, immL32 off)
 5640 %{
 5641   constraint(ALLOC_IN_RC(ptr_reg));
 5642   match(AddP reg off);
 5643 
 5644   format %{ "[$reg + $off (32-bit)]" %}
 5645   interface(MEMORY_INTER) %{
 5646     base($reg);
 5647     index(0x4);
 5648     scale(0x0);
 5649     disp($off);
 5650   %}
 5651 %}
 5652 
 5653 // Indirect Memory Plus Index Register Plus Offset Operand
 5654 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5655 %{
 5656   constraint(ALLOC_IN_RC(ptr_reg));
 5657   match(AddP (AddP reg lreg) off);
 5658 
 5659   op_cost(10);
 5660   format %{"[$reg + $off + $lreg]" %}
 5661   interface(MEMORY_INTER) %{
 5662     base($reg);
 5663     index($lreg);
 5664     scale(0x0);
 5665     disp($off);
 5666   %}
 5667 %}
 5668 
 5669 // Indirect Memory Plus Index Register Operand
 5670 operand indIndex(any_RegP reg, rRegL lreg)
 5671 %{
 5672   constraint(ALLOC_IN_RC(ptr_reg));
 5673   match(AddP reg lreg);
 5674 
 5675   op_cost(10);
 5676   format %{"[$reg + $lreg]" %}
 5677   interface(MEMORY_INTER) %{
 5678     base($reg);
 5679     index($lreg);
 5680     scale(0x0);
 5681     disp(0x0);
 5682   %}
 5683 %}
 5684 
 5685 // Indirect Memory Times Scale Plus Index Register
 5686 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5687 %{
 5688   constraint(ALLOC_IN_RC(ptr_reg));
 5689   match(AddP reg (LShiftL lreg scale));
 5690 
 5691   op_cost(10);
 5692   format %{"[$reg + $lreg << $scale]" %}
 5693   interface(MEMORY_INTER) %{
 5694     base($reg);
 5695     index($lreg);
 5696     scale($scale);
 5697     disp(0x0);
 5698   %}
 5699 %}
 5700 
 5701 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5702 %{
 5703   constraint(ALLOC_IN_RC(ptr_reg));
 5704   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5705   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5706 
 5707   op_cost(10);
 5708   format %{"[$reg + pos $idx << $scale]" %}
 5709   interface(MEMORY_INTER) %{
 5710     base($reg);
 5711     index($idx);
 5712     scale($scale);
 5713     disp(0x0);
 5714   %}
 5715 %}
 5716 
 5717 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5718 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5719 %{
 5720   constraint(ALLOC_IN_RC(ptr_reg));
 5721   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5722 
 5723   op_cost(10);
 5724   format %{"[$reg + $off + $lreg << $scale]" %}
 5725   interface(MEMORY_INTER) %{
 5726     base($reg);
 5727     index($lreg);
 5728     scale($scale);
 5729     disp($off);
 5730   %}
 5731 %}
 5732 
 5733 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5734 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5735 %{
 5736   constraint(ALLOC_IN_RC(ptr_reg));
 5737   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5738   match(AddP (AddP reg (ConvI2L idx)) off);
 5739 
 5740   op_cost(10);
 5741   format %{"[$reg + $off + $idx]" %}
 5742   interface(MEMORY_INTER) %{
 5743     base($reg);
 5744     index($idx);
 5745     scale(0x0);
 5746     disp($off);
 5747   %}
 5748 %}
 5749 
 5750 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5751 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5752 %{
 5753   constraint(ALLOC_IN_RC(ptr_reg));
 5754   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5755   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5756 
 5757   op_cost(10);
 5758   format %{"[$reg + $off + $idx << $scale]" %}
 5759   interface(MEMORY_INTER) %{
 5760     base($reg);
 5761     index($idx);
 5762     scale($scale);
 5763     disp($off);
 5764   %}
 5765 %}
 5766 
 5767 // Indirect Narrow Oop Operand
 5768 operand indCompressedOop(rRegN reg) %{
 5769   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5770   constraint(ALLOC_IN_RC(ptr_reg));
 5771   match(DecodeN reg);
 5772 
 5773   op_cost(10);
 5774   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5775   interface(MEMORY_INTER) %{
 5776     base(0xc); // R12
 5777     index($reg);
 5778     scale(0x3);
 5779     disp(0x0);
 5780   %}
 5781 %}
 5782 
 5783 // Indirect Narrow Oop Plus Offset Operand
 5784 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
 5785 // so we can't free r12 even with CompressedOops::base() == nullptr.
 5786 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5787   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5788   constraint(ALLOC_IN_RC(ptr_reg));
 5789   match(AddP (DecodeN reg) off);
 5790 
 5791   op_cost(10);
 5792   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5793   interface(MEMORY_INTER) %{
 5794     base(0xc); // R12
 5795     index($reg);
 5796     scale(0x3);
 5797     disp($off);
 5798   %}
 5799 %}
 5800 
 5801 // Indirect Memory Operand
 5802 operand indirectNarrow(rRegN reg)
 5803 %{
 5804   predicate(CompressedOops::shift() == 0);
 5805   constraint(ALLOC_IN_RC(ptr_reg));
 5806   match(DecodeN reg);
 5807 
 5808   format %{ "[$reg]" %}
 5809   interface(MEMORY_INTER) %{
 5810     base($reg);
 5811     index(0x4);
 5812     scale(0x0);
 5813     disp(0x0);
 5814   %}
 5815 %}
 5816 
 5817 // Indirect Memory Plus Short Offset Operand
 5818 operand indOffset8Narrow(rRegN reg, immL8 off)
 5819 %{
 5820   predicate(CompressedOops::shift() == 0);
 5821   constraint(ALLOC_IN_RC(ptr_reg));
 5822   match(AddP (DecodeN reg) off);
 5823 
 5824   format %{ "[$reg + $off (8-bit)]" %}
 5825   interface(MEMORY_INTER) %{
 5826     base($reg);
 5827     index(0x4);
 5828     scale(0x0);
 5829     disp($off);
 5830   %}
 5831 %}
 5832 
 5833 // Indirect Memory Plus Long Offset Operand
 5834 operand indOffset32Narrow(rRegN reg, immL32 off)
 5835 %{
 5836   predicate(CompressedOops::shift() == 0);
 5837   constraint(ALLOC_IN_RC(ptr_reg));
 5838   match(AddP (DecodeN reg) off);
 5839 
 5840   format %{ "[$reg + $off (32-bit)]" %}
 5841   interface(MEMORY_INTER) %{
 5842     base($reg);
 5843     index(0x4);
 5844     scale(0x0);
 5845     disp($off);
 5846   %}
 5847 %}
 5848 
 5849 // Indirect Memory Plus Index Register Plus Offset Operand
 5850 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5851 %{
 5852   predicate(CompressedOops::shift() == 0);
 5853   constraint(ALLOC_IN_RC(ptr_reg));
 5854   match(AddP (AddP (DecodeN reg) lreg) off);
 5855 
 5856   op_cost(10);
 5857   format %{"[$reg + $off + $lreg]" %}
 5858   interface(MEMORY_INTER) %{
 5859     base($reg);
 5860     index($lreg);
 5861     scale(0x0);
 5862     disp($off);
 5863   %}
 5864 %}
 5865 
 5866 // Indirect Memory Plus Index Register Operand
 5867 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5868 %{
 5869   predicate(CompressedOops::shift() == 0);
 5870   constraint(ALLOC_IN_RC(ptr_reg));
 5871   match(AddP (DecodeN reg) lreg);
 5872 
 5873   op_cost(10);
 5874   format %{"[$reg + $lreg]" %}
 5875   interface(MEMORY_INTER) %{
 5876     base($reg);
 5877     index($lreg);
 5878     scale(0x0);
 5879     disp(0x0);
 5880   %}
 5881 %}
 5882 
 5883 // Indirect Memory Times Scale Plus Index Register
 5884 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5885 %{
 5886   predicate(CompressedOops::shift() == 0);
 5887   constraint(ALLOC_IN_RC(ptr_reg));
 5888   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5889 
 5890   op_cost(10);
 5891   format %{"[$reg + $lreg << $scale]" %}
 5892   interface(MEMORY_INTER) %{
 5893     base($reg);
 5894     index($lreg);
 5895     scale($scale);
 5896     disp(0x0);
 5897   %}
 5898 %}
 5899 
 5900 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5901 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5902 %{
 5903   predicate(CompressedOops::shift() == 0);
 5904   constraint(ALLOC_IN_RC(ptr_reg));
 5905   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5906 
 5907   op_cost(10);
 5908   format %{"[$reg + $off + $lreg << $scale]" %}
 5909   interface(MEMORY_INTER) %{
 5910     base($reg);
 5911     index($lreg);
 5912     scale($scale);
 5913     disp($off);
 5914   %}
 5915 %}
 5916 
 5917 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5918 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5919 %{
 5920   constraint(ALLOC_IN_RC(ptr_reg));
 5921   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5922   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5923 
 5924   op_cost(10);
 5925   format %{"[$reg + $off + $idx]" %}
 5926   interface(MEMORY_INTER) %{
 5927     base($reg);
 5928     index($idx);
 5929     scale(0x0);
 5930     disp($off);
 5931   %}
 5932 %}
 5933 
 5934 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5935 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5936 %{
 5937   constraint(ALLOC_IN_RC(ptr_reg));
 5938   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5939   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 5940 
 5941   op_cost(10);
 5942   format %{"[$reg + $off + $idx << $scale]" %}
 5943   interface(MEMORY_INTER) %{
 5944     base($reg);
 5945     index($idx);
 5946     scale($scale);
 5947     disp($off);
 5948   %}
 5949 %}
 5950 
 5951 //----------Special Memory Operands--------------------------------------------
 5952 // Stack Slot Operand - This operand is used for loading and storing temporary
 5953 //                      values on the stack where a match requires a value to
 5954 //                      flow through memory.
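      // The MEMORY_INTER encodings below use base 0x4 (RSP) with no index
      // register; the displacement is whatever stack-slot offset the
      // allocator assigns to $reg.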
 5955 operand stackSlotP(sRegP reg)
 5956 %{
 5957   constraint(ALLOC_IN_RC(stack_slots));
 5958   // No match rule because this operand is only generated in matching
 5959 
 5960   format %{ "[$reg]" %}
 5961   interface(MEMORY_INTER) %{
 5962     base(0x4);   // RSP
 5963     index(0x4);  // No Index
 5964     scale(0x0);  // No Scale
 5965     disp($reg);  // Stack Offset
 5966   %}
 5967 %}
 5968 
 5969 operand stackSlotI(sRegI reg)
 5970 %{
 5971   constraint(ALLOC_IN_RC(stack_slots));
 5972   // No match rule because this operand is only generated in matching
 5973 
 5974   format %{ "[$reg]" %}
 5975   interface(MEMORY_INTER) %{
 5976     base(0x4);   // RSP
 5977     index(0x4);  // No Index
 5978     scale(0x0);  // No Scale
 5979     disp($reg);  // Stack Offset
 5980   %}
 5981 %}
 5982 
 5983 operand stackSlotF(sRegF reg)
 5984 %{
 5985   constraint(ALLOC_IN_RC(stack_slots));
 5986   // No match rule because this operand is only generated in matching
 5987 
 5988   format %{ "[$reg]" %}
 5989   interface(MEMORY_INTER) %{
 5990     base(0x4);   // RSP
 5991     index(0x4);  // No Index
 5992     scale(0x0);  // No Scale
 5993     disp($reg);  // Stack Offset
 5994   %}
 5995 %}
 5996 
 5997 operand stackSlotD(sRegD reg)
 5998 %{
 5999   constraint(ALLOC_IN_RC(stack_slots));
 6000   // No match rule because this operand is only generated in matching
 6001 
 6002   format %{ "[$reg]" %}
 6003   interface(MEMORY_INTER) %{
 6004     base(0x4);   // RSP
 6005     index(0x4);  // No Index
 6006     scale(0x0);  // No Scale
 6007     disp($reg);  // Stack Offset
 6008   %}
 6009 %}
 6010 operand stackSlotL(sRegL reg)
 6011 %{
 6012   constraint(ALLOC_IN_RC(stack_slots));
 6013   // No match rule because this operand is only generated in matching
 6014 
 6015   format %{ "[$reg]" %}
 6016   interface(MEMORY_INTER) %{
 6017     base(0x4);   // RSP
 6018     index(0x4);  // No Index
 6019     scale(0x0);  // No Scale
 6020     disp($reg);  // Stack Offset
 6021   %}
 6022 %}
 6023 
 6024 //----------Conditional Branch Operands----------------------------------------
 6025 // Comparison Op  - This is the operation of the comparison, and is limited to
 6026 //                  the following set of codes:
 6027 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6028 //
 6029 // Other attributes of the comparison, such as unsignedness, are specified
 6030 // by the comparison instruction that sets a condition code flags register.
 6031 // That result is represented by a flags operand whose subtype is appropriate
 6032 // to the unsignedness (etc.) of the comparison.
 6033 //
 6034 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6035 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6036 // by matching a specific subtype of Bool operand below, such as cmpOpU.
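      //
      // As a hedged, illustrative sketch (the real branch rules appear much
      // later in this file): a conditional jump matches the Bool through a
      // cmpOp operand together with the flags the compare produced, e.g.
      //
      // instruct jmpCon_example(cmpOp cop, rFlagsReg cr, label labl)
      // %{
      //   match(If cop cr);
      //   format %{ "j$cop   $labl" %}
      //   ins_pipe(pipe_jcc);
      // %}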
 6037 
 6038 // Comparison Code
 6039 operand cmpOp()
 6040 %{
 6041   match(Bool);
 6042 
 6043   format %{ "" %}
 6044   interface(COND_INTER) %{
 6045     equal(0x4, "e");
 6046     not_equal(0x5, "ne");
 6047     less(0xC, "l");
 6048     greater_equal(0xD, "ge");
 6049     less_equal(0xE, "le");
 6050     greater(0xF, "g");
 6051     overflow(0x0, "o");
 6052     no_overflow(0x1, "no");
 6053   %}
 6054 %}
 6055 
 6056 // Comparison Code, unsigned compare.  Used by FP also, with
 6057 // C2 (unordered) turned into GT or LT already.  The other bits
 6058 // C0 and C3 are turned into Carry & Zero flags.
 6059 operand cmpOpU()
 6060 %{
 6061   match(Bool);
 6062 
 6063   format %{ "" %}
 6064   interface(COND_INTER) %{
 6065     equal(0x4, "e");
 6066     not_equal(0x5, "ne");
 6067     less(0x2, "b");
 6068     greater_equal(0x3, "ae");
 6069     less_equal(0x6, "be");
 6070     greater(0x7, "a");
 6071     overflow(0x0, "o");
 6072     no_overflow(0x1, "no");
 6073   %}
 6074 %}
 6075 
 6076 
 6077 // Floating comparisons that don't require any fixup for the unordered case.
 6078 // If both inputs of the comparison are the same, ZF is always set, so we
 6079 // don't need to use cmpOpUCF2 for eq/ne.
 6080 operand cmpOpUCF() %{
 6081   match(Bool);
 6082   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 6083             n->as_Bool()->_test._test == BoolTest::ge ||
 6084             n->as_Bool()->_test._test == BoolTest::le ||
 6085             n->as_Bool()->_test._test == BoolTest::gt ||
 6086             n->in(1)->in(1) == n->in(1)->in(2));
 6087   format %{ "" %}
 6088   interface(COND_INTER) %{
 6089     equal(0xb, "np");
 6090     not_equal(0xa, "p");
 6091     less(0x2, "b");
 6092     greater_equal(0x3, "ae");
 6093     less_equal(0x6, "be");
 6094     greater(0x7, "a");
 6095     overflow(0x0, "o");
 6096     no_overflow(0x1, "no");
 6097   %}
 6098 %}
 6099 
 6100 
 6101 // Floating comparisons that can be fixed up with extra conditional jumps
 6102 operand cmpOpUCF2() %{
 6103   match(Bool);
 6104   predicate((n->as_Bool()->_test._test == BoolTest::ne ||
 6105              n->as_Bool()->_test._test == BoolTest::eq) &&
 6106             n->in(1)->in(1) != n->in(1)->in(2));
 6107   format %{ "" %}
 6108   interface(COND_INTER) %{
 6109     equal(0x4, "e");
 6110     not_equal(0x5, "ne");
 6111     less(0x2, "b");
 6112     greater_equal(0x3, "ae");
 6113     less_equal(0x6, "be");
 6114     greater(0x7, "a");
 6115     overflow(0x0, "o");
 6116     no_overflow(0x1, "no");
 6117   %}
 6118 %}
 6119 
 6120 // Operands for bound floating pointer register arguments
 6121 operand rxmm0() %{
 6122   constraint(ALLOC_IN_RC(xmm0_reg));
 6123   match(VecX);
 6124   format %{ %}
 6125   interface(REG_INTER);
 6126 %}
 6127 
 6128 // Vectors
 6129 
 6130 // Dummy generic vector class. Should be used for all vector operands.
 6131 // Replaced with vec[SDXYZ] during post-selection pass.
 6132 operand vec() %{
 6133   constraint(ALLOC_IN_RC(dynamic));
 6134   match(VecX);
 6135   match(VecY);
 6136   match(VecZ);
 6137   match(VecS);
 6138   match(VecD);
 6139 
 6140   format %{ %}
 6141   interface(REG_INTER);
 6142 %}
 6143 
 6144 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6145 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6146 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6147 // runtime code generation via reg_class_dynamic.
 6148 operand legVec() %{
 6149   constraint(ALLOC_IN_RC(dynamic));
 6150   match(VecX);
 6151   match(VecY);
 6152   match(VecZ);
 6153   match(VecS);
 6154   match(VecD);
 6155 
 6156   format %{ %}
 6157   interface(REG_INTER);
 6158 %}
 6159 
 6160 // Replaces vec during post-selection cleanup. See above.
 6161 operand vecS() %{
 6162   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6163   match(VecS);
 6164 
 6165   format %{ %}
 6166   interface(REG_INTER);
 6167 %}
 6168 
 6169 // Replaces legVec during post-selection cleanup. See above.
 6170 operand legVecS() %{
 6171   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6172   match(VecS);
 6173 
 6174   format %{ %}
 6175   interface(REG_INTER);
 6176 %}
 6177 
 6178 // Replaces vec during post-selection cleanup. See above.
 6179 operand vecD() %{
 6180   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6181   match(VecD);
 6182 
 6183   format %{ %}
 6184   interface(REG_INTER);
 6185 %}
 6186 
 6187 // Replaces legVec during post-selection cleanup. See above.
 6188 operand legVecD() %{
 6189   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6190   match(VecD);
 6191 
 6192   format %{ %}
 6193   interface(REG_INTER);
 6194 %}
 6195 
 6196 // Replaces vec during post-selection cleanup. See above.
 6197 operand vecX() %{
 6198   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6199   match(VecX);
 6200 
 6201   format %{ %}
 6202   interface(REG_INTER);
 6203 %}
 6204 
 6205 // Replaces legVec during post-selection cleanup. See above.
 6206 operand legVecX() %{
 6207   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6208   match(VecX);
 6209 
 6210   format %{ %}
 6211   interface(REG_INTER);
 6212 %}
 6213 
 6214 // Replaces vec during post-selection cleanup. See above.
 6215 operand vecY() %{
 6216   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6217   match(VecY);
 6218 
 6219   format %{ %}
 6220   interface(REG_INTER);
 6221 %}
 6222 
 6223 // Replaces legVec during post-selection cleanup. See above.
 6224 operand legVecY() %{
 6225   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6226   match(VecY);
 6227 
 6228   format %{ %}
 6229   interface(REG_INTER);
 6230 %}
 6231 
 6232 // Replaces vec during post-selection cleanup. See above.
 6233 operand vecZ() %{
 6234   constraint(ALLOC_IN_RC(vectorz_reg));
 6235   match(VecZ);
 6236 
 6237   format %{ %}
 6238   interface(REG_INTER);
 6239 %}
 6240 
 6241 // Replaces legVec during post-selection cleanup. See above.
 6242 operand legVecZ() %{
 6243   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6244   match(VecZ);
 6245 
 6246   format %{ %}
 6247   interface(REG_INTER);
 6248 %}
 6249 
 6250 //----------OPERAND CLASSES----------------------------------------------------
 6251 // Operand Classes are groups of operands that are used to simplify
 6252 // instruction definitions by not requiring the AD writer to specify separate
 6253 // instructions for every form of operand when the instruction accepts
 6254 // multiple operand types with the same basic encoding and format.  The classic
 6255 // case of this is memory operands.
 6256 
 6257 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6258                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6259                indCompressedOop, indCompressedOopOffset,
 6260                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6261                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6262                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
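
      // A single rule written against this "memory" opclass (e.g. loadB below)
      // is expanded by ADLC to cover every addressing form listed above, from
      // a plain [$reg] up to [$reg + $off + $idx << $scale].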
 6263 
 6264 //----------PIPELINE-----------------------------------------------------------
 6265 // Rules which define the behavior of the target architectures pipeline.
 6266 pipeline %{
 6267 
 6268 //----------ATTRIBUTES---------------------------------------------------------
 6269 attributes %{
 6270   variable_size_instructions;        // Instructions are of variable size
 6271   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6272   instruction_unit_size = 1;         // An instruction is 1 byte long
 6273   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6274   instruction_fetch_units = 1;       // of 16 bytes
 6275 %}
 6276 
 6277 //----------RESOURCES----------------------------------------------------------
 6278 // Resources are the functional units available to the machine
 6279 
 6280 // Generic P2/P3 pipeline
 6281 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6282 // 3 instructions decoded per cycle.
 6283 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 6284 // 3 ALU ops, only ALU0 handles mul instructions.
 6285 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6286            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6287            BR, FPU,
 6288            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6289 
 6290 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6291 // Pipeline Description specifies the stages in the machine's pipeline
 6292 
 6293 // Generic P2/P3 pipeline
 6294 pipe_desc(S0, S1, S2, S3, S4, S5);
 6295 
 6296 //----------PIPELINE CLASSES---------------------------------------------------
 6297 // Pipeline Classes describe the stages in which input and output are
 6298 // referenced by the hardware pipeline.
 6299 
 6300 // Naming convention: ialu or fpu
 6301 // Then: _reg
 6302 // Then: _reg if there is a 2nd register
 6303 // Then: _long if it's a pair of instructions implementing a long
 6304 // Then: _fat if it requires the big decoder
 6305 //   Or: _mem if it requires the big decoder and a memory unit.
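      //
      // For example, "ialu_reg_mem" below names an integer ALU class whose
      // second input is a memory operand (big decoder D0 plus a MEM unit),
      // while "fpu_reg_reg" operates on two FPU registers.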
 6306 
 6307 // Integer ALU reg operation
 6308 pipe_class ialu_reg(rRegI dst)
 6309 %{
 6310     single_instruction;
 6311     dst    : S4(write);
 6312     dst    : S3(read);
 6313     DECODE : S0;        // any decoder
 6314     ALU    : S3;        // any alu
 6315 %}
 6316 
 6317 // Long ALU reg operation
 6318 pipe_class ialu_reg_long(rRegL dst)
 6319 %{
 6320     instruction_count(2);
 6321     dst    : S4(write);
 6322     dst    : S3(read);
 6323     DECODE : S0(2);     // any 2 decoders
 6324     ALU    : S3(2);     // both alus
 6325 %}
 6326 
 6327 // Integer ALU reg operation using big decoder
 6328 pipe_class ialu_reg_fat(rRegI dst)
 6329 %{
 6330     single_instruction;
 6331     dst    : S4(write);
 6332     dst    : S3(read);
 6333     D0     : S0;        // big decoder only
 6334     ALU    : S3;        // any alu
 6335 %}
 6336 
 6337 // Integer ALU reg-reg operation
 6338 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6339 %{
 6340     single_instruction;
 6341     dst    : S4(write);
 6342     src    : S3(read);
 6343     DECODE : S0;        // any decoder
 6344     ALU    : S3;        // any alu
 6345 %}
 6346 
 6347 // Integer ALU reg-reg operation
 6348 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6349 %{
 6350     single_instruction;
 6351     dst    : S4(write);
 6352     src    : S3(read);
 6353     D0     : S0;        // big decoder only
 6354     ALU    : S3;        // any alu
 6355 %}
 6356 
 6357 // Integer ALU reg-mem operation
 6358 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6359 %{
 6360     single_instruction;
 6361     dst    : S5(write);
 6362     mem    : S3(read);
 6363     D0     : S0;        // big decoder only
 6364     ALU    : S4;        // any alu
 6365     MEM    : S3;        // any mem
 6366 %}
 6367 
 6368 // Integer mem operation (prefetch)
 6369 pipe_class ialu_mem(memory mem)
 6370 %{
 6371     single_instruction;
 6372     mem    : S3(read);
 6373     D0     : S0;        // big decoder only
 6374     MEM    : S3;        // any mem
 6375 %}
 6376 
 6377 // Integer Store to Memory
 6378 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6379 %{
 6380     single_instruction;
 6381     mem    : S3(read);
 6382     src    : S5(read);
 6383     D0     : S0;        // big decoder only
 6384     ALU    : S4;        // any alu
 6385     MEM    : S3;
 6386 %}
 6387 
 6388 // // Long Store to Memory
 6389 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6390 // %{
 6391 //     instruction_count(2);
 6392 //     mem    : S3(read);
 6393 //     src    : S5(read);
 6394 //     D0     : S0(2);          // big decoder only; twice
 6395 //     ALU    : S4(2);     // any 2 alus
 6396 //     MEM    : S3(2);  // Both mems
 6397 // %}
 6398 
 6399 // Integer Store of Immediate to Memory
 6400 pipe_class ialu_mem_imm(memory mem)
 6401 %{
 6402     single_instruction;
 6403     mem    : S3(read);
 6404     D0     : S0;        // big decoder only
 6405     ALU    : S4;        // any alu
 6406     MEM    : S3;
 6407 %}
 6408 
 6409 // Integer ALU0 reg-reg operation
 6410 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6411 %{
 6412     single_instruction;
 6413     dst    : S4(write);
 6414     src    : S3(read);
 6415     D0     : S0;        // Big decoder only
 6416     ALU0   : S3;        // only alu0
 6417 %}
 6418 
 6419 // Integer ALU0 reg-mem operation
 6420 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6421 %{
 6422     single_instruction;
 6423     dst    : S5(write);
 6424     mem    : S3(read);
 6425     D0     : S0;        // big decoder only
 6426     ALU0   : S4;        // ALU0 only
 6427     MEM    : S3;        // any mem
 6428 %}
 6429 
 6430 // Integer ALU reg-reg operation
 6431 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6432 %{
 6433     single_instruction;
 6434     cr     : S4(write);
 6435     src1   : S3(read);
 6436     src2   : S3(read);
 6437     DECODE : S0;        // any decoder
 6438     ALU    : S3;        // any alu
 6439 %}
 6440 
 6441 // Integer ALU reg-imm operation
 6442 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6443 %{
 6444     single_instruction;
 6445     cr     : S4(write);
 6446     src1   : S3(read);
 6447     DECODE : S0;        // any decoder
 6448     ALU    : S3;        // any alu
 6449 %}
 6450 
 6451 // Integer ALU reg-mem operation
 6452 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6453 %{
 6454     single_instruction;
 6455     cr     : S4(write);
 6456     src1   : S3(read);
 6457     src2   : S3(read);
 6458     D0     : S0;        // big decoder only
 6459     ALU    : S4;        // any alu
 6460     MEM    : S3;
 6461 %}
 6462 
 6463 // Conditional move reg-reg
 6464 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6465 %{
 6466     instruction_count(4);
 6467     y      : S4(read);
 6468     q      : S3(read);
 6469     p      : S3(read);
 6470     DECODE : S0(4);     // any decoder
 6471 %}
 6472 
 6473 // Conditional move reg-reg
 6474 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6475 %{
 6476     single_instruction;
 6477     dst    : S4(write);
 6478     src    : S3(read);
 6479     cr     : S3(read);
 6480     DECODE : S0;        // any decoder
 6481 %}
 6482 
 6483 // Conditional move reg-mem
 6484 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6485 %{
 6486     single_instruction;
 6487     dst    : S4(write);
 6488     src    : S3(read);
 6489     cr     : S3(read);
 6490     DECODE : S0;        // any decoder
 6491     MEM    : S3;
 6492 %}
 6493 
 6494 // Conditional move reg-reg long
 6495 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6496 %{
 6497     single_instruction;
 6498     dst    : S4(write);
 6499     src    : S3(read);
 6500     cr     : S3(read);
 6501     DECODE : S0(2);     // any 2 decoders
 6502 %}
 6503 
 6504 // Float reg-reg operation
 6505 pipe_class fpu_reg(regD dst)
 6506 %{
 6507     instruction_count(2);
 6508     dst    : S3(read);
 6509     DECODE : S0(2);     // any 2 decoders
 6510     FPU    : S3;
 6511 %}
 6512 
 6513 // Float reg-reg operation
 6514 pipe_class fpu_reg_reg(regD dst, regD src)
 6515 %{
 6516     instruction_count(2);
 6517     dst    : S4(write);
 6518     src    : S3(read);
 6519     DECODE : S0(2);     // any 2 decoders
 6520     FPU    : S3;
 6521 %}
 6522 
 6523 // Float reg-reg operation
 6524 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6525 %{
 6526     instruction_count(3);
 6527     dst    : S4(write);
 6528     src1   : S3(read);
 6529     src2   : S3(read);
 6530     DECODE : S0(3);     // any 3 decoders
 6531     FPU    : S3(2);
 6532 %}
 6533 
 6534 // Float reg-reg operation
 6535 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6536 %{
 6537     instruction_count(4);
 6538     dst    : S4(write);
 6539     src1   : S3(read);
 6540     src2   : S3(read);
 6541     src3   : S3(read);
 6542     DECODE : S0(4);     // any 4 decoders
 6543     FPU    : S3(2);
 6544 %}
 6545 
 6546 // Float reg-reg operation
 6547 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6548 %{
 6549     instruction_count(4);
 6550     dst    : S4(write);
 6551     src1   : S3(read);
 6552     src2   : S3(read);
 6553     src3   : S3(read);
 6554     DECODE : S1(3);     // any 3 decoders
 6555     D0     : S0;        // Big decoder only
 6556     FPU    : S3(2);
 6557     MEM    : S3;
 6558 %}
 6559 
 6560 // Float reg-mem operation
 6561 pipe_class fpu_reg_mem(regD dst, memory mem)
 6562 %{
 6563     instruction_count(2);
 6564     dst    : S5(write);
 6565     mem    : S3(read);
 6566     D0     : S0;        // big decoder only
 6567     DECODE : S1;        // any decoder for FPU POP
 6568     FPU    : S4;
 6569     MEM    : S3;        // any mem
 6570 %}
 6571 
 6572 // Float reg-mem operation
 6573 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6574 %{
 6575     instruction_count(3);
 6576     dst    : S5(write);
 6577     src1   : S3(read);
 6578     mem    : S3(read);
 6579     D0     : S0;        // big decoder only
 6580     DECODE : S1(2);     // any decoder for FPU POP
 6581     FPU    : S4;
 6582     MEM    : S3;        // any mem
 6583 %}
 6584 
 6585 // Float mem-reg operation
 6586 pipe_class fpu_mem_reg(memory mem, regD src)
 6587 %{
 6588     instruction_count(2);
 6589     src    : S5(read);
 6590     mem    : S3(read);
 6591     DECODE : S0;        // any decoder for FPU PUSH
 6592     D0     : S1;        // big decoder only
 6593     FPU    : S4;
 6594     MEM    : S3;        // any mem
 6595 %}
 6596 
 6597 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6598 %{
 6599     instruction_count(3);
 6600     src1   : S3(read);
 6601     src2   : S3(read);
 6602     mem    : S3(read);
 6603     DECODE : S0(2);     // any decoder for FPU PUSH
 6604     D0     : S1;        // big decoder only
 6605     FPU    : S4;
 6606     MEM    : S3;        // any mem
 6607 %}
 6608 
 6609 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6610 %{
 6611     instruction_count(3);
 6612     src1   : S3(read);
 6613     src2   : S3(read);
 6614     mem    : S4(read);
 6615     DECODE : S0;        // any decoder for FPU PUSH
 6616     D0     : S0(2);     // big decoder only
 6617     FPU    : S4;
 6618     MEM    : S3(2);     // any mem
 6619 %}
 6620 
 6621 pipe_class fpu_mem_mem(memory dst, memory src1)
 6622 %{
 6623     instruction_count(2);
 6624     src1   : S3(read);
 6625     dst    : S4(read);
 6626     D0     : S0(2);     // big decoder only
 6627     MEM    : S3(2);     // any mem
 6628 %}
 6629 
 6630 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6631 %{
 6632     instruction_count(3);
 6633     src1   : S3(read);
 6634     src2   : S3(read);
 6635     dst    : S4(read);
 6636     D0     : S0(3);     // big decoder only
 6637     FPU    : S4;
 6638     MEM    : S3(3);     // any mem
 6639 %}
 6640 
 6641 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6642 %{
 6643     instruction_count(3);
 6644     src1   : S4(read);
 6645     mem    : S4(read);
 6646     DECODE : S0;        // any decoder for FPU PUSH
 6647     D0     : S0(2);     // big decoder only
 6648     FPU    : S4;
 6649     MEM    : S3(2);     // any mem
 6650 %}
 6651 
 6652 // Float load constant
 6653 pipe_class fpu_reg_con(regD dst)
 6654 %{
 6655     instruction_count(2);
 6656     dst    : S5(write);
 6657     D0     : S0;        // big decoder only for the load
 6658     DECODE : S1;        // any decoder for FPU POP
 6659     FPU    : S4;
 6660     MEM    : S3;        // any mem
 6661 %}
 6662 
 6663 // Float load constant
 6664 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6665 %{
 6666     instruction_count(3);
 6667     dst    : S5(write);
 6668     src    : S3(read);
 6669     D0     : S0;        // big decoder only for the load
 6670     DECODE : S1(2);     // any decoder for FPU POP
 6671     FPU    : S4;
 6672     MEM    : S3;        // any mem
 6673 %}
 6674 
 6675 // Unconditional branch
 6676 pipe_class pipe_jmp(label labl)
 6677 %{
 6678     single_instruction;
 6679     BR   : S3;
 6680 %}
 6681 
 6682 // Conditional branch
 6683 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6684 %{
 6685     single_instruction;
 6686     cr    : S1(read);
 6687     BR    : S3;
 6688 %}
 6689 
 6690 // Allocation idiom
 6691 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6692 %{
 6693     instruction_count(1); force_serialization;
 6694     fixed_latency(6);
 6695     heap_ptr : S3(read);
 6696     DECODE   : S0(3);
 6697     D0       : S2;
 6698     MEM      : S3;
 6699     ALU      : S3(2);
 6700     dst      : S5(write);
 6701     BR       : S5;
 6702 %}
 6703 
 6704 // Generic big/slow expanded idiom
 6705 pipe_class pipe_slow()
 6706 %{
 6707     instruction_count(10); multiple_bundles; force_serialization;
 6708     fixed_latency(100);
 6709     D0  : S0(2);
 6710     MEM : S3(2);
 6711 %}
 6712 
 6713 // The real do-nothing guy
 6714 pipe_class empty()
 6715 %{
 6716     instruction_count(0);
 6717 %}
 6718 
 6719 // Define the class for the Nop node
 6720 define
 6721 %{
 6722    MachNop = empty;
 6723 %}
 6724 
 6725 %}
 6726 
 6727 //----------INSTRUCTIONS-------------------------------------------------------
 6728 //
 6729 // match      -- States which machine-independent subtree may be replaced
 6730 //               by this instruction.
 6731 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6732 //               selection to identify a minimum cost tree of machine
 6733 //               instructions that matches a tree of machine-independent
 6734 //               instructions.
 6735 // format     -- A string providing the disassembly for this instruction.
 6736 //               The value of an instruction's operand may be inserted
 6737 //               by referring to it with a '$' prefix.
 6738 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6739 //               to within an encode class as $primary, $secondary, and $tertiary
 6740 //               respectively.  The primary opcode is commonly used to
 6741 //               indicate the type of machine instruction, while secondary
 6742 //               and tertiary are often used for prefix options or addressing
 6743 //               modes.
 6744 // ins_encode -- A list of encode classes with parameters. The encode class
 6745 //               name must have been defined in an 'enc_class' specification
 6746 //               in the encode section of the architecture description.
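      //
      // A minimal, hedged sketch of the resulting shape (illustrative names;
      // the actual rules follow below):
      //
      // instruct addI_rReg_example(rRegI dst, rRegI src, rFlagsReg cr)
      // %{
      //   match(Set dst (AddI dst src));
      //   effect(KILL cr);
      //   ins_cost(200);
      //   format %{ "addl    $dst, $src" %}
      //   ins_encode %{ __ addl($dst$$Register, $src$$Register); %}
      //   ins_pipe(ialu_reg_reg);
      // %}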
 6747 
 6748 // ============================================================================
 6749 
 6750 instruct ShouldNotReachHere() %{
 6751   match(Halt);
 6752   format %{ "stop\t# ShouldNotReachHere" %}
 6753   ins_encode %{
 6754     if (is_reachable()) {
 6755       const char* str = __ code_string(_halt_reason);
 6756       __ stop(str);
 6757     }
 6758   %}
 6759   ins_pipe(pipe_slow);
 6760 %}
 6761 
 6762 // ============================================================================
 6763 
 6764 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6765 // Load Float
 6766 instruct MoveF2VL(vlRegF dst, regF src) %{
 6767   match(Set dst src);
 6768   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6769   ins_encode %{
 6770     ShouldNotReachHere();
 6771   %}
 6772   ins_pipe( fpu_reg_reg );
 6773 %}
 6774 
 6775 // Load Float
 6776 instruct MoveF2LEG(legRegF dst, regF src) %{
 6777   match(Set dst src);
 6778   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6779   ins_encode %{
 6780     ShouldNotReachHere();
 6781   %}
 6782   ins_pipe( fpu_reg_reg );
 6783 %}
 6784 
 6785 // Load Float
 6786 instruct MoveVL2F(regF dst, vlRegF src) %{
 6787   match(Set dst src);
 6788   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6789   ins_encode %{
 6790     ShouldNotReachHere();
 6791   %}
 6792   ins_pipe( fpu_reg_reg );
 6793 %}
 6794 
 6795 // Load Float
 6796 instruct MoveLEG2F(regF dst, legRegF src) %{
 6797   match(Set dst src);
 6798   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6799   ins_encode %{
 6800     ShouldNotReachHere();
 6801   %}
 6802   ins_pipe( fpu_reg_reg );
 6803 %}
 6804 
 6805 // Load Double
 6806 instruct MoveD2VL(vlRegD dst, regD src) %{
 6807   match(Set dst src);
 6808   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6809   ins_encode %{
 6810     ShouldNotReachHere();
 6811   %}
 6812   ins_pipe( fpu_reg_reg );
 6813 %}
 6814 
 6815 // Load Double
 6816 instruct MoveD2LEG(legRegD dst, regD src) %{
 6817   match(Set dst src);
 6818   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6819   ins_encode %{
 6820     ShouldNotReachHere();
 6821   %}
 6822   ins_pipe( fpu_reg_reg );
 6823 %}
 6824 
 6825 // Load Double
 6826 instruct MoveVL2D(regD dst, vlRegD src) %{
 6827   match(Set dst src);
 6828   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6829   ins_encode %{
 6830     ShouldNotReachHere();
 6831   %}
 6832   ins_pipe( fpu_reg_reg );
 6833 %}
 6834 
 6835 // Load Double
 6836 instruct MoveLEG2D(regD dst, legRegD src) %{
 6837   match(Set dst src);
 6838   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6839   ins_encode %{
 6840     ShouldNotReachHere();
 6841   %}
 6842   ins_pipe( fpu_reg_reg );
 6843 %}
 6844 
 6845 //----------Load/Store/Move Instructions---------------------------------------
 6846 //----------Load Instructions--------------------------------------------------
 6847 
 6848 // Load Byte (8 bit signed)
 6849 instruct loadB(rRegI dst, memory mem)
 6850 %{
 6851   match(Set dst (LoadB mem));
 6852 
 6853   ins_cost(125);
 6854   format %{ "movsbl  $dst, $mem\t# byte" %}
 6855 
 6856   ins_encode %{
 6857     __ movsbl($dst$$Register, $mem$$Address);
 6858   %}
 6859 
 6860   ins_pipe(ialu_reg_mem);
 6861 %}
 6862 
 6863 // Load Byte (8 bit signed) into Long Register
 6864 instruct loadB2L(rRegL dst, memory mem)
 6865 %{
 6866   match(Set dst (ConvI2L (LoadB mem)));
 6867 
 6868   ins_cost(125);
 6869   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6870 
 6871   ins_encode %{
 6872     __ movsbq($dst$$Register, $mem$$Address);
 6873   %}
 6874 
 6875   ins_pipe(ialu_reg_mem);
 6876 %}
 6877 
 6878 // Load Unsigned Byte (8 bit UNsigned)
 6879 instruct loadUB(rRegI dst, memory mem)
 6880 %{
 6881   match(Set dst (LoadUB mem));
 6882 
 6883   ins_cost(125);
 6884   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6885 
 6886   ins_encode %{
 6887     __ movzbl($dst$$Register, $mem$$Address);
 6888   %}
 6889 
 6890   ins_pipe(ialu_reg_mem);
 6891 %}
 6892 
 6893 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6894 instruct loadUB2L(rRegL dst, memory mem)
 6895 %{
 6896   match(Set dst (ConvI2L (LoadUB mem)));
 6897 
 6898   ins_cost(125);
 6899   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6900 
 6901   ins_encode %{
 6902     __ movzbq($dst$$Register, $mem$$Address);
 6903   %}
 6904 
 6905   ins_pipe(ialu_reg_mem);
 6906 %}
 6907 
 6908 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 6909 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6910   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6911   effect(KILL cr);
 6912 
 6913   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6914             "andl    $dst, right_n_bits($mask, 8)" %}
 6915   ins_encode %{
 6916     Register Rdst = $dst$$Register;
 6917     __ movzbq(Rdst, $mem$$Address);
 6918     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 6919   %}
 6920   ins_pipe(ialu_reg_mem);
 6921 %}
 6922 
 6923 // Load Short (16 bit signed)
 6924 instruct loadS(rRegI dst, memory mem)
 6925 %{
 6926   match(Set dst (LoadS mem));
 6927 
 6928   ins_cost(125);
 6929   format %{ "movswl $dst, $mem\t# short" %}
 6930 
 6931   ins_encode %{
 6932     __ movswl($dst$$Register, $mem$$Address);
 6933   %}
 6934 
 6935   ins_pipe(ialu_reg_mem);
 6936 %}
 6937 
 6938 // Load Short (16 bit signed) to Byte (8 bit signed)
 6939 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6940   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 6941 
 6942   ins_cost(125);
 6943   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6944   ins_encode %{
 6945     __ movsbl($dst$$Register, $mem$$Address);
 6946   %}
 6947   ins_pipe(ialu_reg_mem);
 6948 %}
 6949 
 6950 // Load Short (16 bit signed) into Long Register
 6951 instruct loadS2L(rRegL dst, memory mem)
 6952 %{
 6953   match(Set dst (ConvI2L (LoadS mem)));
 6954 
 6955   ins_cost(125);
 6956   format %{ "movswq $dst, $mem\t# short -> long" %}
 6957 
 6958   ins_encode %{
 6959     __ movswq($dst$$Register, $mem$$Address);
 6960   %}
 6961 
 6962   ins_pipe(ialu_reg_mem);
 6963 %}
 6964 
 6965 // Load Unsigned Short/Char (16 bit UNsigned)
 6966 instruct loadUS(rRegI dst, memory mem)
 6967 %{
 6968   match(Set dst (LoadUS mem));
 6969 
 6970   ins_cost(125);
 6971   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 6972 
 6973   ins_encode %{
 6974     __ movzwl($dst$$Register, $mem$$Address);
 6975   %}
 6976 
 6977   ins_pipe(ialu_reg_mem);
 6978 %}
 6979 
 6980 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 6981 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6982   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 6983 
 6984   ins_cost(125);
 6985   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 6986   ins_encode %{
 6987     __ movsbl($dst$$Register, $mem$$Address);
 6988   %}
 6989   ins_pipe(ialu_reg_mem);
 6990 %}
 6991 
 6992 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 6993 instruct loadUS2L(rRegL dst, memory mem)
 6994 %{
 6995   match(Set dst (ConvI2L (LoadUS mem)));
 6996 
 6997   ins_cost(125);
 6998   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 6999 
 7000   ins_encode %{
 7001     __ movzwq($dst$$Register, $mem$$Address);
 7002   %}
 7003 
 7004   ins_pipe(ialu_reg_mem);
 7005 %}
 7006 
 7007 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 7008 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7009   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7010 
 7011   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7012   ins_encode %{
 7013     __ movzbq($dst$$Register, $mem$$Address);
 7014   %}
 7015   ins_pipe(ialu_reg_mem);
 7016 %}
 7017 
 7018 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 7019 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7020   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7021   effect(KILL cr);
 7022 
 7023   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7024             "andl    $dst, right_n_bits($mask, 16)" %}
 7025   ins_encode %{
 7026     Register Rdst = $dst$$Register;
 7027     __ movzwq(Rdst, $mem$$Address);
 7028     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7029   %}
 7030   ins_pipe(ialu_reg_mem);
 7031 %}
 7032 
 7033 // Load Integer
 7034 instruct loadI(rRegI dst, memory mem)
 7035 %{
 7036   match(Set dst (LoadI mem));
 7037 
 7038   ins_cost(125);
 7039   format %{ "movl    $dst, $mem\t# int" %}
 7040 
 7041   ins_encode %{
 7042     __ movl($dst$$Register, $mem$$Address);
 7043   %}
 7044 
 7045   ins_pipe(ialu_reg_mem);
 7046 %}
 7047 
 7048 // Load Integer (32 bit signed) to Byte (8 bit signed)
 7049 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7050   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7051 
 7052   ins_cost(125);
 7053   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7054   ins_encode %{
 7055     __ movsbl($dst$$Register, $mem$$Address);
 7056   %}
 7057   ins_pipe(ialu_reg_mem);
 7058 %}
 7059 
 7060 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 7061 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7062   match(Set dst (AndI (LoadI mem) mask));
 7063 
 7064   ins_cost(125);
 7065   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7066   ins_encode %{
 7067     __ movzbl($dst$$Register, $mem$$Address);
 7068   %}
 7069   ins_pipe(ialu_reg_mem);
 7070 %}
 7071 
 7072 // Load Integer (32 bit signed) to Short (16 bit signed)
 7073 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7074   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7075 
 7076   ins_cost(125);
 7077   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7078   ins_encode %{
 7079     __ movswl($dst$$Register, $mem$$Address);
 7080   %}
 7081   ins_pipe(ialu_reg_mem);
 7082 %}
 7083 
 7084 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 7085 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7086   match(Set dst (AndI (LoadI mem) mask));
 7087 
 7088   ins_cost(125);
 7089   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7090   ins_encode %{
 7091     __ movzwl($dst$$Register, $mem$$Address);
 7092   %}
 7093   ins_pipe(ialu_reg_mem);
 7094 %}
 7095 
 7096 // Load Integer into Long Register
 7097 instruct loadI2L(rRegL dst, memory mem)
 7098 %{
 7099   match(Set dst (ConvI2L (LoadI mem)));
 7100 
 7101   ins_cost(125);
 7102   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7103 
 7104   ins_encode %{
 7105     __ movslq($dst$$Register, $mem$$Address);
 7106   %}
 7107 
 7108   ins_pipe(ialu_reg_mem);
 7109 %}
 7110 
 7111 // Load Integer with mask 0xFF into Long Register
 7112 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7113   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7114 
 7115   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7116   ins_encode %{
 7117     __ movzbq($dst$$Register, $mem$$Address);
 7118   %}
 7119   ins_pipe(ialu_reg_mem);
 7120 %}
 7121 
 7122 // Load Integer with mask 0xFFFF into Long Register
 7123 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7124   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7125 
 7126   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7127   ins_encode %{
 7128     __ movzwq($dst$$Register, $mem$$Address);
 7129   %}
 7130   ins_pipe(ialu_reg_mem);
 7131 %}
 7132 
 7133 // Load Integer with a 31-bit mask into Long Register
 7134 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7135   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7136   effect(KILL cr);
 7137 
 7138   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7139             "andl    $dst, $mask" %}
 7140   ins_encode %{
 7141     Register Rdst = $dst$$Register;
 7142     __ movl(Rdst, $mem$$Address);
 7143     __ andl(Rdst, $mask$$constant);
 7144   %}
 7145   ins_pipe(ialu_reg_mem);
 7146 %}
 7147 
 7148 // Load Unsigned Integer into Long Register
 7149 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7150 %{
 7151   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7152 
 7153   ins_cost(125);
 7154   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7155 
 7156   ins_encode %{
 7157     __ movl($dst$$Register, $mem$$Address);
 7158   %}
 7159 
 7160   ins_pipe(ialu_reg_mem);
 7161 %}
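// Example (illustrative): the three zero-extending int-to-long loads above
// cover Java expressions such as
//   long b = x & 0xFF;          // ConvI2L(AndI(LoadI, mask)) -> movzbq
//   long c = x & 0xFFFF;        // ConvI2L(AndI(LoadI, mask)) -> movzwq
//   long u = x & 0xFFFFFFFFL;   // AndL(ConvI2L(LoadI), mask) -> movl
// The last case needs no explicit extension: a 32-bit movl implicitly zeroes
// the upper 32 bits of the destination register on x86-64.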
 7162 
 7163 // Load Long
 7164 instruct loadL(rRegL dst, memory mem)
 7165 %{
 7166   match(Set dst (LoadL mem));
 7167 
 7168   ins_cost(125);
 7169   format %{ "movq    $dst, $mem\t# long" %}
 7170 
 7171   ins_encode %{
 7172     __ movq($dst$$Register, $mem$$Address);
 7173   %}
 7174 
 7175   ins_pipe(ialu_reg_mem); // XXX
 7176 %}
 7177 
 7178 // Load Range
 7179 instruct loadRange(rRegI dst, memory mem)
 7180 %{
 7181   match(Set dst (LoadRange mem));
 7182 
 7183   ins_cost(125); // XXX
 7184   format %{ "movl    $dst, $mem\t# range" %}
 7185   ins_encode %{
 7186     __ movl($dst$$Register, $mem$$Address);
 7187   %}
 7188   ins_pipe(ialu_reg_mem);
 7189 %}
 7190 
 7191 // Load Pointer
 7192 instruct loadP(rRegP dst, memory mem)
 7193 %{
 7194   match(Set dst (LoadP mem));
 7195   predicate(n->as_Load()->barrier_data() == 0);
 7196 
 7197   ins_cost(125); // XXX
 7198   format %{ "movq    $dst, $mem\t# ptr" %}
 7199   ins_encode %{
 7200     __ movq($dst$$Register, $mem$$Address);
 7201   %}
 7202   ins_pipe(ialu_reg_mem); // XXX
 7203 %}
 7204 
 7205 // Load Compressed Pointer
 7206 instruct loadN(rRegN dst, memory mem)
 7207 %{
  predicate(n->as_Load()->barrier_data() == 0);
  match(Set dst (LoadN mem));

  ins_cost(125); // XXX
  format %{ "movl    $dst, $mem\t# compressed ptr" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
 7217 %}
 7218 
 7220 // Load Klass Pointer
 7221 instruct loadKlass(rRegP dst, memory mem)
 7222 %{
 7223   match(Set dst (LoadKlass mem));
 7224 
 7225   ins_cost(125); // XXX
 7226   format %{ "movq    $dst, $mem\t# class" %}
 7227   ins_encode %{
 7228     __ movq($dst$$Register, $mem$$Address);
 7229   %}
 7230   ins_pipe(ialu_reg_mem); // XXX
 7231 %}
 7232 
 7233 // Load narrow Klass Pointer
 7234 instruct loadNKlass(rRegN dst, memory mem)
 7235 %{
 7236   predicate(!UseCompactObjectHeaders);
 7237   match(Set dst (LoadNKlass mem));
 7238 
 7239   ins_cost(125); // XXX
 7240   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7241   ins_encode %{
 7242     __ movl($dst$$Register, $mem$$Address);
 7243   %}
 7244   ins_pipe(ialu_reg_mem); // XXX
 7245 %}
 7246 
 7247 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7248 %{
 7249   predicate(UseCompactObjectHeaders);
 7250   match(Set dst (LoadNKlass mem));
 7251   effect(KILL cr);
 7252   ins_cost(125);
 7253   format %{
 7254     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7255     "shrl    $dst, markWord::klass_shift_at_offset"
 7256   %}
 7257   ins_encode %{
 7258     if (UseAPX) {
 7259       __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
    } else {
 7262       __ movl($dst$$Register, $mem$$Address);
 7263       __ shrl($dst$$Register, markWord::klass_shift_at_offset);
 7264     }
 7265   %}
 7266   ins_pipe(ialu_reg_mem);
 7267 %}
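// With compact object headers the narrow klass pointer is stored pre-shifted
// in the upper bits of the object's mark word, so a plain 32-bit load also
// picks up low mark-word bits that must be shifted out; hence the extra
// shrl (fused into a single non-destructive eshrl when APX is available).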
 7268 
 7269 // Load Float
 7270 instruct loadF(regF dst, memory mem)
 7271 %{
 7272   match(Set dst (LoadF mem));
 7273 
 7274   ins_cost(145); // XXX
 7275   format %{ "movss   $dst, $mem\t# float" %}
 7276   ins_encode %{
 7277     __ movflt($dst$$XMMRegister, $mem$$Address);
 7278   %}
 7279   ins_pipe(pipe_slow); // XXX
 7280 %}
 7281 
 7282 // Load Double
 7283 instruct loadD_partial(regD dst, memory mem)
 7284 %{
 7285   predicate(!UseXmmLoadAndClearUpper);
 7286   match(Set dst (LoadD mem));
 7287 
 7288   ins_cost(145); // XXX
 7289   format %{ "movlpd  $dst, $mem\t# double" %}
 7290   ins_encode %{
 7291     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7292   %}
 7293   ins_pipe(pipe_slow); // XXX
 7294 %}
 7295 
 7296 instruct loadD(regD dst, memory mem)
 7297 %{
 7298   predicate(UseXmmLoadAndClearUpper);
 7299   match(Set dst (LoadD mem));
 7300 
 7301   ins_cost(145); // XXX
 7302   format %{ "movsd   $dst, $mem\t# double" %}
 7303   ins_encode %{
 7304     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7305   %}
 7306   ins_pipe(pipe_slow); // XXX
 7307 %}
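// The two LoadD flavors differ only in how they treat the upper half of the
// XMM register: movsd from memory zeroes bits 127:64 and starts a fresh
// dependency chain, while movlpd merges into the existing upper half and can
// stall on the register's previous contents. UseXmmLoadAndClearUpper selects
// the clearing form on CPUs where it is the faster of the two.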
 7308 
 7309 // max = java.lang.Math.max(float a, float b)
 7310 instruct maxF_avx10_reg(regF dst, regF a, regF b) %{
 7311   predicate(VM_Version::supports_avx10_2());
 7312   match(Set dst (MaxF a b));
 7313   format %{ "maxF $dst, $a, $b" %}
 7314   ins_encode %{
 7315     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
 7316   %}
 7317   ins_pipe( pipe_slow );
 7318 %}
 7319 
 7320 // max = java.lang.Math.max(float a, float b)
 7321 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7322   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7323   match(Set dst (MaxF a b));
 7324   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7325   format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7326   ins_encode %{
 7327     __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7328   %}
 7329   ins_pipe( pipe_slow );
 7330 %}
 7331 
 7332 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7333   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7334   match(Set dst (MaxF a b));
 7335   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7336 
  format %{ "maxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7338   ins_encode %{
 7339     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7340                     false /*min*/, true /*single*/);
 7341   %}
 7342   ins_pipe( pipe_slow );
 7343 %}
 7344 
 7345 // max = java.lang.Math.max(double a, double b)
 7346 instruct maxD_avx10_reg(regD dst, regD a, regD b) %{
 7347   predicate(VM_Version::supports_avx10_2());
 7348   match(Set dst (MaxD a b));
 7349   format %{ "maxD $dst, $a, $b" %}
 7350   ins_encode %{
 7351     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
 7352   %}
 7353   ins_pipe( pipe_slow );
 7354 %}
 7355 
 7356 // max = java.lang.Math.max(double a, double b)
 7357 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7358   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7359   match(Set dst (MaxD a b));
 7360   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7361   format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7362   ins_encode %{
 7363     __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7364   %}
 7365   ins_pipe( pipe_slow );
 7366 %}
 7367 
 7368 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7369   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7370   match(Set dst (MaxD a b));
 7371   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7372 
 7373   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7374   ins_encode %{
 7375     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7376                     false /*min*/, false /*single*/);
 7377   %}
 7378   ins_pipe( pipe_slow );
 7379 %}
 7380 
// min = java.lang.Math.min(float a, float b)
 7382 instruct minF_avx10_reg(regF dst, regF a, regF b) %{
 7383   predicate(VM_Version::supports_avx10_2());
 7384   match(Set dst (MinF a b));
 7385   format %{ "minF $dst, $a, $b" %}
 7386   ins_encode %{
 7387     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
 7388   %}
 7389   ins_pipe( pipe_slow );
 7390 %}
 7391 
 7392 // min = java.lang.Math.min(float a, float b)
 7393 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7394   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7395   match(Set dst (MinF a b));
 7396   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7397   format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7398   ins_encode %{
 7399     __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7400   %}
 7401   ins_pipe( pipe_slow );
 7402 %}
 7403 
 7404 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7405   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7406   match(Set dst (MinF a b));
 7407   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7408 
 7409   format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7410   ins_encode %{
 7411     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7412                     true /*min*/, true /*single*/);
 7413   %}
 7414   ins_pipe( pipe_slow );
 7415 %}
 7416 
// min = java.lang.Math.min(double a, double b)
 7418 instruct minD_avx10_reg(regD dst, regD a, regD b) %{
 7419   predicate(VM_Version::supports_avx10_2());
 7420   match(Set dst (MinD a b));
 7421   format %{ "minD $dst, $a, $b" %}
 7422   ins_encode %{
 7423     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
 7424   %}
 7425   ins_pipe( pipe_slow );
 7426 %}
 7427 
 7428 // min = java.lang.Math.min(double a, double b)
 7429 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7430   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7431   match(Set dst (MinD a b));
 7432   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7434   ins_encode %{
 7435     __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7436   %}
 7437   ins_pipe( pipe_slow );
 7438 %}
 7439 
 7440 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7441   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7442   match(Set dst (MinD a b));
 7443   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7444 
  format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7446   ins_encode %{
 7447     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7448                     true /*min*/, false /*single*/);
 7449   %}
 7450   ins_pipe( pipe_slow );
 7451 %}
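// Why plain maxss/minss is not enough (illustrative Java, not generated
// code): Math.max/min must propagate NaN and order -0.0 below +0.0, while
// the SSE instructions return the second operand on NaN and compare the two
// zeros as equal, e.g.
//   Math.max(-0.0f, +0.0f) == +0.0f   // maxss result depends on operand order
//   Math.max(Float.NaN, 1.0f)         // NaN, regardless of operand order
// AVX10.2's eminmaxss/eminmaxsd implement these semantics directly; older
// AVX hardware needs the blend/compare sequences in vminmax_fp and
// emit_fp_min_max above.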
 7452 
 7453 // Load Effective Address
 7454 instruct leaP8(rRegP dst, indOffset8 mem)
 7455 %{
 7456   match(Set dst mem);
 7457 
 7458   ins_cost(110); // XXX
 7459   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7460   ins_encode %{
 7461     __ leaq($dst$$Register, $mem$$Address);
 7462   %}
 7463   ins_pipe(ialu_reg_reg_fat);
 7464 %}
 7465 
 7466 instruct leaP32(rRegP dst, indOffset32 mem)
 7467 %{
 7468   match(Set dst mem);
 7469 
 7470   ins_cost(110);
 7471   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7472   ins_encode %{
 7473     __ leaq($dst$$Register, $mem$$Address);
 7474   %}
 7475   ins_pipe(ialu_reg_reg_fat);
 7476 %}
 7477 
 7478 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7479 %{
 7480   match(Set dst mem);
 7481 
 7482   ins_cost(110);
 7483   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7484   ins_encode %{
 7485     __ leaq($dst$$Register, $mem$$Address);
 7486   %}
 7487   ins_pipe(ialu_reg_reg_fat);
 7488 %}
 7489 
 7490 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7491 %{
 7492   match(Set dst mem);
 7493 
 7494   ins_cost(110);
 7495   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7496   ins_encode %{
 7497     __ leaq($dst$$Register, $mem$$Address);
 7498   %}
 7499   ins_pipe(ialu_reg_reg_fat);
 7500 %}
 7501 
 7502 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7503 %{
 7504   match(Set dst mem);
 7505 
 7506   ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr posidxscale" %}
 7508   ins_encode %{
 7509     __ leaq($dst$$Register, $mem$$Address);
 7510   %}
 7511   ins_pipe(ialu_reg_reg_fat);
 7512 %}
 7513 
 7514 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7515 %{
 7516   match(Set dst mem);
 7517 
 7518   ins_cost(110);
 7519   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7520   ins_encode %{
 7521     __ leaq($dst$$Register, $mem$$Address);
 7522   %}
 7523   ins_pipe(ialu_reg_reg_fat);
 7524 %}
 7525 
 7526 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7527 %{
 7528   match(Set dst mem);
 7529 
 7530   ins_cost(110);
 7531   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7532   ins_encode %{
 7533     __ leaq($dst$$Register, $mem$$Address);
 7534   %}
 7535   ins_pipe(ialu_reg_reg_fat);
 7536 %}
 7537 
 7538 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7539 %{
 7540   match(Set dst mem);
 7541 
 7542   ins_cost(110);
 7543   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7544   ins_encode %{
 7545     __ leaq($dst$$Register, $mem$$Address);
 7546   %}
 7547   ins_pipe(ialu_reg_reg_fat);
 7548 %}
 7549 
// Load Effective Address which uses a narrow (32-bit) oop
 7551 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7552 %{
 7553   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7554   match(Set dst mem);
 7555 
 7556   ins_cost(110);
 7557   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7558   ins_encode %{
 7559     __ leaq($dst$$Register, $mem$$Address);
 7560   %}
 7561   ins_pipe(ialu_reg_reg_fat);
 7562 %}
 7563 
 7564 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7565 %{
 7566   predicate(CompressedOops::shift() == 0);
 7567   match(Set dst mem);
 7568 
 7569   ins_cost(110); // XXX
 7570   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7571   ins_encode %{
 7572     __ leaq($dst$$Register, $mem$$Address);
 7573   %}
 7574   ins_pipe(ialu_reg_reg_fat);
 7575 %}
 7576 
 7577 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7578 %{
 7579   predicate(CompressedOops::shift() == 0);
 7580   match(Set dst mem);
 7581 
 7582   ins_cost(110);
 7583   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7584   ins_encode %{
 7585     __ leaq($dst$$Register, $mem$$Address);
 7586   %}
 7587   ins_pipe(ialu_reg_reg_fat);
 7588 %}
 7589 
 7590 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7591 %{
 7592   predicate(CompressedOops::shift() == 0);
 7593   match(Set dst mem);
 7594 
 7595   ins_cost(110);
 7596   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7597   ins_encode %{
 7598     __ leaq($dst$$Register, $mem$$Address);
 7599   %}
 7600   ins_pipe(ialu_reg_reg_fat);
 7601 %}
 7602 
 7603 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7604 %{
 7605   predicate(CompressedOops::shift() == 0);
 7606   match(Set dst mem);
 7607 
 7608   ins_cost(110);
 7609   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7610   ins_encode %{
 7611     __ leaq($dst$$Register, $mem$$Address);
 7612   %}
 7613   ins_pipe(ialu_reg_reg_fat);
 7614 %}
 7615 
 7616 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7617 %{
 7618   predicate(CompressedOops::shift() == 0);
 7619   match(Set dst mem);
 7620 
 7621   ins_cost(110);
 7622   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7623   ins_encode %{
 7624     __ leaq($dst$$Register, $mem$$Address);
 7625   %}
 7626   ins_pipe(ialu_reg_reg_fat);
 7627 %}
 7628 
 7629 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7630 %{
 7631   predicate(CompressedOops::shift() == 0);
 7632   match(Set dst mem);
 7633 
 7634   ins_cost(110);
 7635   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7636   ins_encode %{
 7637     __ leaq($dst$$Register, $mem$$Address);
 7638   %}
 7639   ins_pipe(ialu_reg_reg_fat);
 7640 %}
 7641 
 7642 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7643 %{
 7644   predicate(CompressedOops::shift() == 0);
 7645   match(Set dst mem);
 7646 
 7647   ins_cost(110);
 7648   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7649   ins_encode %{
 7650     __ leaq($dst$$Register, $mem$$Address);
 7651   %}
 7652   ins_pipe(ialu_reg_reg_fat);
 7653 %}
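// Example (illustrative): every leaP* variant above just folds one x86-64
// addressing mode into a single lea, e.g. taking the address of a long[]
// element
//   &a[i]  ->  leaq dst, [a + i*8 + <array header offset>]
// The many shapes exist only so the matcher can cover each combination of
// base, index, scale and displacement, plus (for the *Narrow forms) narrow
// oop bases that are usable directly when CompressedOops::shift() == 0.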
 7654 
 7655 instruct loadConI(rRegI dst, immI src)
 7656 %{
 7657   match(Set dst src);
 7658 
 7659   format %{ "movl    $dst, $src\t# int" %}
 7660   ins_encode %{
 7661     __ movl($dst$$Register, $src$$constant);
 7662   %}
 7663   ins_pipe(ialu_reg_fat); // XXX
 7664 %}
 7665 
 7666 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7667 %{
 7668   match(Set dst src);
 7669   effect(KILL cr);
 7670 
 7671   ins_cost(50);
 7672   format %{ "xorl    $dst, $dst\t# int" %}
 7673   ins_encode %{
 7674     __ xorl($dst$$Register, $dst$$Register);
 7675   %}
 7676   ins_pipe(ialu_reg);
 7677 %}
 7678 
 7679 instruct loadConL(rRegL dst, immL src)
 7680 %{
 7681   match(Set dst src);
 7682 
 7683   ins_cost(150);
 7684   format %{ "movq    $dst, $src\t# long" %}
 7685   ins_encode %{
 7686     __ mov64($dst$$Register, $src$$constant);
 7687   %}
 7688   ins_pipe(ialu_reg);
 7689 %}
 7690 
 7691 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7692 %{
 7693   match(Set dst src);
 7694   effect(KILL cr);
 7695 
 7696   ins_cost(50);
 7697   format %{ "xorl    $dst, $dst\t# long" %}
 7698   ins_encode %{
 7699     __ xorl($dst$$Register, $dst$$Register);
 7700   %}
 7701   ins_pipe(ialu_reg); // XXX
 7702 %}
 7703 
 7704 instruct loadConUL32(rRegL dst, immUL32 src)
 7705 %{
 7706   match(Set dst src);
 7707 
 7708   ins_cost(60);
 7709   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7710   ins_encode %{
 7711     __ movl($dst$$Register, $src$$constant);
 7712   %}
 7713   ins_pipe(ialu_reg);
 7714 %}
 7715 
 7716 instruct loadConL32(rRegL dst, immL32 src)
 7717 %{
 7718   match(Set dst src);
 7719 
 7720   ins_cost(70);
 7721   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7722   ins_encode %{
 7723     __ movq($dst$$Register, $src$$constant);
 7724   %}
 7725   ins_pipe(ialu_reg);
 7726 %}
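// Constant materialization above picks the cheapest encoding per value class
// (reflected in the ins_cost values 50/60/70/150), conceptually:
//   0                        -> xorl  dst, dst    // shortest, but kills flags
//   fits in unsigned 32 bits -> movl  dst, imm32  // implicitly zero-extends
//   fits in signed 32 bits   -> movq  dst, imm32  // sign-extends
//   anything else            -> mov64 dst, imm64  // full 64-bit immediate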
 7727 
 7728 instruct loadConP(rRegP dst, immP con) %{
 7729   match(Set dst con);
 7730 
 7731   format %{ "movq    $dst, $con\t# ptr" %}
 7732   ins_encode %{
 7733     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7734   %}
 7735   ins_pipe(ialu_reg_fat); // XXX
 7736 %}
 7737 
 7738 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7739 %{
 7740   match(Set dst src);
 7741   effect(KILL cr);
 7742 
 7743   ins_cost(50);
 7744   format %{ "xorl    $dst, $dst\t# ptr" %}
 7745   ins_encode %{
 7746     __ xorl($dst$$Register, $dst$$Register);
 7747   %}
 7748   ins_pipe(ialu_reg);
 7749 %}
 7750 
 7751 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7752 %{
 7753   match(Set dst src);
 7754   effect(KILL cr);
 7755 
 7756   ins_cost(60);
 7757   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7758   ins_encode %{
 7759     __ movl($dst$$Register, $src$$constant);
 7760   %}
 7761   ins_pipe(ialu_reg);
 7762 %}
 7763 
 7764 instruct loadConF(regF dst, immF con) %{
 7765   match(Set dst con);
 7766   ins_cost(125);
 7767   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7768   ins_encode %{
 7769     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7770   %}
 7771   ins_pipe(pipe_slow);
 7772 %}
 7773 
 7774 instruct loadConH(regF dst, immH con) %{
 7775   match(Set dst con);
 7776   ins_cost(125);
 7777   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7778   ins_encode %{
 7779     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7780   %}
 7781   ins_pipe(pipe_slow);
 7782 %}
 7783 
 7784 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7785   match(Set dst src);
 7786   effect(KILL cr);
  format %{ "xorq    $dst, $dst\t# compressed null pointer" %}
 7788   ins_encode %{
 7789     __ xorq($dst$$Register, $dst$$Register);
 7790   %}
 7791   ins_pipe(ialu_reg);
 7792 %}
 7793 
 7794 instruct loadConN(rRegN dst, immN src) %{
 7795   match(Set dst src);
 7796 
 7797   ins_cost(125);
 7798   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7799   ins_encode %{
 7800     address con = (address)$src$$constant;
 7801     if (con == nullptr) {
 7802       ShouldNotReachHere();
 7803     } else {
 7804       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7805     }
 7806   %}
 7807   ins_pipe(ialu_reg_fat); // XXX
 7808 %}
 7809 
 7810 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7811   match(Set dst src);
 7812 
 7813   ins_cost(125);
 7814   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7815   ins_encode %{
 7816     address con = (address)$src$$constant;
 7817     if (con == nullptr) {
 7818       ShouldNotReachHere();
 7819     } else {
 7820       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7821     }
 7822   %}
 7823   ins_pipe(ialu_reg_fat); // XXX
 7824 %}
 7825 
 7826 instruct loadConF0(regF dst, immF0 src)
 7827 %{
 7828   match(Set dst src);
 7829   ins_cost(100);
 7830 
 7831   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7832   ins_encode %{
 7833     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7834   %}
 7835   ins_pipe(pipe_slow);
 7836 %}
 7837 
// Use the same format since predicate() cannot be used here.
 7839 instruct loadConD(regD dst, immD con) %{
 7840   match(Set dst con);
 7841   ins_cost(125);
 7842   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7843   ins_encode %{
 7844     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7845   %}
 7846   ins_pipe(pipe_slow);
 7847 %}
 7848 
 7849 instruct loadConD0(regD dst, immD0 src)
 7850 %{
 7851   match(Set dst src);
 7852   ins_cost(100);
 7853 
 7854   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7855   ins_encode %{
 7856     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7857   %}
 7858   ins_pipe(pipe_slow);
 7859 %}
 7860 
 7861 instruct loadSSI(rRegI dst, stackSlotI src)
 7862 %{
 7863   match(Set dst src);
 7864 
 7865   ins_cost(125);
 7866   format %{ "movl    $dst, $src\t# int stk" %}
 7867   ins_encode %{
 7868     __ movl($dst$$Register, $src$$Address);
 7869   %}
 7870   ins_pipe(ialu_reg_mem);
 7871 %}
 7872 
 7873 instruct loadSSL(rRegL dst, stackSlotL src)
 7874 %{
 7875   match(Set dst src);
 7876 
 7877   ins_cost(125);
 7878   format %{ "movq    $dst, $src\t# long stk" %}
 7879   ins_encode %{
 7880     __ movq($dst$$Register, $src$$Address);
 7881   %}
 7882   ins_pipe(ialu_reg_mem);
 7883 %}
 7884 
 7885 instruct loadSSP(rRegP dst, stackSlotP src)
 7886 %{
 7887   match(Set dst src);
 7888 
 7889   ins_cost(125);
 7890   format %{ "movq    $dst, $src\t# ptr stk" %}
 7891   ins_encode %{
 7892     __ movq($dst$$Register, $src$$Address);
 7893   %}
 7894   ins_pipe(ialu_reg_mem);
 7895 %}
 7896 
 7897 instruct loadSSF(regF dst, stackSlotF src)
 7898 %{
 7899   match(Set dst src);
 7900 
 7901   ins_cost(125);
 7902   format %{ "movss   $dst, $src\t# float stk" %}
 7903   ins_encode %{
 7904     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 7905   %}
 7906   ins_pipe(pipe_slow); // XXX
 7907 %}
 7908 
// Use the same format since predicate() cannot be used here.
 7910 instruct loadSSD(regD dst, stackSlotD src)
 7911 %{
 7912   match(Set dst src);
 7913 
 7914   ins_cost(125);
 7915   format %{ "movsd   $dst, $src\t# double stk" %}
 7916   ins_encode  %{
 7917     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 7918   %}
 7919   ins_pipe(pipe_slow); // XXX
 7920 %}
 7921 
 7922 // Prefetch instructions for allocation.
 7923 // Must be safe to execute with invalid address (cannot fault).
 7924 
 7925 instruct prefetchAlloc( memory mem ) %{
 7926   predicate(AllocatePrefetchInstr==3);
 7927   match(PrefetchAllocation mem);
 7928   ins_cost(125);
 7929 
 7930   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7931   ins_encode %{
 7932     __ prefetchw($mem$$Address);
 7933   %}
 7934   ins_pipe(ialu_mem);
 7935 %}
 7936 
 7937 instruct prefetchAllocNTA( memory mem ) %{
 7938   predicate(AllocatePrefetchInstr==0);
 7939   match(PrefetchAllocation mem);
 7940   ins_cost(125);
 7941 
 7942   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7943   ins_encode %{
 7944     __ prefetchnta($mem$$Address);
 7945   %}
 7946   ins_pipe(ialu_mem);
 7947 %}
 7948 
 7949 instruct prefetchAllocT0( memory mem ) %{
 7950   predicate(AllocatePrefetchInstr==1);
 7951   match(PrefetchAllocation mem);
 7952   ins_cost(125);
 7953 
 7954   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 7955   ins_encode %{
 7956     __ prefetcht0($mem$$Address);
 7957   %}
 7958   ins_pipe(ialu_mem);
 7959 %}
 7960 
 7961 instruct prefetchAllocT2( memory mem ) %{
 7962   predicate(AllocatePrefetchInstr==2);
 7963   match(PrefetchAllocation mem);
 7964   ins_cost(125);
 7965 
 7966   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 7967   ins_encode %{
 7968     __ prefetcht2($mem$$Address);
 7969   %}
 7970   ins_pipe(ialu_mem);
 7971 %}
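// Which of the four forms above is generated is selected by
// -XX:AllocatePrefetchInstr=<n>:
//   0 -> prefetchnta (default), 1 -> prefetcht0, 2 -> prefetcht2,
//   3 -> prefetchw (where the CPU supports it)
// All are pure hints: the CPU ignores them on an invalid address, which is
// what makes them safe to issue past the current TLAB end.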
 7972 
 7973 //----------Store Instructions-------------------------------------------------
 7974 
 7975 // Store Byte
 7976 instruct storeB(memory mem, rRegI src)
 7977 %{
 7978   match(Set mem (StoreB mem src));
 7979 
 7980   ins_cost(125); // XXX
 7981   format %{ "movb    $mem, $src\t# byte" %}
 7982   ins_encode %{
 7983     __ movb($mem$$Address, $src$$Register);
 7984   %}
 7985   ins_pipe(ialu_mem_reg);
 7986 %}
 7987 
 7988 // Store Char/Short
 7989 instruct storeC(memory mem, rRegI src)
 7990 %{
 7991   match(Set mem (StoreC mem src));
 7992 
 7993   ins_cost(125); // XXX
 7994   format %{ "movw    $mem, $src\t# char/short" %}
 7995   ins_encode %{
 7996     __ movw($mem$$Address, $src$$Register);
 7997   %}
 7998   ins_pipe(ialu_mem_reg);
 7999 %}
 8000 
 8001 // Store Integer
 8002 instruct storeI(memory mem, rRegI src)
 8003 %{
 8004   match(Set mem (StoreI mem src));
 8005 
 8006   ins_cost(125); // XXX
 8007   format %{ "movl    $mem, $src\t# int" %}
 8008   ins_encode %{
 8009     __ movl($mem$$Address, $src$$Register);
 8010   %}
 8011   ins_pipe(ialu_mem_reg);
 8012 %}
 8013 
 8014 // Store Long
 8015 instruct storeL(memory mem, rRegL src)
 8016 %{
 8017   match(Set mem (StoreL mem src));
 8018 
 8019   ins_cost(125); // XXX
 8020   format %{ "movq    $mem, $src\t# long" %}
 8021   ins_encode %{
 8022     __ movq($mem$$Address, $src$$Register);
 8023   %}
 8024   ins_pipe(ialu_mem_reg); // XXX
 8025 %}
 8026 
 8027 // Store Pointer
 8028 instruct storeP(memory mem, any_RegP src)
 8029 %{
 8030   predicate(n->as_Store()->barrier_data() == 0);
 8031   match(Set mem (StoreP mem src));
 8032 
 8033   ins_cost(125); // XXX
 8034   format %{ "movq    $mem, $src\t# ptr" %}
 8035   ins_encode %{
 8036     __ movq($mem$$Address, $src$$Register);
 8037   %}
 8038   ins_pipe(ialu_mem_reg);
 8039 %}
 8040 
 8041 instruct storeImmP0(memory mem, immP0 zero)
 8042 %{
 8043   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8044   match(Set mem (StoreP mem zero));
 8045 
 8046   ins_cost(125); // XXX
 8047   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8048   ins_encode %{
 8049     __ movq($mem$$Address, r12);
 8050   %}
 8051   ins_pipe(ialu_mem_reg);
 8052 %}
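// Why R12: when compressed oops run with a null heap base, r12 (the
// heap-base register) is known to contain zero, so "movq [mem], r12" stores
// a null pointer in a shorter encoding than a move of an immediate and needs
// no scratch register. The same trick is reused below for zero stores of
// int, long, short/char, byte, float and double.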
 8053 
 8054 // Store Null Pointer, mark word, or other simple pointer constant.
 8055 instruct storeImmP(memory mem, immP31 src)
 8056 %{
 8057   predicate(n->as_Store()->barrier_data() == 0);
 8058   match(Set mem (StoreP mem src));
 8059 
 8060   ins_cost(150); // XXX
 8061   format %{ "movq    $mem, $src\t# ptr" %}
 8062   ins_encode %{
 8063     __ movq($mem$$Address, $src$$constant);
 8064   %}
 8065   ins_pipe(ialu_mem_imm);
 8066 %}
 8067 
 8068 // Store Compressed Pointer
 8069 instruct storeN(memory mem, rRegN src)
 8070 %{
 8071   predicate(n->as_Store()->barrier_data() == 0);
 8072   match(Set mem (StoreN mem src));
 8073 
 8074   ins_cost(125); // XXX
 8075   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8076   ins_encode %{
 8077     __ movl($mem$$Address, $src$$Register);
 8078   %}
 8079   ins_pipe(ialu_mem_reg);
 8080 %}
 8081 
 8082 instruct storeNKlass(memory mem, rRegN src)
 8083 %{
 8084   match(Set mem (StoreNKlass mem src));
 8085 
 8086   ins_cost(125); // XXX
 8087   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8088   ins_encode %{
 8089     __ movl($mem$$Address, $src$$Register);
 8090   %}
 8091   ins_pipe(ialu_mem_reg);
 8092 %}
 8093 
 8094 instruct storeImmN0(memory mem, immN0 zero)
 8095 %{
 8096   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8097   match(Set mem (StoreN mem zero));
 8098 
 8099   ins_cost(125); // XXX
 8100   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8101   ins_encode %{
 8102     __ movl($mem$$Address, r12);
 8103   %}
 8104   ins_pipe(ialu_mem_reg);
 8105 %}
 8106 
 8107 instruct storeImmN(memory mem, immN src)
 8108 %{
 8109   predicate(n->as_Store()->barrier_data() == 0);
 8110   match(Set mem (StoreN mem src));
 8111 
 8112   ins_cost(150); // XXX
 8113   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8114   ins_encode %{
 8115     address con = (address)$src$$constant;
 8116     if (con == nullptr) {
 8117       __ movl($mem$$Address, 0);
 8118     } else {
 8119       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8120     }
 8121   %}
 8122   ins_pipe(ialu_mem_imm);
 8123 %}
 8124 
 8125 instruct storeImmNKlass(memory mem, immNKlass src)
 8126 %{
 8127   match(Set mem (StoreNKlass mem src));
 8128 
 8129   ins_cost(150); // XXX
 8130   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8131   ins_encode %{
 8132     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8133   %}
 8134   ins_pipe(ialu_mem_imm);
 8135 %}
 8136 
 8137 // Store Integer Immediate
 8138 instruct storeImmI0(memory mem, immI_0 zero)
 8139 %{
 8140   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8141   match(Set mem (StoreI mem zero));
 8142 
 8143   ins_cost(125); // XXX
 8144   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8145   ins_encode %{
 8146     __ movl($mem$$Address, r12);
 8147   %}
 8148   ins_pipe(ialu_mem_reg);
 8149 %}
 8150 
 8151 instruct storeImmI(memory mem, immI src)
 8152 %{
 8153   match(Set mem (StoreI mem src));
 8154 
 8155   ins_cost(150);
 8156   format %{ "movl    $mem, $src\t# int" %}
 8157   ins_encode %{
 8158     __ movl($mem$$Address, $src$$constant);
 8159   %}
 8160   ins_pipe(ialu_mem_imm);
 8161 %}
 8162 
 8163 // Store Long Immediate
 8164 instruct storeImmL0(memory mem, immL0 zero)
 8165 %{
 8166   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8167   match(Set mem (StoreL mem zero));
 8168 
 8169   ins_cost(125); // XXX
 8170   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8171   ins_encode %{
 8172     __ movq($mem$$Address, r12);
 8173   %}
 8174   ins_pipe(ialu_mem_reg);
 8175 %}
 8176 
 8177 instruct storeImmL(memory mem, immL32 src)
 8178 %{
 8179   match(Set mem (StoreL mem src));
 8180 
 8181   ins_cost(150);
 8182   format %{ "movq    $mem, $src\t# long" %}
 8183   ins_encode %{
 8184     __ movq($mem$$Address, $src$$constant);
 8185   %}
 8186   ins_pipe(ialu_mem_imm);
 8187 %}
 8188 
 8189 // Store Short/Char Immediate
 8190 instruct storeImmC0(memory mem, immI_0 zero)
 8191 %{
 8192   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8193   match(Set mem (StoreC mem zero));
 8194 
 8195   ins_cost(125); // XXX
 8196   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8197   ins_encode %{
 8198     __ movw($mem$$Address, r12);
 8199   %}
 8200   ins_pipe(ialu_mem_reg);
 8201 %}
 8202 
 8203 instruct storeImmI16(memory mem, immI16 src)
 8204 %{
 8205   predicate(UseStoreImmI16);
 8206   match(Set mem (StoreC mem src));
 8207 
 8208   ins_cost(150);
 8209   format %{ "movw    $mem, $src\t# short/char" %}
 8210   ins_encode %{
 8211     __ movw($mem$$Address, $src$$constant);
 8212   %}
 8213   ins_pipe(ialu_mem_imm);
 8214 %}
 8215 
 8216 // Store Byte Immediate
 8217 instruct storeImmB0(memory mem, immI_0 zero)
 8218 %{
 8219   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8220   match(Set mem (StoreB mem zero));
 8221 
 8222   ins_cost(125); // XXX
  format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 8224   ins_encode %{
 8225     __ movb($mem$$Address, r12);
 8226   %}
 8227   ins_pipe(ialu_mem_reg);
 8228 %}
 8229 
 8230 instruct storeImmB(memory mem, immI8 src)
 8231 %{
 8232   match(Set mem (StoreB mem src));
 8233 
 8234   ins_cost(150); // XXX
 8235   format %{ "movb    $mem, $src\t# byte" %}
 8236   ins_encode %{
 8237     __ movb($mem$$Address, $src$$constant);
 8238   %}
 8239   ins_pipe(ialu_mem_imm);
 8240 %}
 8241 
 8242 // Store Float
 8243 instruct storeF(memory mem, regF src)
 8244 %{
 8245   match(Set mem (StoreF mem src));
 8246 
 8247   ins_cost(95); // XXX
 8248   format %{ "movss   $mem, $src\t# float" %}
 8249   ins_encode %{
 8250     __ movflt($mem$$Address, $src$$XMMRegister);
 8251   %}
 8252   ins_pipe(pipe_slow); // XXX
 8253 %}
 8254 
 8255 // Store immediate Float value (it is faster than store from XMM register)
 8256 instruct storeF0(memory mem, immF0 zero)
 8257 %{
 8258   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8259   match(Set mem (StoreF mem zero));
 8260 
 8261   ins_cost(25); // XXX
 8262   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8263   ins_encode %{
 8264     __ movl($mem$$Address, r12);
 8265   %}
 8266   ins_pipe(ialu_mem_reg);
 8267 %}
 8268 
 8269 instruct storeF_imm(memory mem, immF src)
 8270 %{
 8271   match(Set mem (StoreF mem src));
 8272 
 8273   ins_cost(50);
 8274   format %{ "movl    $mem, $src\t# float" %}
 8275   ins_encode %{
 8276     __ movl($mem$$Address, jint_cast($src$$constant));
 8277   %}
 8278   ins_pipe(ialu_mem_imm);
 8279 %}
 8280 
 8281 // Store Double
 8282 instruct storeD(memory mem, regD src)
 8283 %{
 8284   match(Set mem (StoreD mem src));
 8285 
 8286   ins_cost(95); // XXX
 8287   format %{ "movsd   $mem, $src\t# double" %}
 8288   ins_encode %{
 8289     __ movdbl($mem$$Address, $src$$XMMRegister);
 8290   %}
 8291   ins_pipe(pipe_slow); // XXX
 8292 %}
 8293 
 8294 // Store immediate double 0.0 (it is faster than store from XMM register)
 8295 instruct storeD0_imm(memory mem, immD0 src)
 8296 %{
 8297   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8298   match(Set mem (StoreD mem src));
 8299 
 8300   ins_cost(50);
 8301   format %{ "movq    $mem, $src\t# double 0." %}
 8302   ins_encode %{
 8303     __ movq($mem$$Address, $src$$constant);
 8304   %}
 8305   ins_pipe(ialu_mem_imm);
 8306 %}
 8307 
 8308 instruct storeD0(memory mem, immD0 zero)
 8309 %{
 8310   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8311   match(Set mem (StoreD mem zero));
 8312 
 8313   ins_cost(25); // XXX
 8314   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8315   ins_encode %{
 8316     __ movq($mem$$Address, r12);
 8317   %}
 8318   ins_pipe(ialu_mem_reg);
 8319 %}
 8320 
 8321 instruct storeSSI(stackSlotI dst, rRegI src)
 8322 %{
 8323   match(Set dst src);
 8324 
 8325   ins_cost(100);
 8326   format %{ "movl    $dst, $src\t# int stk" %}
 8327   ins_encode %{
 8328     __ movl($dst$$Address, $src$$Register);
 8329   %}
 8330   ins_pipe( ialu_mem_reg );
 8331 %}
 8332 
 8333 instruct storeSSL(stackSlotL dst, rRegL src)
 8334 %{
 8335   match(Set dst src);
 8336 
 8337   ins_cost(100);
 8338   format %{ "movq    $dst, $src\t# long stk" %}
 8339   ins_encode %{
 8340     __ movq($dst$$Address, $src$$Register);
 8341   %}
 8342   ins_pipe(ialu_mem_reg);
 8343 %}
 8344 
 8345 instruct storeSSP(stackSlotP dst, rRegP src)
 8346 %{
 8347   match(Set dst src);
 8348 
 8349   ins_cost(100);
 8350   format %{ "movq    $dst, $src\t# ptr stk" %}
 8351   ins_encode %{
 8352     __ movq($dst$$Address, $src$$Register);
 8353   %}
 8354   ins_pipe(ialu_mem_reg);
 8355 %}
 8356 
 8357 instruct storeSSF(stackSlotF dst, regF src)
 8358 %{
 8359   match(Set dst src);
 8360 
 8361   ins_cost(95); // XXX
 8362   format %{ "movss   $dst, $src\t# float stk" %}
 8363   ins_encode %{
 8364     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8365   %}
 8366   ins_pipe(pipe_slow); // XXX
 8367 %}
 8368 
 8369 instruct storeSSD(stackSlotD dst, regD src)
 8370 %{
 8371   match(Set dst src);
 8372 
 8373   ins_cost(95); // XXX
 8374   format %{ "movsd   $dst, $src\t# double stk" %}
 8375   ins_encode %{
 8376     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8377   %}
 8378   ins_pipe(pipe_slow); // XXX
 8379 %}
 8380 
 8381 instruct cacheWB(indirect addr)
 8382 %{
 8383   predicate(VM_Version::supports_data_cache_line_flush());
 8384   match(CacheWB addr);
 8385 
 8386   ins_cost(100);
  format %{ "cache wb $addr" %}
 8388   ins_encode %{
 8389     assert($addr->index_position() < 0, "should be");
 8390     assert($addr$$disp == 0, "should be");
 8391     __ cache_wb(Address($addr$$base$$Register, 0));
 8392   %}
 8393   ins_pipe(pipe_slow); // XXX
 8394 %}
 8395 
 8396 instruct cacheWBPreSync()
 8397 %{
 8398   predicate(VM_Version::supports_data_cache_line_flush());
 8399   match(CacheWBPreSync);
 8400 
 8401   ins_cost(100);
  format %{ "cache wb presync" %}
 8403   ins_encode %{
 8404     __ cache_wbsync(true);
 8405   %}
 8406   ins_pipe(pipe_slow); // XXX
 8407 %}
 8408 
 8409 instruct cacheWBPostSync()
 8410 %{
 8411   predicate(VM_Version::supports_data_cache_line_flush());
 8412   match(CacheWBPostSync);
 8413 
 8414   ins_cost(100);
  format %{ "cache wb postsync" %}
 8416   ins_encode %{
 8417     __ cache_wbsync(false);
 8418   %}
 8419   ins_pipe(pipe_slow); // XXX
 8420 %}
 8421 
 8422 //----------BSWAP Instructions-------------------------------------------------
 8423 instruct bytes_reverse_int(rRegI dst) %{
 8424   match(Set dst (ReverseBytesI dst));
 8425 
 8426   format %{ "bswapl  $dst" %}
 8427   ins_encode %{
 8428     __ bswapl($dst$$Register);
 8429   %}
 8430   ins_pipe( ialu_reg );
 8431 %}
 8432 
 8433 instruct bytes_reverse_long(rRegL dst) %{
 8434   match(Set dst (ReverseBytesL dst));
 8435 
 8436   format %{ "bswapq  $dst" %}
 8437   ins_encode %{
 8438     __ bswapq($dst$$Register);
 8439   %}
 8440   ins_pipe( ialu_reg);
 8441 %}
 8442 
 8443 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8444   match(Set dst (ReverseBytesUS dst));
 8445   effect(KILL cr);
 8446 
  format %{ "bswapl  $dst\n\t"
            "shrl    $dst, 16" %}
 8449   ins_encode %{
 8450     __ bswapl($dst$$Register);
 8451     __ shrl($dst$$Register, 16);
 8452   %}
 8453   ins_pipe( ialu_reg );
 8454 %}
 8455 
 8456 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8457   match(Set dst (ReverseBytesS dst));
 8458   effect(KILL cr);
 8459 
  format %{ "bswapl  $dst\n\t"
            "sarl    $dst, 16" %}
 8462   ins_encode %{
 8463     __ bswapl($dst$$Register);
 8464     __ sarl($dst$$Register, 16);
 8465   %}
 8466   ins_pipe( ialu_reg );
 8467 %}
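// Note the asymmetry: Character.reverseBytes zero-extends (shrl above) while
// Short.reverseBytes must sign-extend the swapped 16 bits (sarl), which is
// the only difference between the two variants.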
 8468 
 8469 //---------- Zeros Count Instructions ------------------------------------------
 8470 
 8471 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8472   predicate(UseCountLeadingZerosInstruction);
 8473   match(Set dst (CountLeadingZerosI src));
 8474   effect(KILL cr);
 8475 
 8476   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8477   ins_encode %{
 8478     __ lzcntl($dst$$Register, $src$$Register);
 8479   %}
 8480   ins_pipe(ialu_reg);
 8481 %}
 8482 
 8483 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8484   predicate(UseCountLeadingZerosInstruction);
 8485   match(Set dst (CountLeadingZerosI (LoadI src)));
 8486   effect(KILL cr);
 8487   ins_cost(175);
 8488   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8489   ins_encode %{
 8490     __ lzcntl($dst$$Register, $src$$Address);
 8491   %}
 8492   ins_pipe(ialu_reg_mem);
 8493 %}
 8494 
 8495 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8496   predicate(!UseCountLeadingZerosInstruction);
 8497   match(Set dst (CountLeadingZerosI src));
 8498   effect(KILL cr);
 8499 
 8500   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8501             "jnz     skip\n\t"
 8502             "movl    $dst, -1\n"
 8503       "skip:\n\t"
 8504             "negl    $dst\n\t"
 8505             "addl    $dst, 31" %}
 8506   ins_encode %{
 8507     Register Rdst = $dst$$Register;
 8508     Register Rsrc = $src$$Register;
 8509     Label skip;
 8510     __ bsrl(Rdst, Rsrc);
 8511     __ jccb(Assembler::notZero, skip);
 8512     __ movl(Rdst, -1);
 8513     __ bind(skip);
 8514     __ negl(Rdst);
 8515     __ addl(Rdst, BitsPerInt - 1);
 8516   %}
 8517   ins_pipe(ialu_reg);
 8518 %}
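// Worked example of the bsr fallback: bsrl yields the index of the highest
// set bit and leaves dst undefined (ZF set) for a zero input, so
//   src != 0:  dst = bsr(src) -> negl -> -bsr -> addl 31 -> 31 - bsr = clz
//   src == 0:  dst = -1       -> negl ->  1   -> addl 31 -> 32 (clz of 0)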
 8519 
 8520 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8521   predicate(UseCountLeadingZerosInstruction);
 8522   match(Set dst (CountLeadingZerosL src));
 8523   effect(KILL cr);
 8524 
 8525   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8526   ins_encode %{
 8527     __ lzcntq($dst$$Register, $src$$Register);
 8528   %}
 8529   ins_pipe(ialu_reg);
 8530 %}
 8531 
 8532 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8533   predicate(UseCountLeadingZerosInstruction);
 8534   match(Set dst (CountLeadingZerosL (LoadL src)));
 8535   effect(KILL cr);
 8536   ins_cost(175);
 8537   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8538   ins_encode %{
 8539     __ lzcntq($dst$$Register, $src$$Address);
 8540   %}
 8541   ins_pipe(ialu_reg_mem);
 8542 %}
 8543 
 8544 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8545   predicate(!UseCountLeadingZerosInstruction);
 8546   match(Set dst (CountLeadingZerosL src));
 8547   effect(KILL cr);
 8548 
 8549   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8550             "jnz     skip\n\t"
 8551             "movl    $dst, -1\n"
 8552       "skip:\n\t"
 8553             "negl    $dst\n\t"
 8554             "addl    $dst, 63" %}
 8555   ins_encode %{
 8556     Register Rdst = $dst$$Register;
 8557     Register Rsrc = $src$$Register;
 8558     Label skip;
 8559     __ bsrq(Rdst, Rsrc);
 8560     __ jccb(Assembler::notZero, skip);
 8561     __ movl(Rdst, -1);
 8562     __ bind(skip);
 8563     __ negl(Rdst);
 8564     __ addl(Rdst, BitsPerLong - 1);
 8565   %}
 8566   ins_pipe(ialu_reg);
 8567 %}
 8568 
 8569 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8570   predicate(UseCountTrailingZerosInstruction);
 8571   match(Set dst (CountTrailingZerosI src));
 8572   effect(KILL cr);
 8573 
 8574   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8575   ins_encode %{
 8576     __ tzcntl($dst$$Register, $src$$Register);
 8577   %}
 8578   ins_pipe(ialu_reg);
 8579 %}
 8580 
 8581 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8582   predicate(UseCountTrailingZerosInstruction);
 8583   match(Set dst (CountTrailingZerosI (LoadI src)));
 8584   effect(KILL cr);
 8585   ins_cost(175);
 8586   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8587   ins_encode %{
 8588     __ tzcntl($dst$$Register, $src$$Address);
 8589   %}
 8590   ins_pipe(ialu_reg_mem);
 8591 %}
 8592 
 8593 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8594   predicate(!UseCountTrailingZerosInstruction);
 8595   match(Set dst (CountTrailingZerosI src));
 8596   effect(KILL cr);
 8597 
 8598   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8599             "jnz     done\n\t"
 8600             "movl    $dst, 32\n"
 8601       "done:" %}
 8602   ins_encode %{
 8603     Register Rdst = $dst$$Register;
 8604     Label done;
 8605     __ bsfl(Rdst, $src$$Register);
 8606     __ jccb(Assembler::notZero, done);
 8607     __ movl(Rdst, BitsPerInt);
 8608     __ bind(done);
 8609   %}
 8610   ins_pipe(ialu_reg);
 8611 %}
 8612 
 8613 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8614   predicate(UseCountTrailingZerosInstruction);
 8615   match(Set dst (CountTrailingZerosL src));
 8616   effect(KILL cr);
 8617 
 8618   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8619   ins_encode %{
 8620     __ tzcntq($dst$$Register, $src$$Register);
 8621   %}
 8622   ins_pipe(ialu_reg);
 8623 %}
 8624 
 8625 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8626   predicate(UseCountTrailingZerosInstruction);
 8627   match(Set dst (CountTrailingZerosL (LoadL src)));
 8628   effect(KILL cr);
 8629   ins_cost(175);
 8630   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8631   ins_encode %{
 8632     __ tzcntq($dst$$Register, $src$$Address);
 8633   %}
 8634   ins_pipe(ialu_reg_mem);
 8635 %}
 8636 
 8637 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8638   predicate(!UseCountTrailingZerosInstruction);
 8639   match(Set dst (CountTrailingZerosL src));
 8640   effect(KILL cr);
 8641 
 8642   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8643             "jnz     done\n\t"
 8644             "movl    $dst, 64\n"
 8645       "done:" %}
 8646   ins_encode %{
 8647     Register Rdst = $dst$$Register;
 8648     Label done;
 8649     __ bsfq(Rdst, $src$$Register);
 8650     __ jccb(Assembler::notZero, done);
 8651     __ movl(Rdst, BitsPerLong);
 8652     __ bind(done);
 8653   %}
 8654   ins_pipe(ialu_reg);
 8655 %}
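// The bsf fallbacks mirror the bsr ones: bsf already returns the trailing
// zero count for a non-zero input, so only the zero case needs patching up,
// e.g. Long.numberOfTrailingZeros(0L) must yield 64 (BitsPerLong).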
 8656 
 8657 //--------------- Reverse Operation Instructions ----------------
 8658 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8659   predicate(!VM_Version::supports_gfni());
 8660   match(Set dst (ReverseI src));
 8661   effect(TEMP dst, TEMP rtmp, KILL cr);
 8662   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8663   ins_encode %{
 8664     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8665   %}
 8666   ins_pipe( ialu_reg );
 8667 %}
 8668 
 8669 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8670   predicate(VM_Version::supports_gfni());
 8671   match(Set dst (ReverseI src));
 8672   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8673   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8674   ins_encode %{
 8675     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8676   %}
 8677   ins_pipe( ialu_reg );
 8678 %}
 8679 
 8680 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8681   predicate(!VM_Version::supports_gfni());
 8682   match(Set dst (ReverseL src));
 8683   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8684   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8685   ins_encode %{
 8686     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8687   %}
 8688   ins_pipe( ialu_reg );
 8689 %}
 8690 
 8691 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8692   predicate(VM_Version::supports_gfni());
 8693   match(Set dst (ReverseL src));
 8694   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8695   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8696   ins_encode %{
 8697     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8698   %}
 8699   ins_pipe( ialu_reg );
 8700 %}
 8701 
 8702 //---------- Population Count Instructions -------------------------------------
 8703 
 8704 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8705   predicate(UsePopCountInstruction);
 8706   match(Set dst (PopCountI src));
 8707   effect(KILL cr);
 8708 
 8709   format %{ "popcnt  $dst, $src" %}
 8710   ins_encode %{
 8711     __ popcntl($dst$$Register, $src$$Register);
 8712   %}
 8713   ins_pipe(ialu_reg);
 8714 %}
 8715 
 8716 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8717   predicate(UsePopCountInstruction);
 8718   match(Set dst (PopCountI (LoadI mem)));
 8719   effect(KILL cr);
 8720 
 8721   format %{ "popcnt  $dst, $mem" %}
 8722   ins_encode %{
 8723     __ popcntl($dst$$Register, $mem$$Address);
 8724   %}
 8725   ins_pipe(ialu_reg);
 8726 %}
 8727 
 8728 // Note: Long.bitCount(long) returns an int.
 8729 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8730   predicate(UsePopCountInstruction);
 8731   match(Set dst (PopCountL src));
 8732   effect(KILL cr);
 8733 
 8734   format %{ "popcnt  $dst, $src" %}
 8735   ins_encode %{
 8736     __ popcntq($dst$$Register, $src$$Register);
 8737   %}
 8738   ins_pipe(ialu_reg);
 8739 %}
 8740 
 8741 // Note: Long.bitCount(long) returns an int.
 8742 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8743   predicate(UsePopCountInstruction);
 8744   match(Set dst (PopCountL (LoadL mem)));
 8745   effect(KILL cr);
 8746 
 8747   format %{ "popcnt  $dst, $mem" %}
 8748   ins_encode %{
 8749     __ popcntq($dst$$Register, $mem$$Address);
 8750   %}
 8751   ins_pipe(ialu_reg);
 8752 %}
 8753 
 8754 
 8755 //----------MemBar Instructions-----------------------------------------------
 8756 // Memory barrier flavors
 8757 
 8758 instruct membar_acquire()
 8759 %{
 8760   match(MemBarAcquire);
 8761   match(LoadFence);
 8762   ins_cost(0);
 8763 
 8764   size(0);
 8765   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8766   ins_encode();
 8767   ins_pipe(empty);
 8768 %}
 8769 
 8770 instruct membar_acquire_lock()
 8771 %{
 8772   match(MemBarAcquireLock);
 8773   ins_cost(0);
 8774 
 8775   size(0);
 8776   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8777   ins_encode();
 8778   ins_pipe(empty);
 8779 %}
 8780 
 8781 instruct membar_release()
 8782 %{
 8783   match(MemBarRelease);
 8784   match(StoreFence);
 8785   ins_cost(0);
 8786 
 8787   size(0);
 8788   format %{ "MEMBAR-release ! (empty encoding)" %}
 8789   ins_encode();
 8790   ins_pipe(empty);
 8791 %}
 8792 
 8793 instruct membar_release_lock()
 8794 %{
 8795   match(MemBarReleaseLock);
 8796   ins_cost(0);
 8797 
 8798   size(0);
 8799   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8800   ins_encode();
 8801   ins_pipe(empty);
 8802 %}
 8803 
 8804 instruct membar_volatile(rFlagsReg cr) %{
 8805   match(MemBarVolatile);
 8806   effect(KILL cr);
 8807   ins_cost(400);
 8808 
 8809   format %{
 8810     $$template
 8811     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8812   %}
 8813   ins_encode %{
 8814     __ membar(Assembler::StoreLoad);
 8815   %}
 8816   ins_pipe(pipe_slow);
 8817 %}
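// A locked read-modify-write of the top-of-stack word serves as the
// StoreLoad barrier because "lock addl [rsp], 0" is cheaper than mfence on
// most x86 cores while giving the same ordering guarantee; rsp is always
// valid and its cache line is almost always held exclusively by this core.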
 8818 
 8819 instruct unnecessary_membar_volatile()
 8820 %{
 8821   match(MemBarVolatile);
 8822   predicate(Matcher::post_store_load_barrier(n));
 8823   ins_cost(0);
 8824 
 8825   size(0);
 8826   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8827   ins_encode();
 8828   ins_pipe(empty);
 8829 %}
 8830 
 8831 instruct membar_storestore() %{
 8832   match(MemBarStoreStore);
 8833   match(StoreStoreFence);
 8834   ins_cost(0);
 8835 
 8836   size(0);
 8837   format %{ "MEMBAR-storestore (empty encoding)" %}
 8838   ins_encode( );
 8839   ins_pipe(empty);
 8840 %}
 8841 
 8842 //----------Move Instructions--------------------------------------------------
 8843 
 8844 instruct castX2P(rRegP dst, rRegL src)
 8845 %{
 8846   match(Set dst (CastX2P src));
 8847 
 8848   format %{ "movq    $dst, $src\t# long->ptr" %}
 8849   ins_encode %{
 8850     if ($dst$$reg != $src$$reg) {
 8851       __ movptr($dst$$Register, $src$$Register);
 8852     }
 8853   %}
 8854   ins_pipe(ialu_reg_reg); // XXX
 8855 %}
 8856 
 8857 instruct castI2N(rRegN dst, rRegI src)
 8858 %{
 8859   match(Set dst (CastI2N src));
 8860 
  format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 8862   ins_encode %{
 8863     if ($dst$$reg != $src$$reg) {
 8864       __ movl($dst$$Register, $src$$Register);
 8865     }
 8866   %}
 8867   ins_pipe(ialu_reg_reg); // XXX
 8868 %}
 8869 
 8870 instruct castN2X(rRegL dst, rRegN src)
 8871 %{
 8872   match(Set dst (CastP2X src));
 8873 
  format %{ "movq    $dst, $src\t# narrow ptr -> long" %}
 8875   ins_encode %{
 8876     if ($dst$$reg != $src$$reg) {
 8877       __ movptr($dst$$Register, $src$$Register);
 8878     }
 8879   %}
 8880   ins_pipe(ialu_reg_reg); // XXX
 8881 %}
 8882 
 8883 instruct castP2X(rRegL dst, rRegP src)
 8884 %{
 8885   match(Set dst (CastP2X src));
 8886 
 8887   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8888   ins_encode %{
 8889     if ($dst$$reg != $src$$reg) {
 8890       __ movptr($dst$$Register, $src$$Register);
 8891     }
 8892   %}
 8893   ins_pipe(ialu_reg_reg); // XXX
 8894 %}
 8895 
// Convert oop into int for vector alignment masking
 8897 instruct convP2I(rRegI dst, rRegP src)
 8898 %{
 8899   match(Set dst (ConvL2I (CastP2X src)));
 8900 
 8901   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8902   ins_encode %{
 8903     __ movl($dst$$Register, $src$$Register);
 8904   %}
 8905   ins_pipe(ialu_reg_reg); // XXX
 8906 %}
 8907 
// Convert compressed oop into int for vector alignment masking
// in case of 32-bit oops (heap < 4GB).
 8910 instruct convN2I(rRegI dst, rRegN src)
 8911 %{
 8912   predicate(CompressedOops::shift() == 0);
 8913   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8914 
 8915   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8916   ins_encode %{
 8917     __ movl($dst$$Register, $src$$Register);
 8918   %}
 8919   ins_pipe(ialu_reg_reg); // XXX
 8920 %}
 8921 
 8922 // Convert oop pointer into compressed form
 8923 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 8924   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8925   match(Set dst (EncodeP src));
 8926   effect(KILL cr);
 8927   format %{ "encode_heap_oop $dst,$src" %}
 8928   ins_encode %{
 8929     Register s = $src$$Register;
 8930     Register d = $dst$$Register;
 8931     if (s != d) {
 8932       __ movq(d, s);
 8933     }
 8934     __ encode_heap_oop(d);
 8935   %}
 8936   ins_pipe(ialu_reg_long);
 8937 %}
 8938 
 8939 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8940   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8941   match(Set dst (EncodeP src));
 8942   effect(KILL cr);
 8943   format %{ "encode_heap_oop_not_null $dst,$src" %}
 8944   ins_encode %{
 8945     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 8946   %}
 8947   ins_pipe(ialu_reg_long);
 8948 %}
 8949 
 8950 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 8951   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 8952             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 8953   match(Set dst (DecodeN src));
 8954   effect(KILL cr);
 8955   format %{ "decode_heap_oop $dst,$src" %}
 8956   ins_encode %{
 8957     Register s = $src$$Register;
 8958     Register d = $dst$$Register;
 8959     if (s != d) {
 8960       __ movq(d, s);
 8961     }
 8962     __ decode_heap_oop(d);
 8963   %}
 8964   ins_pipe(ialu_reg_long);
 8965 %}
 8966 
 8967 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 8968   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 8969             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 8970   match(Set dst (DecodeN src));
 8971   effect(KILL cr);
 8972   format %{ "decode_heap_oop_not_null $dst,$src" %}
 8973   ins_encode %{
 8974     Register s = $src$$Register;
 8975     Register d = $dst$$Register;
 8976     if (s != d) {
 8977       __ decode_heap_oop_not_null(d, s);
 8978     } else {
 8979       __ decode_heap_oop_not_null(d);
 8980     }
 8981   %}
 8982   ins_pipe(ialu_reg_long);
 8983 %}
 8984 
 8985 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8986   match(Set dst (EncodePKlass src));
 8987   effect(TEMP dst, KILL cr);
 8988   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 8989   ins_encode %{
 8990     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 8991   %}
 8992   ins_pipe(ialu_reg_long);
 8993 %}
 8994 
 8995 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 8996   match(Set dst (DecodeNKlass src));
 8997   effect(TEMP dst, KILL cr);
 8998   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 8999   ins_encode %{
 9000     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9001   %}
 9002   ins_pipe(ialu_reg_long);
 9003 %}
 9004 
 9005 //----------Conditional Move---------------------------------------------------
 9006 // Jump
 9007 // dummy instruction for generating temp registers
 9008 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 9009   match(Jump (LShiftL switch_val shift));
 9010   ins_cost(350);
 9011   predicate(false);
 9012   effect(TEMP dest);
 9013 
 9014   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9015             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9016   ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs r10
    // to do that, and the compiler treats r10 as an allocatable register.
    // So we build the address by hand.
 9020     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9021     // ArrayAddress dispatch(table, index);
 9022     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9023     __ lea($dest$$Register, $constantaddress);
 9024     __ jmp(dispatch);
 9025   %}
 9026   ins_pipe(pipe_jmp);
 9027 %}
 9028 
 9029 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 9030   match(Jump (AddL (LShiftL switch_val shift) offset));
 9031   ins_cost(350);
 9032   effect(TEMP dest);
 9033 
 9034   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9035             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9036   ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs r10
    // to do that, and the compiler treats r10 as an allocatable register.
    // So we build the address by hand.
 9040     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9041     // ArrayAddress dispatch(table, index);
 9042     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9043     __ lea($dest$$Register, $constantaddress);
 9044     __ jmp(dispatch);
 9045   %}
 9046   ins_pipe(pipe_jmp);
 9047 %}
 9048 
 9049 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 9050   match(Jump switch_val);
 9051   ins_cost(350);
 9052   effect(TEMP dest);
 9053 
 9054   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9055             "jmp     [$dest + $switch_val]\n\t" %}
 9056   ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs r10
    // to do that, and the compiler treats r10 as an allocatable register.
    // So we build the address by hand.
 9060     // Address index(noreg, switch_reg, Address::times_1);
 9061     // ArrayAddress dispatch(table, index);
 9062     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9063     __ lea($dest$$Register, $constantaddress);
 9064     __ jmp(dispatch);
 9065   %}
 9066   ins_pipe(pipe_jmp);
 9067 %}
 9068 
 9069 // Conditional move
 9070 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9071 %{
 9072   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9073   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9074 
 9075   ins_cost(100); // XXX
 9076   format %{ "setbn$cop $dst\t# signed, int" %}
 9077   ins_encode %{
 9078     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9079     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9080   %}
 9081   ins_pipe(ialu_reg);
 9082 %}
 9083 
 9084 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9085 %{
 9086   predicate(!UseAPX);
 9087   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9088 
 9089   ins_cost(200); // XXX
 9090   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9091   ins_encode %{
 9092     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9093   %}
 9094   ins_pipe(pipe_cmov_reg);
 9095 %}
 9096 
 9097 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9098 %{
 9099   predicate(UseAPX);
 9100   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9101 
 9102   ins_cost(200);
 9103   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9104   ins_encode %{
 9105     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9106   %}
 9107   ins_pipe(pipe_cmov_reg);
 9108 %}
 9109 
 9110 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9111 %{
 9112   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9113   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9114 
 9115   ins_cost(100); // XXX
 9116   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9117   ins_encode %{
 9118     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9119     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9120   %}
 9121   ins_pipe(ialu_reg);
 9122 %}
 9123 
 9124 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9125   predicate(!UseAPX);
 9126   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9127 
 9128   ins_cost(200); // XXX
 9129   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9130   ins_encode %{
 9131     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9132   %}
 9133   ins_pipe(pipe_cmov_reg);
 9134 %}
 9135 
 9136 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9137   predicate(UseAPX);
 9138   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9139 
 9140   ins_cost(200);
 9141   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9142   ins_encode %{
 9143     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9144   %}
 9145   ins_pipe(pipe_cmov_reg);
 9146 %}
 9147 
 9148 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9149 %{
 9150   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9151   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9152 
 9153   ins_cost(100); // XXX
 9154   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9155   ins_encode %{
 9156     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9157     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9158   %}
 9159   ins_pipe(ialu_reg);
 9160 %}
 9161 
 9162 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9163   predicate(!UseAPX);
 9164   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9165   ins_cost(200);
 9166   expand %{
 9167     cmovI_regU(cop, cr, dst, src);
 9168   %}
 9169 %}
 9170 
 9171 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
 9172   predicate(UseAPX);
 9173   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9174   ins_cost(200);
 9175   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9176   ins_encode %{
 9177     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9178   %}
 9179   ins_pipe(pipe_cmov_reg);
 9180 %}
 9181 
 9182 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9183   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9184   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9185 
 9186   ins_cost(200); // XXX
 9187   format %{ "cmovpl  $dst, $src\n\t"
 9188             "cmovnel $dst, $src" %}
 9189   ins_encode %{
 9190     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9191     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9192   %}
 9193   ins_pipe(pipe_cmov_reg);
 9194 %}
 9195 
 9196 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9197   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9198   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9199   effect(TEMP dst);
 9200 
 9201   ins_cost(200);
 9202   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9203             "cmovnel  $dst, $src2" %}
 9204   ins_encode %{
 9205     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9206     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9207   %}
 9208   ins_pipe(pipe_cmov_reg);
 9209 %}
 9210 
 9211 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9212 // inputs of the CMove
 9213 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9214   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9215   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9216   effect(TEMP dst);
 9217 
 9218   ins_cost(200); // XXX
 9219   format %{ "cmovpl  $dst, $src\n\t"
 9220             "cmovnel $dst, $src" %}
 9221   ins_encode %{
 9222     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9223     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9224   %}
 9225   ins_pipe(pipe_cmov_reg);
 9226 %}
 9227 
// We need this special handling only for eq / ne comparisons, since NaN == NaN
// is false and the parity flag is set if either operand is a NaN.
 9230 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9231   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9232   match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
 9233   effect(TEMP dst);
 9234 
 9235   ins_cost(200);
 9236   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9237             "cmovnel  $dst, $src2" %}
 9238   ins_encode %{
 9239     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9240     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9241   %}
 9242   ins_pipe(pipe_cmov_reg);
 9243 %}
 9244 
 9245 // Conditional move
 9246 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9247   predicate(!UseAPX);
 9248   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9249 
 9250   ins_cost(250); // XXX
 9251   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9252   ins_encode %{
 9253     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9254   %}
 9255   ins_pipe(pipe_cmov_mem);
 9256 %}
 9257 
 9258 // Conditional move
 9259 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9260 %{
 9261   predicate(UseAPX);
 9262   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9263 
 9264   ins_cost(250);
 9265   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9266   ins_encode %{
 9267     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9268   %}
 9269   ins_pipe(pipe_cmov_mem);
 9270 %}
 9271 
 9272 // Conditional move
 9273 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9274 %{
 9275   predicate(!UseAPX);
 9276   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9277 
 9278   ins_cost(250); // XXX
 9279   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9280   ins_encode %{
 9281     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9282   %}
 9283   ins_pipe(pipe_cmov_mem);
 9284 %}
 9285 
 9286 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9287   predicate(!UseAPX);
 9288   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9289   ins_cost(250);
 9290   expand %{
 9291     cmovI_memU(cop, cr, dst, src);
 9292   %}
 9293 %}
 9294 
 9295 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9296 %{
 9297   predicate(UseAPX);
 9298   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9299 
 9300   ins_cost(250);
 9301   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9302   ins_encode %{
 9303     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9304   %}
 9305   ins_pipe(pipe_cmov_mem);
 9306 %}
 9307 
 9308 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
 9309 %{
 9310   predicate(UseAPX);
 9311   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9312   ins_cost(250);
 9313   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9314   ins_encode %{
 9315     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9316   %}
 9317   ins_pipe(pipe_cmov_mem);
 9318 %}
 9319 
 9320 // Conditional move
 9321 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9322 %{
 9323   predicate(!UseAPX);
 9324   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9325 
 9326   ins_cost(200); // XXX
 9327   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9328   ins_encode %{
 9329     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9330   %}
 9331   ins_pipe(pipe_cmov_reg);
 9332 %}
 9333 
 9334 // Conditional move ndd
 9335 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9336 %{
 9337   predicate(UseAPX);
 9338   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9339 
 9340   ins_cost(200);
 9341   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9342   ins_encode %{
 9343     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9344   %}
 9345   ins_pipe(pipe_cmov_reg);
 9346 %}
 9347 
 9348 // Conditional move
 9349 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9350 %{
 9351   predicate(!UseAPX);
 9352   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9353 
 9354   ins_cost(200); // XXX
 9355   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9356   ins_encode %{
 9357     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9358   %}
 9359   ins_pipe(pipe_cmov_reg);
 9360 %}
 9361 
 9362 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9363   predicate(!UseAPX);
 9364   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9365   ins_cost(200);
 9366   expand %{
 9367     cmovN_regU(cop, cr, dst, src);
 9368   %}
 9369 %}
 9370 
 9371 // Conditional move ndd
 9372 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9373 %{
 9374   predicate(UseAPX);
 9375   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9376 
 9377   ins_cost(200);
 9378   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9379   ins_encode %{
 9380     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9381   %}
 9382   ins_pipe(pipe_cmov_reg);
 9383 %}
 9384 
 9385 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
 9386   predicate(UseAPX);
 9387   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9388   ins_cost(200);
 9389   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9390   ins_encode %{
 9391     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9392   %}
 9393   ins_pipe(pipe_cmov_reg);
 9394 %}
 9395 
 9396 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9397   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9398   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9399 
 9400   ins_cost(200); // XXX
 9401   format %{ "cmovpl  $dst, $src\n\t"
 9402             "cmovnel $dst, $src" %}
 9403   ins_encode %{
 9404     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9405     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9406   %}
 9407   ins_pipe(pipe_cmov_reg);
 9408 %}
 9409 
 9410 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9411 // inputs of the CMove
 9412 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9413   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9414   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9415 
 9416   ins_cost(200); // XXX
 9417   format %{ "cmovpl  $dst, $src\n\t"
 9418             "cmovnel $dst, $src" %}
 9419   ins_encode %{
 9420     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9421     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9422   %}
 9423   ins_pipe(pipe_cmov_reg);
 9424 %}
 9425 
 9426 // Conditional move
 9427 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9428 %{
 9429   predicate(!UseAPX);
 9430   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9431 
 9432   ins_cost(200); // XXX
 9433   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9434   ins_encode %{
 9435     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9436   %}
 9437   ins_pipe(pipe_cmov_reg);  // XXX
 9438 %}
 9439 
 9440 // Conditional move ndd
 9441 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9442 %{
 9443   predicate(UseAPX);
 9444   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9445 
 9446   ins_cost(200);
 9447   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9448   ins_encode %{
 9449     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9450   %}
 9451   ins_pipe(pipe_cmov_reg);
 9452 %}
 9453 
 9454 // Conditional move
 9455 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9456 %{
 9457   predicate(!UseAPX);
 9458   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9459 
 9460   ins_cost(200); // XXX
 9461   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9462   ins_encode %{
 9463     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9464   %}
 9465   ins_pipe(pipe_cmov_reg); // XXX
 9466 %}
 9467 
 9468 // Conditional move ndd
 9469 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9470 %{
 9471   predicate(UseAPX);
 9472   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9473 
 9474   ins_cost(200);
 9475   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9476   ins_encode %{
 9477     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9478   %}
 9479   ins_pipe(pipe_cmov_reg);
 9480 %}
 9481 
 9482 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9483   predicate(!UseAPX);
 9484   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9485   ins_cost(200);
 9486   expand %{
 9487     cmovP_regU(cop, cr, dst, src);
 9488   %}
 9489 %}
 9490 
 9491 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
 9492   predicate(UseAPX);
 9493   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9494   ins_cost(200);
 9495   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9496   ins_encode %{
 9497     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9498   %}
 9499   ins_pipe(pipe_cmov_reg);
 9500 %}
 9501 
 9502 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9503   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9504   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9505 
 9506   ins_cost(200); // XXX
 9507   format %{ "cmovpq  $dst, $src\n\t"
 9508             "cmovneq $dst, $src" %}
 9509   ins_encode %{
 9510     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9511     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9512   %}
 9513   ins_pipe(pipe_cmov_reg);
 9514 %}
 9515 
 9516 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9517   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9518   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9519   effect(TEMP dst);
 9520 
 9521   ins_cost(200);
 9522   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9523             "cmovneq  $dst, $src2" %}
 9524   ins_encode %{
 9525     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9526     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9527   %}
 9528   ins_pipe(pipe_cmov_reg);
 9529 %}
 9530 
 9531 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9532 // inputs of the CMove
 9533 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9534   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9535   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9536 
 9537   ins_cost(200); // XXX
 9538   format %{ "cmovpq  $dst, $src\n\t"
 9539             "cmovneq $dst, $src" %}
 9540   ins_encode %{
 9541     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9542     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9543   %}
 9544   ins_pipe(pipe_cmov_reg);
 9545 %}
 9546 
 9547 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9548   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9549   match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
 9550   effect(TEMP dst);
 9551 
 9552   ins_cost(200);
 9553   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9554             "cmovneq  $dst, $src2" %}
 9555   ins_encode %{
 9556     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9557     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9558   %}
 9559   ins_pipe(pipe_cmov_reg);
 9560 %}
 9561 
 9562 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9563 %{
 9564   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9565   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9566 
 9567   ins_cost(100); // XXX
 9568   format %{ "setbn$cop $dst\t# signed, long" %}
 9569   ins_encode %{
 9570     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9571     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9572   %}
 9573   ins_pipe(ialu_reg);
 9574 %}
 9575 
 9576 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9577 %{
 9578   predicate(!UseAPX);
 9579   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9580 
 9581   ins_cost(200); // XXX
 9582   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9583   ins_encode %{
 9584     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9585   %}
 9586   ins_pipe(pipe_cmov_reg);  // XXX
 9587 %}
 9588 
 9589 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9590 %{
 9591   predicate(UseAPX);
 9592   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9593 
 9594   ins_cost(200);
 9595   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9596   ins_encode %{
 9597     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9598   %}
 9599   ins_pipe(pipe_cmov_reg);
 9600 %}
 9601 
 9602 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9603 %{
 9604   predicate(!UseAPX);
 9605   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9606 
 9607   ins_cost(200); // XXX
 9608   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9609   ins_encode %{
 9610     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9611   %}
 9612   ins_pipe(pipe_cmov_mem);  // XXX
 9613 %}
 9614 
 9615 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9616 %{
 9617   predicate(UseAPX);
 9618   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9619 
 9620   ins_cost(200);
 9621   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9622   ins_encode %{
 9623     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9624   %}
 9625   ins_pipe(pipe_cmov_mem);
 9626 %}
 9627 
 9628 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9629 %{
 9630   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9631   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9632 
 9633   ins_cost(100); // XXX
 9634   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9635   ins_encode %{
 9636     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9637     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9638   %}
 9639   ins_pipe(ialu_reg);
 9640 %}
 9641 
 9642 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9643 %{
 9644   predicate(!UseAPX);
 9645   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9646 
 9647   ins_cost(200); // XXX
 9648   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9649   ins_encode %{
 9650     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9651   %}
 9652   ins_pipe(pipe_cmov_reg); // XXX
 9653 %}
 9654 
 9655 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9656 %{
 9657   predicate(UseAPX);
 9658   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9659 
 9660   ins_cost(200);
 9661   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9662   ins_encode %{
 9663     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9664   %}
 9665   ins_pipe(pipe_cmov_reg);
 9666 %}
 9667 
 9668 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9669 %{
 9670   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9671   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9672 
 9673   ins_cost(100); // XXX
 9674   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9675   ins_encode %{
 9676     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9677     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9678   %}
 9679   ins_pipe(ialu_reg);
 9680 %}
 9681 
 9682 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9683   predicate(!UseAPX);
 9684   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9685   ins_cost(200);
 9686   expand %{
 9687     cmovL_regU(cop, cr, dst, src);
 9688   %}
 9689 %}
 9690 
 9691 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
 9692 %{
 9693   predicate(UseAPX);
 9694   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9695   ins_cost(200);
 9696   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9697   ins_encode %{
 9698     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9699   %}
 9700   ins_pipe(pipe_cmov_reg);
 9701 %}
 9702 
 9703 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9704   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9705   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9706 
 9707   ins_cost(200); // XXX
 9708   format %{ "cmovpq  $dst, $src\n\t"
 9709             "cmovneq $dst, $src" %}
 9710   ins_encode %{
 9711     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9712     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9713   %}
 9714   ins_pipe(pipe_cmov_reg);
 9715 %}
 9716 
 9717 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9718   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9719   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9720   effect(TEMP dst);
 9721 
 9722   ins_cost(200);
 9723   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9724             "cmovneq  $dst, $src2" %}
 9725   ins_encode %{
 9726     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9727     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9728   %}
 9729   ins_pipe(pipe_cmov_reg);
 9730 %}
 9731 
 9732 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9733 // inputs of the CMove
 9734 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9735   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9736   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9737 
 9738   ins_cost(200); // XXX
 9739   format %{ "cmovpq  $dst, $src\n\t"
 9740             "cmovneq $dst, $src" %}
 9741   ins_encode %{
 9742     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9743     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9744   %}
 9745   ins_pipe(pipe_cmov_reg);
 9746 %}
 9747 
 9748 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9749   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9750   match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
 9751   effect(TEMP dst);
 9752 
 9753   ins_cost(200);
 9754   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9755             "cmovneq $dst, $src2" %}
 9756   ins_encode %{
 9757     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9758     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9759   %}
 9760   ins_pipe(pipe_cmov_reg);
 9761 %}
 9762 
 9763 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9764 %{
 9765   predicate(!UseAPX);
 9766   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9767 
 9768   ins_cost(200); // XXX
 9769   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9770   ins_encode %{
 9771     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9772   %}
 9773   ins_pipe(pipe_cmov_mem); // XXX
 9774 %}
 9775 
 9776 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9777   predicate(!UseAPX);
 9778   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9779   ins_cost(200);
 9780   expand %{
 9781     cmovL_memU(cop, cr, dst, src);
 9782   %}
 9783 %}
 9784 
 9785 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9786 %{
 9787   predicate(UseAPX);
 9788   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9789 
 9790   ins_cost(200);
 9791   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9792   ins_encode %{
 9793     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9794   %}
 9795   ins_pipe(pipe_cmov_mem);
 9796 %}
 9797 
 9798 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
 9799 %{
 9800   predicate(UseAPX);
 9801   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9802   ins_cost(200);
 9803   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9804   ins_encode %{
 9805     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9806   %}
 9807   ins_pipe(pipe_cmov_mem);
 9808 %}
 9809 
 9810 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9811 %{
 9812   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9813 
 9814   ins_cost(200); // XXX
 9815   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9816             "movss     $dst, $src\n"
 9817     "skip:" %}
 9818   ins_encode %{
 9819     Label Lskip;
 9820     // Invert sense of branch from sense of CMOV
 9821     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9822     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9823     __ bind(Lskip);
 9824   %}
 9825   ins_pipe(pipe_slow);
 9826 %}
 9827 
 9828 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9829 %{
 9830   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9831 
 9832   ins_cost(200); // XXX
 9833   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9834             "movss     $dst, $src\n"
 9835     "skip:" %}
 9836   ins_encode %{
 9837     Label Lskip;
 9838     // Invert sense of branch from sense of CMOV
 9839     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9840     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9841     __ bind(Lskip);
 9842   %}
 9843   ins_pipe(pipe_slow);
 9844 %}
 9845 
 9846 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9847   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9848   ins_cost(200);
 9849   expand %{
 9850     cmovF_regU(cop, cr, dst, src);
 9851   %}
 9852 %}
 9853 
 9854 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9855 %{
 9856   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9857 
 9858   ins_cost(200); // XXX
 9859   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9860             "movsd     $dst, $src\n"
 9861     "skip:" %}
 9862   ins_encode %{
 9863     Label Lskip;
 9864     // Invert sense of branch from sense of CMOV
 9865     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9866     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9867     __ bind(Lskip);
 9868   %}
 9869   ins_pipe(pipe_slow);
 9870 %}
 9871 
 9872 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9873 %{
 9874   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9875 
 9876   ins_cost(200); // XXX
 9877   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9878             "movsd     $dst, $src\n"
 9879     "skip:" %}
 9880   ins_encode %{
 9881     Label Lskip;
 9882     // Invert sense of branch from sense of CMOV
 9883     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9884     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9885     __ bind(Lskip);
 9886   %}
 9887   ins_pipe(pipe_slow);
 9888 %}
 9889 
 9890 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9891   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9892   ins_cost(200);
 9893   expand %{
 9894     cmovD_regU(cop, cr, dst, src);
 9895   %}
 9896 %}
 9897 
 9898 //----------Arithmetic Instructions--------------------------------------------
 9899 //----------Addition Instructions----------------------------------------------
 9900 
 9901 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9902 %{
 9903   predicate(!UseAPX);
 9904   match(Set dst (AddI dst src));
 9905   effect(KILL cr);
 9906   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9907   format %{ "addl    $dst, $src\t# int" %}
 9908   ins_encode %{
 9909     __ addl($dst$$Register, $src$$Register);
 9910   %}
 9911   ins_pipe(ialu_reg_reg);
 9912 %}
 9913 
 9914 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9915 %{
 9916   predicate(UseAPX);
 9917   match(Set dst (AddI src1 src2));
 9918   effect(KILL cr);
 9919   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9920 
 9921   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9922   ins_encode %{
 9923     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9924   %}
 9925   ins_pipe(ialu_reg_reg);
 9926 %}
 9927 
 9928 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9929 %{
 9930   predicate(!UseAPX);
 9931   match(Set dst (AddI dst src));
 9932   effect(KILL cr);
 9933   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9934 
 9935   format %{ "addl    $dst, $src\t# int" %}
 9936   ins_encode %{
 9937     __ addl($dst$$Register, $src$$constant);
 9938   %}
 9939   ins_pipe( ialu_reg );
 9940 %}
 9941 
 9942 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9943 %{
 9944   predicate(UseAPX);
 9945   match(Set dst (AddI src1 src2));
 9946   effect(KILL cr);
 9947   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9948 
 9949   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9950   ins_encode %{
 9951     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9952   %}
 9953   ins_pipe( ialu_reg );
 9954 %}
 9955 
 9956 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
 9957 %{
 9958   predicate(UseAPX);
 9959   match(Set dst (AddI (LoadI src1) src2));
 9960   effect(KILL cr);
 9961   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9962 
 9963   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9964   ins_encode %{
 9965     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
 9966   %}
 9967   ins_pipe( ialu_reg );
 9968 %}
 9969 
 9970 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9971 %{
 9972   predicate(!UseAPX);
 9973   match(Set dst (AddI dst (LoadI src)));
 9974   effect(KILL cr);
 9975   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9976 
 9977   ins_cost(150); // XXX
 9978   format %{ "addl    $dst, $src\t# int" %}
 9979   ins_encode %{
 9980     __ addl($dst$$Register, $src$$Address);
 9981   %}
 9982   ins_pipe(ialu_reg_mem);
 9983 %}
 9984 
 9985 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
 9986 %{
 9987   predicate(UseAPX);
 9988   match(Set dst (AddI src1 (LoadI src2)));
 9989   effect(KILL cr);
 9990   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9991 
 9992   ins_cost(150);
 9993   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9994   ins_encode %{
 9995     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
 9996   %}
 9997   ins_pipe(ialu_reg_mem);
 9998 %}
 9999 
10000 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10001 %{
10002   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10003   effect(KILL cr);
10004   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10005 
10006   ins_cost(150); // XXX
10007   format %{ "addl    $dst, $src\t# int" %}
10008   ins_encode %{
10009     __ addl($dst$$Address, $src$$Register);
10010   %}
10011   ins_pipe(ialu_mem_reg);
10012 %}
10013 
10014 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10015 %{
10016   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10017   effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

10021   ins_cost(125); // XXX
10022   format %{ "addl    $dst, $src\t# int" %}
10023   ins_encode %{
10024     __ addl($dst$$Address, $src$$constant);
10025   %}
10026   ins_pipe(ialu_mem_imm);
10027 %}
10028 
10029 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10030 %{
10031   predicate(!UseAPX && UseIncDec);
10032   match(Set dst (AddI dst src));
10033   effect(KILL cr);
10034 
10035   format %{ "incl    $dst\t# int" %}
10036   ins_encode %{
10037     __ incrementl($dst$$Register);
10038   %}
10039   ins_pipe(ialu_reg);
10040 %}
10041 
10042 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10043 %{
10044   predicate(UseAPX && UseIncDec);
10045   match(Set dst (AddI src val));
10046   effect(KILL cr);
10047 
10048   format %{ "eincl    $dst, $src\t# int ndd" %}
10049   ins_encode %{
10050     __ eincl($dst$$Register, $src$$Register, false);
10051   %}
10052   ins_pipe(ialu_reg);
10053 %}
10054 
10055 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10056 %{
10057   predicate(UseAPX && UseIncDec);
10058   match(Set dst (AddI (LoadI src) val));
10059   effect(KILL cr);
10060 
10061   format %{ "eincl    $dst, $src\t# int ndd" %}
10062   ins_encode %{
10063     __ eincl($dst$$Register, $src$$Address, false);
10064   %}
10065   ins_pipe(ialu_reg);
10066 %}
10067 
10068 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10069 %{
10070   predicate(UseIncDec);
10071   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10072   effect(KILL cr);
10073 
10074   ins_cost(125); // XXX
10075   format %{ "incl    $dst\t# int" %}
10076   ins_encode %{
10077     __ incrementl($dst$$Address);
10078   %}
10079   ins_pipe(ialu_mem_imm);
10080 %}
10081 
10082 // XXX why does that use AddI
10083 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10084 %{
10085   predicate(!UseAPX && UseIncDec);
10086   match(Set dst (AddI dst src));
10087   effect(KILL cr);
10088 
10089   format %{ "decl    $dst\t# int" %}
10090   ins_encode %{
10091     __ decrementl($dst$$Register);
10092   %}
10093   ins_pipe(ialu_reg);
10094 %}
10095 
10096 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10097 %{
10098   predicate(UseAPX && UseIncDec);
10099   match(Set dst (AddI src val));
10100   effect(KILL cr);
10101 
10102   format %{ "edecl    $dst, $src\t# int ndd" %}
10103   ins_encode %{
10104     __ edecl($dst$$Register, $src$$Register, false);
10105   %}
10106   ins_pipe(ialu_reg);
10107 %}
10108 
10109 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10110 %{
10111   predicate(UseAPX && UseIncDec);
10112   match(Set dst (AddI (LoadI src) val));
10113   effect(KILL cr);
10114 
10115   format %{ "edecl    $dst, $src\t# int ndd" %}
10116   ins_encode %{
10117     __ edecl($dst$$Register, $src$$Address, false);
10118   %}
10119   ins_pipe(ialu_reg);
10120 %}
10121 
10122 // XXX why does that use AddI
10123 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10124 %{
10125   predicate(UseIncDec);
10126   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10127   effect(KILL cr);
10128 
10129   ins_cost(125); // XXX
10130   format %{ "decl    $dst\t# int" %}
10131   ins_encode %{
10132     __ decrementl($dst$$Address);
10133   %}
10134   ins_pipe(ialu_mem_imm);
10135 %}
10136 
10137 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10138 %{
10139   predicate(VM_Version::supports_fast_2op_lea());
10140   match(Set dst (AddI (LShiftI index scale) disp));
10141 
10142   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10143   ins_encode %{
10144     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10145     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10146   %}
10147   ins_pipe(ialu_reg_reg);
10148 %}
10149 
10150 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10151 %{
10152   predicate(VM_Version::supports_fast_3op_lea());
10153   match(Set dst (AddI (AddI base index) disp));
10154 
10155   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10156   ins_encode %{
10157     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10158   %}
10159   ins_pipe(ialu_reg_reg);
10160 %}
10161 
10162 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10163 %{
10164   predicate(VM_Version::supports_fast_2op_lea());
10165   match(Set dst (AddI base (LShiftI index scale)));
10166 
10167   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10168   ins_encode %{
10169     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10170     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10171   %}
10172   ins_pipe(ialu_reg_reg);
10173 %}
10174 
10175 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10176 %{
10177   predicate(VM_Version::supports_fast_3op_lea());
10178   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10179 
10180   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10181   ins_encode %{
10182     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10183     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10184   %}
10185   ins_pipe(ialu_reg_reg);
10186 %}
10187 
10188 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10189 %{
10190   predicate(!UseAPX);
10191   match(Set dst (AddL dst src));
10192   effect(KILL cr);
10193   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10194 
10195   format %{ "addq    $dst, $src\t# long" %}
10196   ins_encode %{
10197     __ addq($dst$$Register, $src$$Register);
10198   %}
10199   ins_pipe(ialu_reg_reg);
10200 %}
10201 
10202 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10203 %{
10204   predicate(UseAPX);
10205   match(Set dst (AddL src1 src2));
10206   effect(KILL cr);
10207   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10208 
10209   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10210   ins_encode %{
10211     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10212   %}
10213   ins_pipe(ialu_reg_reg);
10214 %}
10215 
10216 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10217 %{
10218   predicate(!UseAPX);
10219   match(Set dst (AddL dst src));
10220   effect(KILL cr);
10221   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10222 
10223   format %{ "addq    $dst, $src\t# long" %}
10224   ins_encode %{
10225     __ addq($dst$$Register, $src$$constant);
10226   %}
10227   ins_pipe( ialu_reg );
10228 %}
10229 
10230 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10231 %{
10232   predicate(UseAPX);
10233   match(Set dst (AddL src1 src2));
10234   effect(KILL cr);
10235   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10236 
10237   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10238   ins_encode %{
10239     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10240   %}
10241   ins_pipe( ialu_reg );
10242 %}
10243 
10244 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10245 %{
10246   predicate(UseAPX);
10247   match(Set dst (AddL (LoadL src1) src2));
10248   effect(KILL cr);
10249   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10250 
10251   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10252   ins_encode %{
10253     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10254   %}
10255   ins_pipe( ialu_reg );
10256 %}
10257 
10258 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10259 %{
10260   predicate(!UseAPX);
10261   match(Set dst (AddL dst (LoadL src)));
10262   effect(KILL cr);
10263   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10264 
10265   ins_cost(150); // XXX
10266   format %{ "addq    $dst, $src\t# long" %}
10267   ins_encode %{
10268     __ addq($dst$$Register, $src$$Address);
10269   %}
10270   ins_pipe(ialu_reg_mem);
10271 %}
10272 
10273 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10274 %{
10275   predicate(UseAPX);
10276   match(Set dst (AddL src1 (LoadL src2)));
10277   effect(KILL cr);
10278   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10279 
10280   ins_cost(150);
10281   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10282   ins_encode %{
10283     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10284   %}
10285   ins_pipe(ialu_reg_mem);
10286 %}
10287 
10288 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10289 %{
10290   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10291   effect(KILL cr);
10292   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10293 
10294   ins_cost(150); // XXX
10295   format %{ "addq    $dst, $src\t# long" %}
10296   ins_encode %{
10297     __ addq($dst$$Address, $src$$Register);
10298   %}
10299   ins_pipe(ialu_mem_reg);
10300 %}
10301 
10302 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10303 %{
10304   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10305   effect(KILL cr);
10306   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10307 
10308   ins_cost(125); // XXX
10309   format %{ "addq    $dst, $src\t# long" %}
10310   ins_encode %{
10311     __ addq($dst$$Address, $src$$constant);
10312   %}
10313   ins_pipe(ialu_mem_imm);
10314 %}
10315 
10316 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10317 %{
10318   predicate(!UseAPX && UseIncDec);
10319   match(Set dst (AddL dst src));
10320   effect(KILL cr);
10321 
10322   format %{ "incq    $dst\t# long" %}
10323   ins_encode %{
10324     __ incrementq($dst$$Register);
10325   %}
10326   ins_pipe(ialu_reg);
10327 %}
10328 
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10330 %{
10331   predicate(UseAPX && UseIncDec);
10332   match(Set dst (AddL src val));
10333   effect(KILL cr);
10334 
10335   format %{ "eincq    $dst, $src\t# long ndd" %}
10336   ins_encode %{
10337     __ eincq($dst$$Register, $src$$Register, false);
10338   %}
10339   ins_pipe(ialu_reg);
10340 %}
10341 
10342 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10343 %{
10344   predicate(UseAPX && UseIncDec);
10345   match(Set dst (AddL (LoadL src) val));
10346   effect(KILL cr);
10347 
10348   format %{ "eincq    $dst, $src\t# long ndd" %}
10349   ins_encode %{
10350     __ eincq($dst$$Register, $src$$Address, false);
10351   %}
10352   ins_pipe(ialu_reg);
10353 %}
10354 
10355 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10356 %{
10357   predicate(UseIncDec);
10358   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10359   effect(KILL cr);
10360 
10361   ins_cost(125); // XXX
10362   format %{ "incq    $dst\t# long" %}
10363   ins_encode %{
10364     __ incrementq($dst$$Address);
10365   %}
10366   ins_pipe(ialu_mem_imm);
10367 %}
10368 
10369 // XXX why does that use AddL
10370 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10371 %{
10372   predicate(!UseAPX && UseIncDec);
10373   match(Set dst (AddL dst src));
10374   effect(KILL cr);
10375 
10376   format %{ "decq    $dst\t# long" %}
10377   ins_encode %{
10378     __ decrementq($dst$$Register);
10379   %}
10380   ins_pipe(ialu_reg);
10381 %}
10382 
10383 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10384 %{
10385   predicate(UseAPX && UseIncDec);
10386   match(Set dst (AddL src val));
10387   effect(KILL cr);
10388 
10389   format %{ "edecq    $dst, $src\t# long ndd" %}
10390   ins_encode %{
10391     __ edecq($dst$$Register, $src$$Register, false);
10392   %}
10393   ins_pipe(ialu_reg);
10394 %}
10395 
10396 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10397 %{
10398   predicate(UseAPX && UseIncDec);
10399   match(Set dst (AddL (LoadL src) val));
10400   effect(KILL cr);
10401 
10402   format %{ "edecq    $dst, $src\t# long ndd" %}
10403   ins_encode %{
10404     __ edecq($dst$$Register, $src$$Address, false);
10405   %}
10406   ins_pipe(ialu_reg);
10407 %}
10408 
10409 // XXX why does that use AddL
10410 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10411 %{
10412   predicate(UseIncDec);
10413   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10414   effect(KILL cr);
10415 
10416   ins_cost(125); // XXX
10417   format %{ "decq    $dst\t# long" %}
10418   ins_encode %{
10419     __ decrementq($dst$$Address);
10420   %}
10421   ins_pipe(ialu_mem_imm);
10422 %}
10423 
10424 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10425 %{
10426   predicate(VM_Version::supports_fast_2op_lea());
10427   match(Set dst (AddL (LShiftL index scale) disp));
10428 
10429   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10430   ins_encode %{
10431     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10432     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10433   %}
10434   ins_pipe(ialu_reg_reg);
10435 %}
10436 
10437 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10438 %{
10439   predicate(VM_Version::supports_fast_3op_lea());
10440   match(Set dst (AddL (AddL base index) disp));
10441 
10442   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10443   ins_encode %{
10444     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10445   %}
10446   ins_pipe(ialu_reg_reg);
10447 %}
10448 
10449 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10450 %{
10451   predicate(VM_Version::supports_fast_2op_lea());
10452   match(Set dst (AddL base (LShiftL index scale)));
10453 
10454   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10455   ins_encode %{
10456     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10457     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10458   %}
10459   ins_pipe(ialu_reg_reg);
10460 %}
10461 
10462 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10463 %{
10464   predicate(VM_Version::supports_fast_3op_lea());
10465   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10466 
10467   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10468   ins_encode %{
10469     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10470     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10471   %}
10472   ins_pipe(ialu_reg_reg);
10473 %}
10474 
10475 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10476 %{
10477   match(Set dst (AddP dst src));
10478   effect(KILL cr);
10479   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10480 
10481   format %{ "addq    $dst, $src\t# ptr" %}
10482   ins_encode %{
10483     __ addq($dst$$Register, $src$$Register);
10484   %}
10485   ins_pipe(ialu_reg_reg);
10486 %}
10487 
10488 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10489 %{
10490   match(Set dst (AddP dst src));
10491   effect(KILL cr);
10492   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10493 
10494   format %{ "addq    $dst, $src\t# ptr" %}
10495   ins_encode %{
10496     __ addq($dst$$Register, $src$$constant);
10497   %}
10498   ins_pipe( ialu_reg );
10499 %}
10500 
10501 // XXX addP mem ops ????
10502 
10503 instruct checkCastPP(rRegP dst)
10504 %{
10505   match(Set dst (CheckCastPP dst));
10506 
10507   size(0);
10508   format %{ "# checkcastPP of $dst" %}
10509   ins_encode(/* empty encoding */);
10510   ins_pipe(empty);
10511 %}
10512 
10513 instruct castPP(rRegP dst)
10514 %{
10515   match(Set dst (CastPP dst));
10516 
10517   size(0);
10518   format %{ "# castPP of $dst" %}
10519   ins_encode(/* empty encoding */);
10520   ins_pipe(empty);
10521 %}
10522 
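// When VerifyConstraintCasts is zero the cast nodes are erased (zero size,
// empty encoding); with a positive value the *_checked forms below emit a
// runtime check that the value actually lies within the node's type range.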
10523 instruct castII(rRegI dst)
10524 %{
10525   predicate(VerifyConstraintCasts == 0);
10526   match(Set dst (CastII dst));
10527 
10528   size(0);
10529   format %{ "# castII of $dst" %}
10530   ins_encode(/* empty encoding */);
10531   ins_cost(0);
10532   ins_pipe(empty);
10533 %}
10534 
10535 instruct castII_checked(rRegI dst, rFlagsReg cr)
10536 %{
10537   predicate(VerifyConstraintCasts > 0);
10538   match(Set dst (CastII dst));
10539 
10540   effect(KILL cr);
10541   format %{ "# cast_checked_II $dst" %}
10542   ins_encode %{
10543     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10544   %}
10545   ins_pipe(pipe_slow);
10546 %}
10547 
10548 instruct castLL(rRegL dst)
10549 %{
10550   predicate(VerifyConstraintCasts == 0);
10551   match(Set dst (CastLL dst));
10552 
10553   size(0);
10554   format %{ "# castLL of $dst" %}
10555   ins_encode(/* empty encoding */);
10556   ins_cost(0);
10557   ins_pipe(empty);
10558 %}
10559 
10560 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10561 %{
10562   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10563   match(Set dst (CastLL dst));
10564 
10565   effect(KILL cr);
10566   format %{ "# cast_checked_LL $dst" %}
10567   ins_encode %{
10568     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10569   %}
10570   ins_pipe(pipe_slow);
10571 %}
10572 
10573 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10574 %{
10575   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10576   match(Set dst (CastLL dst));
10577 
10578   effect(KILL cr, TEMP tmp);
10579   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10580   ins_encode %{
10581     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10582   %}
10583   ins_pipe(pipe_slow);
10584 %}
10585 
10586 instruct castFF(regF dst)
10587 %{
10588   match(Set dst (CastFF dst));
10589 
10590   size(0);
10591   format %{ "# castFF of $dst" %}
10592   ins_encode(/* empty encoding */);
10593   ins_cost(0);
10594   ins_pipe(empty);
10595 %}
10596 
10597 instruct castHH(regF dst)
10598 %{
10599   match(Set dst (CastHH dst));
10600 
10601   size(0);
10602   format %{ "# castHH of $dst" %}
10603   ins_encode(/* empty encoding */);
10604   ins_cost(0);
10605   ins_pipe(empty);
10606 %}
10607 
10608 instruct castDD(regD dst)
10609 %{
10610   match(Set dst (CastDD dst));
10611 
10612   size(0);
10613   format %{ "# castDD of $dst" %}
10614   ins_encode(/* empty encoding */);
10615   ins_cost(0);
10616   ins_pipe(empty);
10617 %}
10618 
10619 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
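// cmpxchg compares against and reports through rax implicitly, which is why
// oldval is pinned to rax (rax_RegP and friends) and marked KILL: afterwards
// rax holds whatever was found in memory, not the expected value.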
10620 instruct compareAndSwapP(rRegI res,
10621                          memory mem_ptr,
10622                          rax_RegP oldval, rRegP newval,
10623                          rFlagsReg cr)
10624 %{
10625   predicate(n->as_LoadStore()->barrier_data() == 0);
10626   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10627   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10628   effect(KILL cr, KILL oldval);
10629 
10630   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10631             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10632             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10633   ins_encode %{
10634     __ lock();
10635     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10636     __ setcc(Assembler::equal, $res$$Register);
10637   %}
10638   ins_pipe( pipe_cmpxchg );
10639 %}
10640 
10641 instruct compareAndSwapL(rRegI res,
10642                          memory mem_ptr,
10643                          rax_RegL oldval, rRegL newval,
10644                          rFlagsReg cr)
10645 %{
10646   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10647   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10648   effect(KILL cr, KILL oldval);
10649 
10650   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10651             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10652             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10653   ins_encode %{
10654     __ lock();
10655     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10656     __ setcc(Assembler::equal, $res$$Register);
10657   %}
10658   ins_pipe( pipe_cmpxchg );
10659 %}
10660 
10661 instruct compareAndSwapI(rRegI res,
10662                          memory mem_ptr,
10663                          rax_RegI oldval, rRegI newval,
10664                          rFlagsReg cr)
10665 %{
10666   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10667   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10668   effect(KILL cr, KILL oldval);
10669 
10670   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10671             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10672             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10673   ins_encode %{
10674     __ lock();
10675     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10676     __ setcc(Assembler::equal, $res$$Register);
10677   %}
10678   ins_pipe( pipe_cmpxchg );
10679 %}
10680 
10681 instruct compareAndSwapB(rRegI res,
10682                          memory mem_ptr,
10683                          rax_RegI oldval, rRegI newval,
10684                          rFlagsReg cr)
10685 %{
10686   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10687   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10688   effect(KILL cr, KILL oldval);
10689 
10690   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10691             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10692             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10693   ins_encode %{
10694     __ lock();
10695     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10696     __ setcc(Assembler::equal, $res$$Register);
10697   %}
10698   ins_pipe( pipe_cmpxchg );
10699 %}
10700 
10701 instruct compareAndSwapS(rRegI res,
10702                          memory mem_ptr,
10703                          rax_RegI oldval, rRegI newval,
10704                          rFlagsReg cr)
10705 %{
10706   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10707   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10708   effect(KILL cr, KILL oldval);
10709 
10710   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10711             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10712             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10713   ins_encode %{
10714     __ lock();
10715     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10716     __ setcc(Assembler::equal, $res$$Register);
10717   %}
10718   ins_pipe( pipe_cmpxchg );
10719 %}
10720 
10721 instruct compareAndSwapN(rRegI res,
10722                           memory mem_ptr,
10723                           rax_RegN oldval, rRegN newval,
10724                           rFlagsReg cr) %{
10725   predicate(n->as_LoadStore()->barrier_data() == 0);
10726   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10727   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10728   effect(KILL cr, KILL oldval);
10729 
10730   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10731             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10732             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10733   ins_encode %{
10734     __ lock();
10735     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10736     __ setcc(Assembler::equal, $res$$Register);
10737   %}
10738   ins_pipe( pipe_cmpxchg );
10739 %}
10740 
10741 instruct compareAndExchangeB(
10742                          memory mem_ptr,
10743                          rax_RegI oldval, rRegI newval,
10744                          rFlagsReg cr)
10745 %{
10746   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10747   effect(KILL cr);
10748 
10749   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10750             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10751   ins_encode %{
10752     __ lock();
10753     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10754   %}
10755   ins_pipe( pipe_cmpxchg );
10756 %}
10757 
10758 instruct compareAndExchangeS(
10759                          memory mem_ptr,
10760                          rax_RegI oldval, rRegI newval,
10761                          rFlagsReg cr)
10762 %{
10763   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10764   effect(KILL cr);
10765 
10766   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10767             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10768   ins_encode %{
10769     __ lock();
10770     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10771   %}
10772   ins_pipe( pipe_cmpxchg );
10773 %}
10774 
10775 instruct compareAndExchangeI(
10776                          memory mem_ptr,
10777                          rax_RegI oldval, rRegI newval,
10778                          rFlagsReg cr)
10779 %{
10780   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10781   effect(KILL cr);
10782 
10783   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10784             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10785   ins_encode %{
10786     __ lock();
10787     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10788   %}
10789   ins_pipe( pipe_cmpxchg );
10790 %}
10791 
10792 instruct compareAndExchangeL(
10793                          memory mem_ptr,
10794                          rax_RegL oldval, rRegL newval,
10795                          rFlagsReg cr)
10796 %{
10797   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10798   effect(KILL cr);
10799 
10800   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10801             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10802   ins_encode %{
10803     __ lock();
10804     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10805   %}
10806   ins_pipe( pipe_cmpxchg );
10807 %}
10808 
10809 instruct compareAndExchangeN(
10810                           memory mem_ptr,
10811                           rax_RegN oldval, rRegN newval,
10812                           rFlagsReg cr) %{
10813   predicate(n->as_LoadStore()->barrier_data() == 0);
10814   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10815   effect(KILL cr);
10816 
10817   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10818             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10819   ins_encode %{
10820     __ lock();
10821     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10822   %}
10823   ins_pipe( pipe_cmpxchg );
10824 %}
10825 
10826 instruct compareAndExchangeP(
10827                          memory mem_ptr,
10828                          rax_RegP oldval, rRegP newval,
10829                          rFlagsReg cr)
10830 %{
10831   predicate(n->as_LoadStore()->barrier_data() == 0);
10832   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10833   effect(KILL cr);
10834 
10835   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10836             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10837   ins_encode %{
10838     __ lock();
10839     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10840   %}
10841   ins_pipe( pipe_cmpxchg );
10842 %}
10843 
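// GetAndAdd: when the fetched value is unused (result_not_used()), a plain
// locked add is emitted instead of xadd; otherwise lock xadd both adds and
// returns the previous memory value in the register operand.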
10844 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10845   predicate(n->as_LoadStore()->result_not_used());
10846   match(Set dummy (GetAndAddB mem add));
10847   effect(KILL cr);
10848   format %{ "addb_lock   $mem, $add" %}
10849   ins_encode %{
10850     __ lock();
10851     __ addb($mem$$Address, $add$$Register);
10852   %}
10853   ins_pipe(pipe_cmpxchg);
10854 %}
10855 
10856 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10857   predicate(n->as_LoadStore()->result_not_used());
10858   match(Set dummy (GetAndAddB mem add));
10859   effect(KILL cr);
10860   format %{ "addb_lock   $mem, $add" %}
10861   ins_encode %{
10862     __ lock();
10863     __ addb($mem$$Address, $add$$constant);
10864   %}
10865   ins_pipe(pipe_cmpxchg);
10866 %}
10867 
10868 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10869   predicate(!n->as_LoadStore()->result_not_used());
10870   match(Set newval (GetAndAddB mem newval));
10871   effect(KILL cr);
10872   format %{ "xaddb_lock  $mem, $newval" %}
10873   ins_encode %{
10874     __ lock();
10875     __ xaddb($mem$$Address, $newval$$Register);
10876   %}
10877   ins_pipe(pipe_cmpxchg);
10878 %}
10879 
10880 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10881   predicate(n->as_LoadStore()->result_not_used());
10882   match(Set dummy (GetAndAddS mem add));
10883   effect(KILL cr);
10884   format %{ "addw_lock   $mem, $add" %}
10885   ins_encode %{
10886     __ lock();
10887     __ addw($mem$$Address, $add$$Register);
10888   %}
10889   ins_pipe(pipe_cmpxchg);
10890 %}
10891 
10892 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10893   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10894   match(Set dummy (GetAndAddS mem add));
10895   effect(KILL cr);
10896   format %{ "addw_lock   $mem, $add" %}
10897   ins_encode %{
10898     __ lock();
10899     __ addw($mem$$Address, $add$$constant);
10900   %}
10901   ins_pipe(pipe_cmpxchg);
10902 %}
10903 
10904 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10905   predicate(!n->as_LoadStore()->result_not_used());
10906   match(Set newval (GetAndAddS mem newval));
10907   effect(KILL cr);
10908   format %{ "xaddw_lock  $mem, $newval" %}
10909   ins_encode %{
10910     __ lock();
10911     __ xaddw($mem$$Address, $newval$$Register);
10912   %}
10913   ins_pipe(pipe_cmpxchg);
10914 %}
10915 
10916 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10917   predicate(n->as_LoadStore()->result_not_used());
10918   match(Set dummy (GetAndAddI mem add));
10919   effect(KILL cr);
10920   format %{ "addl_lock   $mem, $add" %}
10921   ins_encode %{
10922     __ lock();
10923     __ addl($mem$$Address, $add$$Register);
10924   %}
10925   ins_pipe(pipe_cmpxchg);
10926 %}
10927 
10928 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10929   predicate(n->as_LoadStore()->result_not_used());
10930   match(Set dummy (GetAndAddI mem add));
10931   effect(KILL cr);
10932   format %{ "addl_lock   $mem, $add" %}
10933   ins_encode %{
10934     __ lock();
10935     __ addl($mem$$Address, $add$$constant);
10936   %}
10937   ins_pipe(pipe_cmpxchg);
10938 %}
10939 
10940 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10941   predicate(!n->as_LoadStore()->result_not_used());
10942   match(Set newval (GetAndAddI mem newval));
10943   effect(KILL cr);
10944   format %{ "xaddl_lock  $mem, $newval" %}
10945   ins_encode %{
10946     __ lock();
10947     __ xaddl($mem$$Address, $newval$$Register);
10948   %}
10949   ins_pipe(pipe_cmpxchg);
10950 %}
10951 
10952 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10953   predicate(n->as_LoadStore()->result_not_used());
10954   match(Set dummy (GetAndAddL mem add));
10955   effect(KILL cr);
10956   format %{ "addq_lock   $mem, $add" %}
10957   ins_encode %{
10958     __ lock();
10959     __ addq($mem$$Address, $add$$Register);
10960   %}
10961   ins_pipe(pipe_cmpxchg);
10962 %}
10963 
10964 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10965   predicate(n->as_LoadStore()->result_not_used());
10966   match(Set dummy (GetAndAddL mem add));
10967   effect(KILL cr);
10968   format %{ "addq_lock   $mem, $add" %}
10969   ins_encode %{
10970     __ lock();
10971     __ addq($mem$$Address, $add$$constant);
10972   %}
10973   ins_pipe(pipe_cmpxchg);
10974 %}
10975 
10976 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10977   predicate(!n->as_LoadStore()->result_not_used());
10978   match(Set newval (GetAndAddL mem newval));
10979   effect(KILL cr);
10980   format %{ "xaddq_lock  $mem, $newval" %}
10981   ins_encode %{
10982     __ lock();
10983     __ xaddq($mem$$Address, $newval$$Register);
10984   %}
10985   ins_pipe(pipe_cmpxchg);
10986 %}
10987 
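// GetAndSet: xchg with a memory operand is implicitly locked by the CPU, so
// no explicit lock prefix is emitted for these forms.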
10988 instruct xchgB( memory mem, rRegI newval) %{
10989   match(Set newval (GetAndSetB mem newval));
10990   format %{ "XCHGB  $newval,[$mem]" %}
10991   ins_encode %{
10992     __ xchgb($newval$$Register, $mem$$Address);
10993   %}
10994   ins_pipe( pipe_cmpxchg );
10995 %}
10996 
10997 instruct xchgS( memory mem, rRegI newval) %{
10998   match(Set newval (GetAndSetS mem newval));
10999   format %{ "XCHGW  $newval,[$mem]" %}
11000   ins_encode %{
11001     __ xchgw($newval$$Register, $mem$$Address);
11002   %}
11003   ins_pipe( pipe_cmpxchg );
11004 %}
11005 
11006 instruct xchgI( memory mem, rRegI newval) %{
11007   match(Set newval (GetAndSetI mem newval));
11008   format %{ "XCHGL  $newval,[$mem]" %}
11009   ins_encode %{
11010     __ xchgl($newval$$Register, $mem$$Address);
11011   %}
11012   ins_pipe( pipe_cmpxchg );
11013 %}
11014 
11015 instruct xchgL( memory mem, rRegL newval) %{
11016   match(Set newval (GetAndSetL mem newval));
11017   format %{ "XCHGL  $newval,[$mem]" %}
11018   ins_encode %{
11019     __ xchgq($newval$$Register, $mem$$Address);
11020   %}
11021   ins_pipe( pipe_cmpxchg );
11022 %}
11023 
11024 instruct xchgP( memory mem, rRegP newval) %{
11025   match(Set newval (GetAndSetP mem newval));
11026   predicate(n->as_LoadStore()->barrier_data() == 0);
11027   format %{ "XCHGQ  $newval,[$mem]" %}
11028   ins_encode %{
11029     __ xchgq($newval$$Register, $mem$$Address);
11030   %}
11031   ins_pipe( pipe_cmpxchg );
11032 %}
11033 
11034 instruct xchgN( memory mem, rRegN newval) %{
11035   predicate(n->as_LoadStore()->barrier_data() == 0);
11036   match(Set newval (GetAndSetN mem newval));
11037   format %{ "XCHGL  $newval,$mem]" %}
11038   ins_encode %{
11039     __ xchgl($newval$$Register, $mem$$Address);
11040   %}
11041   ins_pipe( pipe_cmpxchg );
11042 %}
11043 
11044 //----------Abs Instructions-------------------------------------------
11045 
11046 // Integer Absolute Instructions
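// Branchless abs: dst = 0 - src, then cmov on "less" (negative result, i.e.
// src was positive) copies src back, leaving |src| without a jump.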
11047 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11048 %{
11049   match(Set dst (AbsI src));
11050   effect(TEMP dst, KILL cr);
11051   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11052             "subl    $dst, $src\n\t"
11053             "cmovll  $dst, $src" %}
11054   ins_encode %{
11055     __ xorl($dst$$Register, $dst$$Register);
11056     __ subl($dst$$Register, $src$$Register);
11057     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11058   %}
11059 
11060   ins_pipe(ialu_reg_reg);
11061 %}
11062 
11063 // Long Absolute Instructions
11064 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11065 %{
11066   match(Set dst (AbsL src));
11067   effect(TEMP dst, KILL cr);
11068   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11069             "subq    $dst, $src\n\t"
11070             "cmovlq  $dst, $src" %}
11071   ins_encode %{
11072     __ xorl($dst$$Register, $dst$$Register);
11073     __ subq($dst$$Register, $src$$Register);
11074     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11075   %}
11076 
11077   ins_pipe(ialu_reg_reg);
11078 %}
11079 
11080 //----------Subtraction Instructions-------------------------------------------
11081 
11082 // Integer Subtraction Instructions
11083 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11084 %{
11085   predicate(!UseAPX);
11086   match(Set dst (SubI dst src));
11087   effect(KILL cr);
11088   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11089 
11090   format %{ "subl    $dst, $src\t# int" %}
11091   ins_encode %{
11092     __ subl($dst$$Register, $src$$Register);
11093   %}
11094   ins_pipe(ialu_reg_reg);
11095 %}
11096 
11097 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11098 %{
11099   predicate(UseAPX);
11100   match(Set dst (SubI src1 src2));
11101   effect(KILL cr);
11102   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11103 
11104   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11105   ins_encode %{
11106     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11107   %}
11108   ins_pipe(ialu_reg_reg);
11109 %}
11110 
11111 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11112 %{
11113   predicate(UseAPX);
11114   match(Set dst (SubI src1 src2));
11115   effect(KILL cr);
11116   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11117 
11118   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11119   ins_encode %{
11120     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11121   %}
11122   ins_pipe(ialu_reg_reg);
11123 %}
11124 
11125 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11126 %{
11127   predicate(UseAPX);
11128   match(Set dst (SubI (LoadI src1) src2));
11129   effect(KILL cr);
11130   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11131 
11132   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11133   ins_encode %{
11134     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11135   %}
11136   ins_pipe(ialu_reg_reg);
11137 %}
11138 
11139 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11140 %{
11141   predicate(!UseAPX);
11142   match(Set dst (SubI dst (LoadI src)));
11143   effect(KILL cr);
11144   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11145 
11146   ins_cost(150);
11147   format %{ "subl    $dst, $src\t# int" %}
11148   ins_encode %{
11149     __ subl($dst$$Register, $src$$Address);
11150   %}
11151   ins_pipe(ialu_reg_mem);
11152 %}
11153 
11154 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11155 %{
11156   predicate(UseAPX);
11157   match(Set dst (SubI src1 (LoadI src2)));
11158   effect(KILL cr);
11159   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11160 
11161   ins_cost(150);
11162   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11163   ins_encode %{
11164     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11165   %}
11166   ins_pipe(ialu_reg_mem);
11167 %}
11168 
11169 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11170 %{
11171   predicate(UseAPX);
11172   match(Set dst (SubI (LoadI src1) src2));
11173   effect(KILL cr);
11174   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11175 
11176   ins_cost(150);
11177   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11178   ins_encode %{
11179     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11180   %}
11181   ins_pipe(ialu_reg_mem);
11182 %}
11183 
11184 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11185 %{
11186   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11187   effect(KILL cr);
11188   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11189 
11190   ins_cost(150);
11191   format %{ "subl    $dst, $src\t# int" %}
11192   ins_encode %{
11193     __ subl($dst$$Address, $src$$Register);
11194   %}
11195   ins_pipe(ialu_mem_reg);
11196 %}
11197 
11198 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11199 %{
11200   predicate(!UseAPX);
11201   match(Set dst (SubL dst src));
11202   effect(KILL cr);
11203   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11204 
11205   format %{ "subq    $dst, $src\t# long" %}
11206   ins_encode %{
11207     __ subq($dst$$Register, $src$$Register);
11208   %}
11209   ins_pipe(ialu_reg_reg);
11210 %}
11211 
11212 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11213 %{
11214   predicate(UseAPX);
11215   match(Set dst (SubL src1 src2));
11216   effect(KILL cr);
11217   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11218 
11219   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11220   ins_encode %{
11221     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11222   %}
11223   ins_pipe(ialu_reg_reg);
11224 %}
11225 
11226 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11227 %{
11228   predicate(UseAPX);
11229   match(Set dst (SubL src1 src2));
11230   effect(KILL cr);
11231   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11232 
11233   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11234   ins_encode %{
11235     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11236   %}
11237   ins_pipe(ialu_reg_reg);
11238 %}
11239 
11240 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11241 %{
11242   predicate(UseAPX);
11243   match(Set dst (SubL (LoadL src1) src2));
11244   effect(KILL cr);
11245   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11246 
11247   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11248   ins_encode %{
11249     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11250   %}
11251   ins_pipe(ialu_reg_reg);
11252 %}
11253 
11254 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11255 %{
11256   predicate(!UseAPX);
11257   match(Set dst (SubL dst (LoadL src)));
11258   effect(KILL cr);
11259   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11260 
11261   ins_cost(150);
11262   format %{ "subq    $dst, $src\t# long" %}
11263   ins_encode %{
11264     __ subq($dst$$Register, $src$$Address);
11265   %}
11266   ins_pipe(ialu_reg_mem);
11267 %}
11268 
11269 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11270 %{
11271   predicate(UseAPX);
11272   match(Set dst (SubL src1 (LoadL src2)));
11273   effect(KILL cr);
11274   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11275 
11276   ins_cost(150);
11277   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11278   ins_encode %{
11279     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11280   %}
11281   ins_pipe(ialu_reg_mem);
11282 %}
11283 
11284 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11285 %{
11286   predicate(UseAPX);
11287   match(Set dst (SubL (LoadL src1) src2));
11288   effect(KILL cr);
11289   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11290 
11291   ins_cost(150);
11292   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11293   ins_encode %{
11294     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11295   %}
11296   ins_pipe(ialu_reg_mem);
11297 %}
11298 
11299 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11300 %{
11301   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11302   effect(KILL cr);
11303   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11304 
11305   ins_cost(150);
11306   format %{ "subq    $dst, $src\t# long" %}
11307   ins_encode %{
11308     __ subq($dst$$Address, $src$$Register);
11309   %}
11310   ins_pipe(ialu_mem_reg);
11311 %}
11312 
11313 // Subtract from a pointer
11314 // XXX hmpf???
11315 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11316 %{
11317   match(Set dst (AddP dst (SubI zero src)));
11318   effect(KILL cr);
11319 
11320   format %{ "subq    $dst, $src\t# ptr - int" %}
11321   ins_encode %{
11322     __ subq($dst$$Register, $src$$Register);
11323   %}
11324   ins_pipe(ialu_reg_reg);
11325 %}
11326 
11327 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11328 %{
11329   predicate(!UseAPX);
11330   match(Set dst (SubI zero dst));
11331   effect(KILL cr);
11332   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11333 
11334   format %{ "negl    $dst\t# int" %}
11335   ins_encode %{
11336     __ negl($dst$$Register);
11337   %}
11338   ins_pipe(ialu_reg);
11339 %}
11340 
11341 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11342 %{
11343   predicate(UseAPX);
11344   match(Set dst (SubI zero src));
11345   effect(KILL cr);
11346   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11347 
11348   format %{ "enegl    $dst, $src\t# int ndd" %}
11349   ins_encode %{
11350     __ enegl($dst$$Register, $src$$Register, false);
11351   %}
11352   ins_pipe(ialu_reg);
11353 %}
11354 
11355 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11356 %{
11357   predicate(!UseAPX);
11358   match(Set dst (NegI dst));
11359   effect(KILL cr);
11360   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11361 
11362   format %{ "negl    $dst\t# int" %}
11363   ins_encode %{
11364     __ negl($dst$$Register);
11365   %}
11366   ins_pipe(ialu_reg);
11367 %}
11368 
11369 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11370 %{
11371   predicate(UseAPX);
11372   match(Set dst (NegI src));
11373   effect(KILL cr);
11374   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11375 
11376   format %{ "enegl    $dst, $src\t# int ndd" %}
11377   ins_encode %{
11378     __ enegl($dst$$Register, $src$$Register, false);
11379   %}
11380   ins_pipe(ialu_reg);
11381 %}
11382 
11383 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11384 %{
11385   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11386   effect(KILL cr);
11387   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11388 
11389   format %{ "negl    $dst\t# int" %}
11390   ins_encode %{
11391     __ negl($dst$$Address);
11392   %}
11393   ins_pipe(ialu_reg);
11394 %}
11395 
11396 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11397 %{
11398   predicate(!UseAPX);
11399   match(Set dst (SubL zero dst));
11400   effect(KILL cr);
11401   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11402 
11403   format %{ "negq    $dst\t# long" %}
11404   ins_encode %{
11405     __ negq($dst$$Register);
11406   %}
11407   ins_pipe(ialu_reg);
11408 %}
11409 
11410 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11411 %{
11412   predicate(UseAPX);
11413   match(Set dst (SubL zero src));
11414   effect(KILL cr);
11415   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11416 
11417   format %{ "enegq    $dst, $src\t# long ndd" %}
11418   ins_encode %{
11419     __ enegq($dst$$Register, $src$$Register, false);
11420   %}
11421   ins_pipe(ialu_reg);
11422 %}
11423 
11424 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11425 %{
11426   predicate(!UseAPX);
11427   match(Set dst (NegL dst));
11428   effect(KILL cr);
11429   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11430 
11431   format %{ "negq    $dst\t# int" %}
11432   ins_encode %{
11433     __ negq($dst$$Register);
11434   %}
11435   ins_pipe(ialu_reg);
11436 %}
11437 
11438 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11439 %{
11440   predicate(UseAPX);
11441   match(Set dst (NegL src));
11442   effect(KILL cr);
11443   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11444 
11445   format %{ "enegq    $dst, $src\t# long ndd" %}
11446   ins_encode %{
11447     __ enegq($dst$$Register, $src$$Register, false);
11448   %}
11449   ins_pipe(ialu_reg);
11450 %}
11451 
11452 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11453 %{
11454   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11455   effect(KILL cr);
11456   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11457 
11458   format %{ "negq    $dst\t# long" %}
11459   ins_encode %{
11460     __ negq($dst$$Address);
11461   %}
11462   ins_pipe(ialu_reg);
11463 %}
11464 
11465 //----------Multiplication/Division Instructions-------------------------------
11466 // Integer Multiplication Instructions
11467 // Multiply Register
11468 
11469 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11470 %{
11471   predicate(!UseAPX);
11472   match(Set dst (MulI dst src));
11473   effect(KILL cr);
11474 
11475   ins_cost(300);
11476   format %{ "imull   $dst, $src\t# int" %}
11477   ins_encode %{
11478     __ imull($dst$$Register, $src$$Register);
11479   %}
11480   ins_pipe(ialu_reg_reg_alu0);
11481 %}
11482 
11483 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11484 %{
11485   predicate(UseAPX);
11486   match(Set dst (MulI src1 src2));
11487   effect(KILL cr);
11488 
11489   ins_cost(300);
11490   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11491   ins_encode %{
11492     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11493   %}
11494   ins_pipe(ialu_reg_reg_alu0);
11495 %}
11496 
11497 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11498 %{
11499   match(Set dst (MulI src imm));
11500   effect(KILL cr);
11501 
11502   ins_cost(300);
11503   format %{ "imull   $dst, $src, $imm\t# int" %}
11504   ins_encode %{
11505     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11506   %}
11507   ins_pipe(ialu_reg_reg_alu0);
11508 %}
11509 
11510 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11511 %{
11512   predicate(!UseAPX);
11513   match(Set dst (MulI dst (LoadI src)));
11514   effect(KILL cr);
11515 
11516   ins_cost(350);
11517   format %{ "imull   $dst, $src\t# int" %}
11518   ins_encode %{
11519     __ imull($dst$$Register, $src$$Address);
11520   %}
11521   ins_pipe(ialu_reg_mem_alu0);
11522 %}
11523 
11524 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11525 %{
11526   predicate(UseAPX);
11527   match(Set dst (MulI src1 (LoadI src2)));
11528   effect(KILL cr);
11529 
11530   ins_cost(350);
11531   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11532   ins_encode %{
11533     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11534   %}
11535   ins_pipe(ialu_reg_mem_alu0);
11536 %}
11537 
11538 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11539 %{
11540   match(Set dst (MulI (LoadI src) imm));
11541   effect(KILL cr);
11542 
11543   ins_cost(300);
11544   format %{ "imull   $dst, $src, $imm\t# int" %}
11545   ins_encode %{
11546     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11547   %}
11548   ins_pipe(ialu_reg_mem_alu0);
11549 %}
11550 
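// MulAddS2I computes dst = dst*src1 + src2*src3 by expanding into two
// register multiplies and an add; the second multiply clobbers src2, hence
// KILL src2.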
11551 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11552 %{
11553   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11554   effect(KILL cr, KILL src2);
11555 
11556   expand %{ mulI_rReg(dst, src1, cr);
11557             mulI_rReg(src2, src3, cr);
11558             addI_rReg(dst, src2, cr); %}
11559 %}
11560 
11561 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11562 %{
11563   predicate(!UseAPX);
11564   match(Set dst (MulL dst src));
11565   effect(KILL cr);
11566 
11567   ins_cost(300);
11568   format %{ "imulq   $dst, $src\t# long" %}
11569   ins_encode %{
11570     __ imulq($dst$$Register, $src$$Register);
11571   %}
11572   ins_pipe(ialu_reg_reg_alu0);
11573 %}
11574 
11575 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11576 %{
11577   predicate(UseAPX);
11578   match(Set dst (MulL src1 src2));
11579   effect(KILL cr);
11580 
11581   ins_cost(300);
11582   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11583   ins_encode %{
11584     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11585   %}
11586   ins_pipe(ialu_reg_reg_alu0);
11587 %}
11588 
11589 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11590 %{
11591   match(Set dst (MulL src imm));
11592   effect(KILL cr);
11593 
11594   ins_cost(300);
11595   format %{ "imulq   $dst, $src, $imm\t# long" %}
11596   ins_encode %{
11597     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11598   %}
11599   ins_pipe(ialu_reg_reg_alu0);
11600 %}
11601 
11602 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11603 %{
11604   predicate(!UseAPX);
11605   match(Set dst (MulL dst (LoadL src)));
11606   effect(KILL cr);
11607 
11608   ins_cost(350);
11609   format %{ "imulq   $dst, $src\t# long" %}
11610   ins_encode %{
11611     __ imulq($dst$$Register, $src$$Address);
11612   %}
11613   ins_pipe(ialu_reg_mem_alu0);
11614 %}
11615 
11616 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11617 %{
11618   predicate(UseAPX);
11619   match(Set dst (MulL src1 (LoadL src2)));
11620   effect(KILL cr);
11621 
11622   ins_cost(350);
11623   format %{ "eimulq   $dst, $src1, $src2 \t# long" %}
11624   ins_encode %{
11625     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11626   %}
11627   ins_pipe(ialu_reg_mem_alu0);
11628 %}
11629 
11630 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11631 %{
11632   match(Set dst (MulL (LoadL src) imm));
11633   effect(KILL cr);
11634 
11635   ins_cost(300);
11636   format %{ "imulq   $dst, $src, $imm\t# long" %}
11637   ins_encode %{
11638     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11639   %}
11640   ins_pipe(ialu_reg_mem_alu0);
11641 %}
11642 
11643 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11644 %{
11645   match(Set dst (MulHiL src rax));
11646   effect(USE_KILL rax, KILL cr);
11647 
11648   ins_cost(300);
11649   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11650   ins_encode %{
11651     __ imulq($src$$Register);
11652   %}
11653   ins_pipe(ialu_reg_reg_alu0);
11654 %}
11655 
11656 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11657 %{
11658   match(Set dst (UMulHiL src rax));
11659   effect(USE_KILL rax, KILL cr);
11660 
11661   ins_cost(300);
11662   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11663   ins_encode %{
11664     __ mulq($src$$Register);
11665   %}
11666   ins_pipe(ialu_reg_reg_alu0);
11667 %}
11668 
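// idiv raises #DE not only for a zero divisor but also when the quotient
// overflows, so min_jint/min_jlong divided by -1 is checked up front and
// short-circuited to quotient == dividend, remainder == 0, as Java requires;
// cdql/cdqq sign-extend rax into rdx before the divide.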
11669 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11670                    rFlagsReg cr)
11671 %{
11672   match(Set rax (DivI rax div));
11673   effect(KILL rdx, KILL cr);
11674 
11675   ins_cost(30*100+10*100); // XXX
11676   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11677             "jne,s   normal\n\t"
11678             "xorl    rdx, rdx\n\t"
11679             "cmpl    $div, -1\n\t"
11680             "je,s    done\n"
11681     "normal: cdql\n\t"
11682             "idivl   $div\n"
11683     "done:"        %}
11684   ins_encode(cdql_enc(div));
11685   ins_pipe(ialu_reg_reg_alu0);
11686 %}
11687 
11688 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11689                    rFlagsReg cr)
11690 %{
11691   match(Set rax (DivL rax div));
11692   effect(KILL rdx, KILL cr);
11693 
11694   ins_cost(30*100+10*100); // XXX
11695   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11696             "cmpq    rax, rdx\n\t"
11697             "jne,s   normal\n\t"
11698             "xorl    rdx, rdx\n\t"
11699             "cmpq    $div, -1\n\t"
11700             "je,s    done\n"
11701     "normal: cdqq\n\t"
11702             "idivq   $div\n"
11703     "done:"        %}
11704   ins_encode(cdqq_enc(div));
11705   ins_pipe(ialu_reg_reg_alu0);
11706 %}
11707 
11708 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11709 %{
11710   match(Set rax (UDivI rax div));
11711   effect(KILL rdx, KILL cr);
11712 
11713   ins_cost(300);
11714   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11715   ins_encode %{
11716     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11717   %}
11718   ins_pipe(ialu_reg_reg_alu0);
11719 %}
11720 
11721 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11722 %{
11723   match(Set rax (UDivL rax div));
11724   effect(KILL rdx, KILL cr);
11725 
11726   ins_cost(300);
11727   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11728   ins_encode %{
11729      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11730   %}
11731   ins_pipe(ialu_reg_reg_alu0);
11732 %}
11733 
11734 // Integer DIVMOD with Register, both quotient and mod results
11735 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11736                              rFlagsReg cr)
11737 %{
11738   match(DivModI rax div);
11739   effect(KILL cr);
11740 
11741   ins_cost(30*100+10*100); // XXX
11742   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11743             "jne,s   normal\n\t"
11744             "xorl    rdx, rdx\n\t"
11745             "cmpl    $div, -1\n\t"
11746             "je,s    done\n"
11747     "normal: cdql\n\t"
11748             "idivl   $div\n"
11749     "done:"        %}
11750   ins_encode(cdql_enc(div));
11751   ins_pipe(pipe_slow);
11752 %}
11753 
11754 // Long DIVMOD with Register, both quotient and mod results
11755 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11756                              rFlagsReg cr)
11757 %{
11758   match(DivModL rax div);
11759   effect(KILL cr);
11760 
11761   ins_cost(30*100+10*100); // XXX
11762   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11763             "cmpq    rax, rdx\n\t"
11764             "jne,s   normal\n\t"
11765             "xorl    rdx, rdx\n\t"
11766             "cmpq    $div, -1\n\t"
11767             "je,s    done\n"
11768     "normal: cdqq\n\t"
11769             "idivq   $div\n"
11770     "done:"        %}
11771   ins_encode(cdqq_enc(div));
11772   ins_pipe(pipe_slow);
11773 %}
11774 
11775 // Unsigned integer DIVMOD with Register, both quotient and mod results
11776 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11777                               no_rax_rdx_RegI div, rFlagsReg cr)
11778 %{
11779   match(UDivModI rax div);
11780   effect(TEMP tmp, KILL cr);
11781 
11782   ins_cost(300);
11783   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11784             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11785           %}
11786   ins_encode %{
11787     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11788   %}
11789   ins_pipe(pipe_slow);
11790 %}
11791 
11792 // Unsigned long DIVMOD with Register, both quotient and mod results
11793 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11794                               no_rax_rdx_RegL div, rFlagsReg cr)
11795 %{
11796   match(UDivModL rax div);
11797   effect(TEMP tmp, KILL cr);
11798 
11799   ins_cost(300);
11800   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11801             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11802           %}
11803   ins_encode %{
11804     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11805   %}
11806   ins_pipe(pipe_slow);
11807 %}
11808 
11809 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11810                    rFlagsReg cr)
11811 %{
11812   match(Set rdx (ModI rax div));
11813   effect(KILL rax, KILL cr);
11814 
11815   ins_cost(300); // XXX
11816   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11817             "jne,s   normal\n\t"
11818             "xorl    rdx, rdx\n\t"
11819             "cmpl    $div, -1\n\t"
11820             "je,s    done\n"
11821     "normal: cdql\n\t"
11822             "idivl   $div\n"
11823     "done:"        %}
11824   ins_encode(cdql_enc(div));
11825   ins_pipe(ialu_reg_reg_alu0);
11826 %}
11827 
11828 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11829                    rFlagsReg cr)
11830 %{
11831   match(Set rdx (ModL rax div));
11832   effect(KILL rax, KILL cr);
11833 
11834   ins_cost(300); // XXX
11835   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11836             "cmpq    rax, rdx\n\t"
11837             "jne,s   normal\n\t"
11838             "xorl    rdx, rdx\n\t"
11839             "cmpq    $div, -1\n\t"
11840             "je,s    done\n"
11841     "normal: cdqq\n\t"
11842             "idivq   $div\n"
11843     "done:"        %}
11844   ins_encode(cdqq_enc(div));
11845   ins_pipe(ialu_reg_reg_alu0);
11846 %}
11847 
11848 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11849 %{
11850   match(Set rdx (UModI rax div));
11851   effect(KILL rax, KILL cr);
11852 
11853   ins_cost(300);
11854   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11855   ins_encode %{
11856     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11857   %}
11858   ins_pipe(ialu_reg_reg_alu0);
11859 %}
11860 
11861 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11862 %{
11863   match(Set rdx (UModL rax div));
11864   effect(KILL rax, KILL cr);
11865 
11866   ins_cost(300);
11867   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11868   ins_encode %{
11869     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11870   %}
11871   ins_pipe(ialu_reg_reg_alu0);
11872 %}
11873 
11874 // Integer Shift Instructions
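// Each shift comes in two families: the classic forms take the count in CL
// and kill the flags, while the BMI2 shlx/sarx/shrx forms (supports_bmi2())
// accept the count in any register and leave the flags untouched, so they
// need neither rcx nor an effect on cr.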
11875 // Shift Left by one, two, three
11876 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11877 %{
11878   predicate(!UseAPX);
11879   match(Set dst (LShiftI dst shift));
11880   effect(KILL cr);
11881 
11882   format %{ "sall    $dst, $shift" %}
11883   ins_encode %{
11884     __ sall($dst$$Register, $shift$$constant);
11885   %}
11886   ins_pipe(ialu_reg);
11887 %}
11888 
11889 // Shift Left by one, two, three
11890 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11891 %{
11892   predicate(UseAPX);
11893   match(Set dst (LShiftI src shift));
11894   effect(KILL cr);
11895 
11896   format %{ "esall    $dst, $src, $shift\t# int(ndd)" %}
11897   ins_encode %{
11898     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11899   %}
11900   ins_pipe(ialu_reg);
11901 %}
11902 
11903 // Shift Left by 8-bit immediate
11904 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11905 %{
11906   predicate(!UseAPX);
11907   match(Set dst (LShiftI dst shift));
11908   effect(KILL cr);
11909 
11910   format %{ "sall    $dst, $shift" %}
11911   ins_encode %{
11912     __ sall($dst$$Register, $shift$$constant);
11913   %}
11914   ins_pipe(ialu_reg);
11915 %}
11916 
11917 // Shift Left by 8-bit immediate
11918 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11919 %{
11920   predicate(UseAPX);
11921   match(Set dst (LShiftI src shift));
11922   effect(KILL cr);
11923 
11924   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11925   ins_encode %{
11926     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11927   %}
11928   ins_pipe(ialu_reg);
11929 %}
11930 
11931 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11932 %{
11933   predicate(UseAPX);
11934   match(Set dst (LShiftI (LoadI src) shift));
11935   effect(KILL cr);
11936 
11937   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11938   ins_encode %{
11939     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11940   %}
11941   ins_pipe(ialu_reg);
11942 %}
11943 
11944 // Shift Left by 8-bit immediate
11945 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11946 %{
11947   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11948   effect(KILL cr);
11949 
11950   format %{ "sall    $dst, $shift" %}
11951   ins_encode %{
11952     __ sall($dst$$Address, $shift$$constant);
11953   %}
11954   ins_pipe(ialu_mem_imm);
11955 %}
11956 
11957 // Shift Left by variable
11958 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11959 %{
11960   predicate(!VM_Version::supports_bmi2());
11961   match(Set dst (LShiftI dst shift));
11962   effect(KILL cr);
11963 
11964   format %{ "sall    $dst, $shift" %}
11965   ins_encode %{
11966     __ sall($dst$$Register);
11967   %}
11968   ins_pipe(ialu_reg_reg);
11969 %}
11970 
11971 // Shift Left by variable
11972 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11973 %{
11974   predicate(!VM_Version::supports_bmi2());
11975   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11976   effect(KILL cr);
11977 
11978   format %{ "sall    $dst, $shift" %}
11979   ins_encode %{
11980     __ sall($dst$$Address);
11981   %}
11982   ins_pipe(ialu_mem_reg);
11983 %}
11984 
11985 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11986 %{
11987   predicate(VM_Version::supports_bmi2());
11988   match(Set dst (LShiftI src shift));
11989 
11990   format %{ "shlxl   $dst, $src, $shift" %}
11991   ins_encode %{
11992     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11993   %}
11994   ins_pipe(ialu_reg_reg);
11995 %}
11996 
11997 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11998 %{
11999   predicate(VM_Version::supports_bmi2());
12000   match(Set dst (LShiftI (LoadI src) shift));
12001   ins_cost(175);
12002   format %{ "shlxl   $dst, $src, $shift" %}
12003   ins_encode %{
12004     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12005   %}
12006   ins_pipe(ialu_reg_mem);
12007 %}
12008 
12009 // Arithmetic Shift Right by 8-bit immediate
12010 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12011 %{
12012   predicate(!UseAPX);
12013   match(Set dst (RShiftI dst shift));
12014   effect(KILL cr);
12015 
12016   format %{ "sarl    $dst, $shift" %}
12017   ins_encode %{
12018     __ sarl($dst$$Register, $shift$$constant);
12019   %}
12020   ins_pipe(ialu_mem_imm);
12021 %}
12022 
12023 // Arithmetic Shift Right by 8-bit immediate
12024 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12025 %{
12026   predicate(UseAPX);
12027   match(Set dst (RShiftI src shift));
12028   effect(KILL cr);
12029 
12030   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12031   ins_encode %{
12032     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12033   %}
12034   ins_pipe(ialu_mem_imm);
12035 %}
12036 
12037 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12038 %{
12039   predicate(UseAPX);
12040   match(Set dst (RShiftI (LoadI src) shift));
12041   effect(KILL cr);
12042 
12043   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12044   ins_encode %{
12045     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12046   %}
12047   ins_pipe(ialu_mem_imm);
12048 %}
12049 
12050 // Arithmetic Shift Right by 8-bit immediate
12051 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12052 %{
12053   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12054   effect(KILL cr);
12055 
12056   format %{ "sarl    $dst, $shift" %}
12057   ins_encode %{
12058     __ sarl($dst$$Address, $shift$$constant);
12059   %}
12060   ins_pipe(ialu_mem_imm);
12061 %}
12062 
12063 // Arithmetic Shift Right by variable
12064 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12065 %{
12066   predicate(!VM_Version::supports_bmi2());
12067   match(Set dst (RShiftI dst shift));
12068   effect(KILL cr);
12069 
12070   format %{ "sarl    $dst, $shift" %}
12071   ins_encode %{
12072     __ sarl($dst$$Register);
12073   %}
12074   ins_pipe(ialu_reg_reg);
12075 %}
12076 
12077 // Arithmetic Shift Right by variable
12078 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12079 %{
12080   predicate(!VM_Version::supports_bmi2());
12081   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12082   effect(KILL cr);
12083 
12084   format %{ "sarl    $dst, $shift" %}
12085   ins_encode %{
12086     __ sarl($dst$$Address);
12087   %}
12088   ins_pipe(ialu_mem_reg);
12089 %}
12090 
12091 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12092 %{
12093   predicate(VM_Version::supports_bmi2());
12094   match(Set dst (RShiftI src shift));
12095 
12096   format %{ "sarxl   $dst, $src, $shift" %}
12097   ins_encode %{
12098     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12099   %}
12100   ins_pipe(ialu_reg_reg);
12101 %}
12102 
12103 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12104 %{
12105   predicate(VM_Version::supports_bmi2());
12106   match(Set dst (RShiftI (LoadI src) shift));
12107   ins_cost(175);
12108   format %{ "sarxl   $dst, $src, $shift" %}
12109   ins_encode %{
12110     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12111   %}
12112   ins_pipe(ialu_reg_mem);
12113 %}
12114 
12115 // Logical Shift Right by 8-bit immediate
12116 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12117 %{
12118   predicate(!UseAPX);
12119   match(Set dst (URShiftI dst shift));
12120   effect(KILL cr);
12121 
12122   format %{ "shrl    $dst, $shift" %}
12123   ins_encode %{
12124     __ shrl($dst$$Register, $shift$$constant);
12125   %}
12126   ins_pipe(ialu_reg);
12127 %}
12128 
12129 // Logical Shift Right by 8-bit immediate
12130 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12131 %{
12132   predicate(UseAPX);
12133   match(Set dst (URShiftI src shift));
12134   effect(KILL cr);
12135 
12136   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12137   ins_encode %{
12138     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12139   %}
12140   ins_pipe(ialu_reg);
12141 %}
12142 
12143 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12144 %{
12145   predicate(UseAPX);
12146   match(Set dst (URShiftI (LoadI src) shift));
12147   effect(KILL cr);
12148 
12149   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12150   ins_encode %{
12151     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12152   %}
12153   ins_pipe(ialu_reg);
12154 %}
12155 
12156 // Logical Shift Right by 8-bit immediate
12157 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12158 %{
12159   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12160   effect(KILL cr);
12161 
12162   format %{ "shrl    $dst, $shift" %}
12163   ins_encode %{
12164     __ shrl($dst$$Address, $shift$$constant);
12165   %}
12166   ins_pipe(ialu_mem_imm);
12167 %}
12168 
12169 // Logical Shift Right by variable
12170 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12171 %{
12172   predicate(!VM_Version::supports_bmi2());
12173   match(Set dst (URShiftI dst shift));
12174   effect(KILL cr);
12175 
12176   format %{ "shrl    $dst, $shift" %}
12177   ins_encode %{
12178     __ shrl($dst$$Register);
12179   %}
12180   ins_pipe(ialu_reg_reg);
12181 %}
12182 
12183 // Logical Shift Right by variable
12184 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12185 %{
12186   predicate(!VM_Version::supports_bmi2());
12187   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12188   effect(KILL cr);
12189 
12190   format %{ "shrl    $dst, $shift" %}
12191   ins_encode %{
12192     __ shrl($dst$$Address);
12193   %}
12194   ins_pipe(ialu_mem_reg);
12195 %}
12196 
12197 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12198 %{
12199   predicate(VM_Version::supports_bmi2());
12200   match(Set dst (URShiftI src shift));
12201 
12202   format %{ "shrxl   $dst, $src, $shift" %}
12203   ins_encode %{
12204     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12205   %}
12206   ins_pipe(ialu_reg_reg);
12207 %}
12208 
12209 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12210 %{
12211   predicate(VM_Version::supports_bmi2());
12212   match(Set dst (URShiftI (LoadI src) shift));
12213   ins_cost(175);
12214   format %{ "shrxl   $dst, $src, $shift" %}
12215   ins_encode %{
12216     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12217   %}
12218   ins_pipe(ialu_reg_mem);
12219 %}
12220 
12221 // Long Shift Instructions
12222 // Shift Left by one, two, three
12223 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12224 %{
12225   predicate(!UseAPX);
12226   match(Set dst (LShiftL dst shift));
12227   effect(KILL cr);
12228 
12229   format %{ "salq    $dst, $shift" %}
12230   ins_encode %{
12231     __ salq($dst$$Register, $shift$$constant);
12232   %}
12233   ins_pipe(ialu_reg);
12234 %}
12235 
12236 // Shift Left by one, two, three
12237 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12238 %{
12239   predicate(UseAPX);
12240   match(Set dst (LShiftL src shift));
12241   effect(KILL cr);
12242 
12243   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12244   ins_encode %{
12245     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12246   %}
12247   ins_pipe(ialu_reg);
12248 %}
12249 
12250 // Shift Left by 8-bit immediate
12251 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12252 %{
12253   predicate(!UseAPX);
12254   match(Set dst (LShiftL dst shift));
12255   effect(KILL cr);
12256 
12257   format %{ "salq    $dst, $shift" %}
12258   ins_encode %{
12259     __ salq($dst$$Register, $shift$$constant);
12260   %}
12261   ins_pipe(ialu_reg);
12262 %}
12263 
12264 // Shift Left by 8-bit immediate
12265 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12266 %{
12267   predicate(UseAPX);
12268   match(Set dst (LShiftL src shift));
12269   effect(KILL cr);
12270 
12271   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12272   ins_encode %{
12273     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12274   %}
12275   ins_pipe(ialu_reg);
12276 %}
12277 
12278 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12279 %{
12280   predicate(UseAPX);
12281   match(Set dst (LShiftL (LoadL src) shift));
12282   effect(KILL cr);
12283 
12284   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12285   ins_encode %{
12286     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12287   %}
12288   ins_pipe(ialu_reg);
12289 %}
12290 
12291 // Shift Left by 8-bit immediate
12292 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12293 %{
12294   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12295   effect(KILL cr);
12296 
12297   format %{ "salq    $dst, $shift" %}
12298   ins_encode %{
12299     __ salq($dst$$Address, $shift$$constant);
12300   %}
12301   ins_pipe(ialu_mem_imm);
12302 %}
12303 
12304 // Shift Left by variable
12305 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12306 %{
12307   predicate(!VM_Version::supports_bmi2());
12308   match(Set dst (LShiftL dst shift));
12309   effect(KILL cr);
12310 
12311   format %{ "salq    $dst, $shift" %}
12312   ins_encode %{
12313     __ salq($dst$$Register);
12314   %}
12315   ins_pipe(ialu_reg_reg);
12316 %}
12317 
12318 // Shift Left by variable
12319 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12320 %{
12321   predicate(!VM_Version::supports_bmi2());
12322   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12323   effect(KILL cr);
12324 
12325   format %{ "salq    $dst, $shift" %}
12326   ins_encode %{
12327     __ salq($dst$$Address);
12328   %}
12329   ins_pipe(ialu_mem_reg);
12330 %}
12331 
12332 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12333 %{
12334   predicate(VM_Version::supports_bmi2());
12335   match(Set dst (LShiftL src shift));
12336 
12337   format %{ "shlxq   $dst, $src, $shift" %}
12338   ins_encode %{
12339     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12340   %}
12341   ins_pipe(ialu_reg_reg);
12342 %}
12343 
12344 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12345 %{
12346   predicate(VM_Version::supports_bmi2());
12347   match(Set dst (LShiftL (LoadL src) shift));
12348   ins_cost(175);
12349   format %{ "shlxq   $dst, $src, $shift" %}
12350   ins_encode %{
12351     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12352   %}
12353   ins_pipe(ialu_reg_mem);
12354 %}
12355 
12356 // Arithmetic Shift Right by 8-bit immediate
12357 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12358 %{
12359   predicate(!UseAPX);
12360   match(Set dst (RShiftL dst shift));
12361   effect(KILL cr);
12362 
12363   format %{ "sarq    $dst, $shift" %}
12364   ins_encode %{
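    // Hardware masks 64-bit shift counts mod 64; '& 0x3F' keeps the immI
    // constant in the encodable 0..63 range (shift is a full immI, not immI8).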
12365     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12366   %}
  ins_pipe(ialu_reg);
12368 %}
12369 
12370 // Arithmetic Shift Right by 8-bit immediate
12371 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12372 %{
12373   predicate(UseAPX);
12374   match(Set dst (RShiftL src shift));
12375   effect(KILL cr);
12376 
12377   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12378   ins_encode %{
12379     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12380   %}
  ins_pipe(ialu_reg);
12382 %}
12383 
12384 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12385 %{
12386   predicate(UseAPX);
12387   match(Set dst (RShiftL (LoadL src) shift));
12388   effect(KILL cr);
12389 
12390   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12391   ins_encode %{
12392     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12393   %}
12394   ins_pipe(ialu_mem_imm);
12395 %}
12396 
12397 // Arithmetic Shift Right by 8-bit immediate
12398 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12399 %{
12400   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12401   effect(KILL cr);
12402 
12403   format %{ "sarq    $dst, $shift" %}
12404   ins_encode %{
12405     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12406   %}
12407   ins_pipe(ialu_mem_imm);
12408 %}
12409 
12410 // Arithmetic Shift Right by variable
12411 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12412 %{
12413   predicate(!VM_Version::supports_bmi2());
12414   match(Set dst (RShiftL dst shift));
12415   effect(KILL cr);
12416 
12417   format %{ "sarq    $dst, $shift" %}
12418   ins_encode %{
12419     __ sarq($dst$$Register);
12420   %}
12421   ins_pipe(ialu_reg_reg);
12422 %}
12423 
12424 // Arithmetic Shift Right by variable
12425 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12426 %{
12427   predicate(!VM_Version::supports_bmi2());
12428   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12429   effect(KILL cr);
12430 
12431   format %{ "sarq    $dst, $shift" %}
12432   ins_encode %{
12433     __ sarq($dst$$Address);
12434   %}
12435   ins_pipe(ialu_mem_reg);
12436 %}
12437 
12438 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12439 %{
12440   predicate(VM_Version::supports_bmi2());
12441   match(Set dst (RShiftL src shift));
12442 
12443   format %{ "sarxq   $dst, $src, $shift" %}
12444   ins_encode %{
12445     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12446   %}
12447   ins_pipe(ialu_reg_reg);
12448 %}
12449 
12450 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12451 %{
12452   predicate(VM_Version::supports_bmi2());
12453   match(Set dst (RShiftL (LoadL src) shift));
12454   ins_cost(175);
12455   format %{ "sarxq   $dst, $src, $shift" %}
12456   ins_encode %{
12457     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12458   %}
12459   ins_pipe(ialu_reg_mem);
12460 %}
12461 
12462 // Logical Shift Right by 8-bit immediate
12463 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12464 %{
12465   predicate(!UseAPX);
12466   match(Set dst (URShiftL dst shift));
12467   effect(KILL cr);
12468 
12469   format %{ "shrq    $dst, $shift" %}
12470   ins_encode %{
12471     __ shrq($dst$$Register, $shift$$constant);
12472   %}
12473   ins_pipe(ialu_reg);
12474 %}
12475 
12476 // Logical Shift Right by 8-bit immediate
12477 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12478 %{
12479   predicate(UseAPX);
12480   match(Set dst (URShiftL src shift));
12481   effect(KILL cr);
12482 
12483   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12484   ins_encode %{
12485     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12486   %}
12487   ins_pipe(ialu_reg);
12488 %}
12489 
12490 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12491 %{
12492   predicate(UseAPX);
12493   match(Set dst (URShiftL (LoadL src) shift));
12494   effect(KILL cr);
12495 
12496   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12497   ins_encode %{
12498     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12499   %}
12500   ins_pipe(ialu_reg);
12501 %}
12502 
12503 // Logical Shift Right by 8-bit immediate
12504 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12505 %{
12506   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12507   effect(KILL cr);
12508 
12509   format %{ "shrq    $dst, $shift" %}
12510   ins_encode %{
12511     __ shrq($dst$$Address, $shift$$constant);
12512   %}
12513   ins_pipe(ialu_mem_imm);
12514 %}
12515 
12516 // Logical Shift Right by variable
12517 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12518 %{
12519   predicate(!VM_Version::supports_bmi2());
12520   match(Set dst (URShiftL dst shift));
12521   effect(KILL cr);
12522 
12523   format %{ "shrq    $dst, $shift" %}
12524   ins_encode %{
12525     __ shrq($dst$$Register);
12526   %}
12527   ins_pipe(ialu_reg_reg);
12528 %}
12529 
12530 // Logical Shift Right by variable
12531 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12532 %{
12533   predicate(!VM_Version::supports_bmi2());
12534   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12535   effect(KILL cr);
12536 
12537   format %{ "shrq    $dst, $shift" %}
12538   ins_encode %{
12539     __ shrq($dst$$Address);
12540   %}
12541   ins_pipe(ialu_mem_reg);
12542 %}
12543 
12544 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12545 %{
12546   predicate(VM_Version::supports_bmi2());
12547   match(Set dst (URShiftL src shift));
12548 
12549   format %{ "shrxq   $dst, $src, $shift" %}
12550   ins_encode %{
12551     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12552   %}
12553   ins_pipe(ialu_reg_reg);
12554 %}
12555 
12556 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12557 %{
12558   predicate(VM_Version::supports_bmi2());
12559   match(Set dst (URShiftL (LoadL src) shift));
12560   ins_cost(175);
12561   format %{ "shrxq   $dst, $src, $shift" %}
12562   ins_encode %{
12563     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12564   %}
12565   ins_pipe(ialu_reg_mem);
12566 %}
12567 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
12570 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12571 %{
12572   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12573 
12574   format %{ "movsbl  $dst, $src\t# i2b" %}
12575   ins_encode %{
12576     __ movsbl($dst$$Register, $src$$Register);
12577   %}
12578   ins_pipe(ialu_reg_reg);
12579 %}
12580 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12583 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12584 %{
12585   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12586 
12587   format %{ "movswl  $dst, $src\t# i2s" %}
12588   ins_encode %{
12589     __ movswl($dst$$Register, $src$$Register);
12590   %}
12591   ins_pipe(ialu_reg_reg);
12592 %}
12593 
12594 // ROL/ROR instructions
12595 
// Rotate Left by constant.
12597 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12598 %{
12599   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12600   match(Set dst (RotateLeft dst shift));
12601   effect(KILL cr);
12602   format %{ "roll    $dst, $shift" %}
12603   ins_encode %{
12604     __ roll($dst$$Register, $shift$$constant);
12605   %}
12606   ins_pipe(ialu_reg);
12607 %}
12608 
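// Rotate Left by constant (BMI2). There is no rolx instruction; the rotate
// is synthesized with RORX via the identity rol(x, c) == ror(x, 32 - c),
// e.g. rol(x, 3) == rorx(x, 29).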
12609 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12610 %{
12611   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12612   match(Set dst (RotateLeft src shift));
12613   format %{ "rolxl   $dst, $src, $shift" %}
12614   ins_encode %{
12615     int shift = 32 - ($shift$$constant & 31);
12616     __ rorxl($dst$$Register, $src$$Register, shift);
12617   %}
12618   ins_pipe(ialu_reg_reg);
12619 %}
12620 
12621 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12622 %{
12623   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12624   match(Set dst (RotateLeft (LoadI src) shift));
12625   ins_cost(175);
12626   format %{ "rolxl   $dst, $src, $shift" %}
12627   ins_encode %{
12628     int shift = 32 - ($shift$$constant & 31);
12629     __ rorxl($dst$$Register, $src$$Address, shift);
12630   %}
12631   ins_pipe(ialu_reg_mem);
12632 %}
12633 
12634 // Rotate Left by variable
12635 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12636 %{
12637   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12638   match(Set dst (RotateLeft dst shift));
12639   effect(KILL cr);
12640   format %{ "roll    $dst, $shift" %}
12641   ins_encode %{
12642     __ roll($dst$$Register);
12643   %}
12644   ins_pipe(ialu_reg_reg);
12645 %}
12646 
12647 // Rotate Left by variable
12648 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12649 %{
12650   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12651   match(Set dst (RotateLeft src shift));
12652   effect(KILL cr);
12653 
12654   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12655   ins_encode %{
12656     __ eroll($dst$$Register, $src$$Register, false);
12657   %}
12658   ins_pipe(ialu_reg_reg);
12659 %}
12660 
12661 // Rotate Right by constant.
12662 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12663 %{
12664   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12665   match(Set dst (RotateRight dst shift));
12666   effect(KILL cr);
12667   format %{ "rorl    $dst, $shift" %}
12668   ins_encode %{
12669     __ rorl($dst$$Register, $shift$$constant);
12670   %}
12671   ins_pipe(ialu_reg);
12672 %}
12673 
12674 // Rotate Right by constant.
12675 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12676 %{
12677   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12678   match(Set dst (RotateRight src shift));
12679   format %{ "rorxl   $dst, $src, $shift" %}
12680   ins_encode %{
12681     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12682   %}
12683   ins_pipe(ialu_reg_reg);
12684 %}
12685 
12686 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12687 %{
12688   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12689   match(Set dst (RotateRight (LoadI src) shift));
12690   ins_cost(175);
12691   format %{ "rorxl   $dst, $src, $shift" %}
12692   ins_encode %{
12693     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12694   %}
12695   ins_pipe(ialu_reg_mem);
12696 %}
12697 
12698 // Rotate Right by variable
12699 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12700 %{
12701   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12702   match(Set dst (RotateRight dst shift));
12703   effect(KILL cr);
12704   format %{ "rorl    $dst, $shift" %}
12705   ins_encode %{
12706     __ rorl($dst$$Register);
12707   %}
12708   ins_pipe(ialu_reg_reg);
12709 %}
12710 
12711 // Rotate Right by variable
12712 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12713 %{
12714   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12715   match(Set dst (RotateRight src shift));
12716   effect(KILL cr);
12717 
  format %{ "erorl    $dst, $src, $shift\t# rotate right (int ndd)" %}
12719   ins_encode %{
12720     __ erorl($dst$$Register, $src$$Register, false);
12721   %}
12722   ins_pipe(ialu_reg_reg);
12723 %}
12724 
12725 // Rotate Left by constant.
12726 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12727 %{
12728   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12729   match(Set dst (RotateLeft dst shift));
12730   effect(KILL cr);
12731   format %{ "rolq    $dst, $shift" %}
12732   ins_encode %{
12733     __ rolq($dst$$Register, $shift$$constant);
12734   %}
12735   ins_pipe(ialu_reg);
12736 %}
12737 
12738 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12739 %{
12740   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12741   match(Set dst (RotateLeft src shift));
12742   format %{ "rolxq   $dst, $src, $shift" %}
12743   ins_encode %{
12744     int shift = 64 - ($shift$$constant & 63);
12745     __ rorxq($dst$$Register, $src$$Register, shift);
12746   %}
12747   ins_pipe(ialu_reg_reg);
12748 %}
12749 
12750 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12751 %{
12752   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12753   match(Set dst (RotateLeft (LoadL src) shift));
12754   ins_cost(175);
12755   format %{ "rolxq   $dst, $src, $shift" %}
12756   ins_encode %{
12757     int shift = 64 - ($shift$$constant & 63);
12758     __ rorxq($dst$$Register, $src$$Address, shift);
12759   %}
12760   ins_pipe(ialu_reg_mem);
12761 %}
12762 
12763 // Rotate Left by variable
12764 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12765 %{
12766   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12767   match(Set dst (RotateLeft dst shift));
12768   effect(KILL cr);
12769   format %{ "rolq    $dst, $shift" %}
12770   ins_encode %{
12771     __ rolq($dst$$Register);
12772   %}
12773   ins_pipe(ialu_reg_reg);
12774 %}
12775 
12776 // Rotate Left by variable
12777 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12778 %{
12779   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12780   match(Set dst (RotateLeft src shift));
12781   effect(KILL cr);
12782 
  format %{ "erolq    $dst, $src, $shift\t# rotate left (long ndd)" %}
12784   ins_encode %{
12785     __ erolq($dst$$Register, $src$$Register, false);
12786   %}
12787   ins_pipe(ialu_reg_reg);
12788 %}
12789 
12790 // Rotate Right by constant.
12791 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12792 %{
12793   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12794   match(Set dst (RotateRight dst shift));
12795   effect(KILL cr);
12796   format %{ "rorq    $dst, $shift" %}
12797   ins_encode %{
12798     __ rorq($dst$$Register, $shift$$constant);
12799   %}
12800   ins_pipe(ialu_reg);
12801 %}
12802 
12803 // Rotate Right by constant
12804 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12805 %{
12806   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12807   match(Set dst (RotateRight src shift));
12808   format %{ "rorxq   $dst, $src, $shift" %}
12809   ins_encode %{
12810     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12811   %}
12812   ins_pipe(ialu_reg_reg);
12813 %}
12814 
12815 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12816 %{
12817   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12818   match(Set dst (RotateRight (LoadL src) shift));
12819   ins_cost(175);
12820   format %{ "rorxq   $dst, $src, $shift" %}
12821   ins_encode %{
12822     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12823   %}
12824   ins_pipe(ialu_reg_mem);
12825 %}
12826 
12827 // Rotate Right by variable
12828 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12829 %{
12830   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12831   match(Set dst (RotateRight dst shift));
12832   effect(KILL cr);
12833   format %{ "rorq    $dst, $shift" %}
12834   ins_encode %{
12835     __ rorq($dst$$Register);
12836   %}
12837   ins_pipe(ialu_reg_reg);
12838 %}
12839 
12840 // Rotate Right by variable
12841 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12842 %{
12843   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12844   match(Set dst (RotateRight src shift));
12845   effect(KILL cr);
12846 
  format %{ "erorq    $dst, $src, $shift\t# rotate right (long ndd)" %}
12848   ins_encode %{
12849     __ erorq($dst$$Register, $src$$Register, false);
12850   %}
12851   ins_pipe(ialu_reg_reg);
12852 %}
12853 
12854 //----------------------------- CompressBits/ExpandBits ------------------------
12855 
12856 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12857   predicate(n->bottom_type()->isa_long());
12858   match(Set dst (CompressBits src mask));
12859   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12860   ins_encode %{
12861     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12862   %}
12863   ins_pipe( pipe_slow );
12864 %}
12865 
12866 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12867   predicate(n->bottom_type()->isa_long());
12868   match(Set dst (ExpandBits src mask));
12869   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12870   ins_encode %{
12871     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12872   %}
12873   ins_pipe( pipe_slow );
12874 %}
12875 
12876 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12877   predicate(n->bottom_type()->isa_long());
12878   match(Set dst (CompressBits src (LoadL mask)));
12879   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12880   ins_encode %{
12881     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12882   %}
12883   ins_pipe( pipe_slow );
12884 %}
12885 
12886 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12887   predicate(n->bottom_type()->isa_long());
12888   match(Set dst (ExpandBits src (LoadL mask)));
12889   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12890   ins_encode %{
12891     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12892   %}
12893   ins_pipe( pipe_slow );
12894 %}
12895 
12896 
12897 // Logical Instructions
12898 
12899 // Integer Logical Instructions
12900 
12901 // And Instructions
12902 // And Register with Register
12903 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12904 %{
12905   predicate(!UseAPX);
12906   match(Set dst (AndI dst src));
12907   effect(KILL cr);
12908   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12909 
12910   format %{ "andl    $dst, $src\t# int" %}
12911   ins_encode %{
12912     __ andl($dst$$Register, $src$$Register);
12913   %}
12914   ins_pipe(ialu_reg_reg);
12915 %}
12916 
12917 // And Register with Register using New Data Destination (NDD)
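// An NDD form takes the destination as a separate operand, so dst = src1 & src2
// needs no preparatory mov. Illustrative comparison:
//   legacy:  movl dst, src1 ; andl dst, src2
//   ndd:     eandl dst, src1, src2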
12918 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12919 %{
12920   predicate(UseAPX);
12921   match(Set dst (AndI src1 src2));
12922   effect(KILL cr);
12923   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12924 
12925   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
12926   ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
12930   ins_pipe(ialu_reg_reg);
12931 %}
12932 
12933 // And Register with Immediate 255
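// Implemented as a zero-extending move: movzbl performs the AND with 0xFF
// without touching the flags, so no KILL cr effect is needed.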
12934 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12935 %{
12936   match(Set dst (AndI src mask));
12937 
12938   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
12939   ins_encode %{
12940     __ movzbl($dst$$Register, $src$$Register);
12941   %}
12942   ins_pipe(ialu_reg);
12943 %}
12944 
12945 // And Register with Immediate 255 and promote to long
12946 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12947 %{
12948   match(Set dst (ConvI2L (AndI src mask)));
12949 
12950   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
12951   ins_encode %{
12952     __ movzbl($dst$$Register, $src$$Register);
12953   %}
12954   ins_pipe(ialu_reg);
12955 %}
12956 
12957 // And Register with Immediate 65535
12958 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12959 %{
12960   match(Set dst (AndI src mask));
12961 
12962   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
12963   ins_encode %{
12964     __ movzwl($dst$$Register, $src$$Register);
12965   %}
12966   ins_pipe(ialu_reg);
12967 %}
12968 
12969 // And Register with Immediate 65535 and promote to long
12970 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12971 %{
12972   match(Set dst (ConvI2L (AndI src mask)));
12973 
12974   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
12975   ins_encode %{
12976     __ movzwl($dst$$Register, $src$$Register);
12977   %}
12978   ins_pipe(ialu_reg);
12979 %}
12980 
12981 // Can skip int2long conversions after AND with small bitmask
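// bzhiq zeroes all bits of src at positions >= tmp, so for mask == 2^k - 1
// (e.g. 0xFF with k == 8, loaded via exact_log2(mask + 1)) the AND and the
// zero-extension to long collapse into a single instruction.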
12982 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
12983 %{
12984   predicate(VM_Version::supports_bmi2());
12985   ins_cost(125);
12986   effect(TEMP tmp, KILL cr);
12987   match(Set dst (ConvI2L (AndI src mask)));
  format %{ "bzhiq   $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
12989   ins_encode %{
12990     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
12991     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12992   %}
12993   ins_pipe(ialu_reg_reg);
12994 %}
12995 
12996 // And Register with Immediate
12997 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
12998 %{
12999   predicate(!UseAPX);
13000   match(Set dst (AndI dst src));
13001   effect(KILL cr);
13002   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13003 
13004   format %{ "andl    $dst, $src\t# int" %}
13005   ins_encode %{
13006     __ andl($dst$$Register, $src$$constant);
13007   %}
13008   ins_pipe(ialu_reg);
13009 %}
13010 
13011 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13012 %{
13013   predicate(UseAPX);
13014   match(Set dst (AndI src1 src2));
13015   effect(KILL cr);
13016   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13017 
13018   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13019   ins_encode %{
13020     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13021   %}
13022   ins_pipe(ialu_reg);
13023 %}
13024 
13025 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13026 %{
13027   predicate(UseAPX);
13028   match(Set dst (AndI (LoadI src1) src2));
13029   effect(KILL cr);
13030   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13031 
13032   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13033   ins_encode %{
13034     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13035   %}
13036   ins_pipe(ialu_reg);
13037 %}
13038 
13039 // And Register with Memory
13040 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13041 %{
13042   predicate(!UseAPX);
13043   match(Set dst (AndI dst (LoadI src)));
13044   effect(KILL cr);
13045   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13046 
13047   ins_cost(150);
13048   format %{ "andl    $dst, $src\t# int" %}
13049   ins_encode %{
13050     __ andl($dst$$Register, $src$$Address);
13051   %}
13052   ins_pipe(ialu_reg_mem);
13053 %}
13054 
13055 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13056 %{
13057   predicate(UseAPX);
13058   match(Set dst (AndI src1 (LoadI src2)));
13059   effect(KILL cr);
13060   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13061 
13062   ins_cost(150);
13063   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13064   ins_encode %{
13065     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13066   %}
13067   ins_pipe(ialu_reg_mem);
13068 %}
13069 
13070 // And Memory with Register
13071 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13072 %{
13073   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13074   effect(KILL cr);
13075   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13076 
13077   ins_cost(150);
13078   format %{ "andb    $dst, $src\t# byte" %}
13079   ins_encode %{
13080     __ andb($dst$$Address, $src$$Register);
13081   %}
13082   ins_pipe(ialu_mem_reg);
13083 %}
13084 
13085 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13086 %{
13087   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13088   effect(KILL cr);
13089   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13090 
13091   ins_cost(150);
13092   format %{ "andl    $dst, $src\t# int" %}
13093   ins_encode %{
13094     __ andl($dst$$Address, $src$$Register);
13095   %}
13096   ins_pipe(ialu_mem_reg);
13097 %}
13098 
13099 // And Memory with Immediate
13100 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13101 %{
13102   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13103   effect(KILL cr);
13104   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13105 
13106   ins_cost(125);
13107   format %{ "andl    $dst, $src\t# int" %}
13108   ins_encode %{
13109     __ andl($dst$$Address, $src$$constant);
13110   %}
13111   ins_pipe(ialu_mem_imm);
13112 %}
13113 
13114 // BMI1 instructions
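// These rules match the canonical bit-twiddling idioms:
//   andn(x, y) == ~x & y
//   blsi(x)    ==  x & -x        (isolate lowest set bit)
//   blsmsk(x)  ==  x ^ (x - 1)   (mask up to and including lowest set bit)
//   blsr(x)    ==  x & (x - 1)   (clear lowest set bit)
// For example, x == 0b1010100 gives blsi == 0b0000100, blsmsk == 0b0000111
// and blsr == 0b1010000.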
13115 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13116   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13117   predicate(UseBMI1Instructions);
13118   effect(KILL cr);
13119   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13120 
13121   ins_cost(125);
13122   format %{ "andnl  $dst, $src1, $src2" %}
13123 
13124   ins_encode %{
13125     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13126   %}
13127   ins_pipe(ialu_reg_mem);
13128 %}
13129 
13130 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13131   match(Set dst (AndI (XorI src1 minus_1) src2));
13132   predicate(UseBMI1Instructions);
13133   effect(KILL cr);
13134   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13135 
13136   format %{ "andnl  $dst, $src1, $src2" %}
13137 
13138   ins_encode %{
13139     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13140   %}
13141   ins_pipe(ialu_reg);
13142 %}
13143 
13144 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13145   match(Set dst (AndI (SubI imm_zero src) src));
13146   predicate(UseBMI1Instructions);
13147   effect(KILL cr);
13148   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13149 
13150   format %{ "blsil  $dst, $src" %}
13151 
13152   ins_encode %{
13153     __ blsil($dst$$Register, $src$$Register);
13154   %}
13155   ins_pipe(ialu_reg);
13156 %}
13157 
13158 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13159   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13160   predicate(UseBMI1Instructions);
13161   effect(KILL cr);
13162   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13163 
13164   ins_cost(125);
13165   format %{ "blsil  $dst, $src" %}
13166 
13167   ins_encode %{
13168     __ blsil($dst$$Register, $src$$Address);
13169   %}
13170   ins_pipe(ialu_reg_mem);
13171 %}
13172 
13173 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13174 %{
13175   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13176   predicate(UseBMI1Instructions);
13177   effect(KILL cr);
13178   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13179 
13180   ins_cost(125);
13181   format %{ "blsmskl $dst, $src" %}
13182 
13183   ins_encode %{
13184     __ blsmskl($dst$$Register, $src$$Address);
13185   %}
13186   ins_pipe(ialu_reg_mem);
13187 %}
13188 
13189 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13190 %{
13191   match(Set dst (XorI (AddI src minus_1) src));
13192   predicate(UseBMI1Instructions);
13193   effect(KILL cr);
13194   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13195 
13196   format %{ "blsmskl $dst, $src" %}
13197 
13198   ins_encode %{
13199     __ blsmskl($dst$$Register, $src$$Register);
13200   %}
13201 
13202   ins_pipe(ialu_reg);
13203 %}
13204 
13205 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13206 %{
13207   match(Set dst (AndI (AddI src minus_1) src) );
13208   predicate(UseBMI1Instructions);
13209   effect(KILL cr);
13210   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13211 
13212   format %{ "blsrl  $dst, $src" %}
13213 
13214   ins_encode %{
13215     __ blsrl($dst$$Register, $src$$Register);
13216   %}
13217 
  ins_pipe(ialu_reg);
13219 %}
13220 
13221 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13222 %{
13223   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13224   predicate(UseBMI1Instructions);
13225   effect(KILL cr);
13226   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13227 
13228   ins_cost(125);
13229   format %{ "blsrl  $dst, $src" %}
13230 
13231   ins_encode %{
13232     __ blsrl($dst$$Register, $src$$Address);
13233   %}
13234 
  ins_pipe(ialu_reg_mem);
13236 %}
13237 
13238 // Or Instructions
13239 // Or Register with Register
13240 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13241 %{
13242   predicate(!UseAPX);
13243   match(Set dst (OrI dst src));
13244   effect(KILL cr);
13245   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13246 
13247   format %{ "orl     $dst, $src\t# int" %}
13248   ins_encode %{
13249     __ orl($dst$$Register, $src$$Register);
13250   %}
13251   ins_pipe(ialu_reg_reg);
13252 %}
13253 
13254 // Or Register with Register using New Data Destination (NDD)
13255 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13256 %{
13257   predicate(UseAPX);
13258   match(Set dst (OrI src1 src2));
13259   effect(KILL cr);
13260   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13261 
13262   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13263   ins_encode %{
13264     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13265   %}
13266   ins_pipe(ialu_reg_reg);
13267 %}
13268 
13269 // Or Register with Immediate
13270 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13271 %{
13272   predicate(!UseAPX);
13273   match(Set dst (OrI dst src));
13274   effect(KILL cr);
13275   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13276 
13277   format %{ "orl     $dst, $src\t# int" %}
13278   ins_encode %{
13279     __ orl($dst$$Register, $src$$constant);
13280   %}
13281   ins_pipe(ialu_reg);
13282 %}
13283 
13284 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13285 %{
13286   predicate(UseAPX);
13287   match(Set dst (OrI src1 src2));
13288   effect(KILL cr);
13289   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13290 
13291   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13292   ins_encode %{
13293     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13294   %}
13295   ins_pipe(ialu_reg);
13296 %}
13297 
13298 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13299 %{
13300   predicate(UseAPX);
13301   match(Set dst (OrI src1 src2));
13302   effect(KILL cr);
13303   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13304 
13305   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13306   ins_encode %{
13307     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13308   %}
13309   ins_pipe(ialu_reg);
13310 %}
13311 
13312 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13313 %{
13314   predicate(UseAPX);
13315   match(Set dst (OrI (LoadI src1) src2));
13316   effect(KILL cr);
13317   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13318 
13319   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13320   ins_encode %{
13321     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13322   %}
13323   ins_pipe(ialu_reg);
13324 %}
13325 
13326 // Or Register with Memory
13327 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13328 %{
13329   predicate(!UseAPX);
13330   match(Set dst (OrI dst (LoadI src)));
13331   effect(KILL cr);
13332   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13333 
13334   ins_cost(150);
13335   format %{ "orl     $dst, $src\t# int" %}
13336   ins_encode %{
13337     __ orl($dst$$Register, $src$$Address);
13338   %}
13339   ins_pipe(ialu_reg_mem);
13340 %}
13341 
13342 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13343 %{
13344   predicate(UseAPX);
13345   match(Set dst (OrI src1 (LoadI src2)));
13346   effect(KILL cr);
13347   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13348 
13349   ins_cost(150);
13350   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13351   ins_encode %{
13352     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13353   %}
13354   ins_pipe(ialu_reg_mem);
13355 %}
13356 
13357 // Or Memory with Register
13358 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13359 %{
13360   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13361   effect(KILL cr);
13362   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13363 
13364   ins_cost(150);
13365   format %{ "orb    $dst, $src\t# byte" %}
13366   ins_encode %{
13367     __ orb($dst$$Address, $src$$Register);
13368   %}
13369   ins_pipe(ialu_mem_reg);
13370 %}
13371 
13372 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13373 %{
13374   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13375   effect(KILL cr);
13376   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13377 
13378   ins_cost(150);
13379   format %{ "orl     $dst, $src\t# int" %}
13380   ins_encode %{
13381     __ orl($dst$$Address, $src$$Register);
13382   %}
13383   ins_pipe(ialu_mem_reg);
13384 %}
13385 
13386 // Or Memory with Immediate
13387 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13388 %{
13389   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13390   effect(KILL cr);
13391   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13392 
13393   ins_cost(125);
13394   format %{ "orl     $dst, $src\t# int" %}
13395   ins_encode %{
13396     __ orl($dst$$Address, $src$$constant);
13397   %}
13398   ins_pipe(ialu_mem_imm);
13399 %}
13400 
13401 // Xor Instructions
13402 // Xor Register with Register
13403 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13404 %{
13405   predicate(!UseAPX);
13406   match(Set dst (XorI dst src));
13407   effect(KILL cr);
13408   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13409 
13410   format %{ "xorl    $dst, $src\t# int" %}
13411   ins_encode %{
13412     __ xorl($dst$$Register, $src$$Register);
13413   %}
13414   ins_pipe(ialu_reg_reg);
13415 %}
13416 
13417 // Xor Register with Register using New Data Destination (NDD)
13418 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13419 %{
13420   predicate(UseAPX);
13421   match(Set dst (XorI src1 src2));
13422   effect(KILL cr);
13423   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13424 
13425   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13426   ins_encode %{
13427     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13428   %}
13429   ins_pipe(ialu_reg_reg);
13430 %}
13431 
13432 // Xor Register with Immediate -1
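// x ^ -1 == ~x, and notl leaves the flags untouched, so no KILL cr is needed.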
13433 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13434 %{
13435   predicate(!UseAPX);
13436   match(Set dst (XorI dst imm));
13437 
13438   format %{ "notl    $dst" %}
13439   ins_encode %{
13440      __ notl($dst$$Register);
13441   %}
13442   ins_pipe(ialu_reg);
13443 %}
13444 
13445 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13446 %{
13447   match(Set dst (XorI src imm));
13448   predicate(UseAPX);
13449 
13450   format %{ "enotl    $dst, $src" %}
13451   ins_encode %{
13452      __ enotl($dst$$Register, $src$$Register);
13453   %}
13454   ins_pipe(ialu_reg);
13455 %}
13456 
13457 // Xor Register with Immediate
13458 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13459 %{
  // Strictly exclude src == -1 so that xorI_rReg_im1 is selected for that case regardless of cost.
13461   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13462   match(Set dst (XorI dst src));
13463   effect(KILL cr);
13464   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13465 
13466   format %{ "xorl    $dst, $src\t# int" %}
13467   ins_encode %{
13468     __ xorl($dst$$Register, $src$$constant);
13469   %}
13470   ins_pipe(ialu_reg);
13471 %}
13472 
13473 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13474 %{
  // Strictly exclude src2 == -1 so that xorI_rReg_im1_ndd is selected for that case regardless of cost.
13476   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13477   match(Set dst (XorI src1 src2));
13478   effect(KILL cr);
13479   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13480 
13481   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13482   ins_encode %{
13483     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13484   %}
13485   ins_pipe(ialu_reg);
13486 %}
13487 
13488 // Xor Memory with Immediate
13489 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13490 %{
13491   predicate(UseAPX);
13492   match(Set dst (XorI (LoadI src1) src2));
13493   effect(KILL cr);
13494   ins_cost(150);
13495   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13496 
13497   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13498   ins_encode %{
13499     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13500   %}
13501   ins_pipe(ialu_reg);
13502 %}
13503 
13504 // Xor Register with Memory
13505 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13506 %{
13507   predicate(!UseAPX);
13508   match(Set dst (XorI dst (LoadI src)));
13509   effect(KILL cr);
13510   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13511 
13512   ins_cost(150);
13513   format %{ "xorl    $dst, $src\t# int" %}
13514   ins_encode %{
13515     __ xorl($dst$$Register, $src$$Address);
13516   %}
13517   ins_pipe(ialu_reg_mem);
13518 %}
13519 
13520 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13521 %{
13522   predicate(UseAPX);
13523   match(Set dst (XorI src1 (LoadI src2)));
13524   effect(KILL cr);
13525   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13526 
13527   ins_cost(150);
13528   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13529   ins_encode %{
13530     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13531   %}
13532   ins_pipe(ialu_reg_mem);
13533 %}
13534 
13535 // Xor Memory with Register
13536 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13537 %{
13538   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13539   effect(KILL cr);
13540   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13541 
13542   ins_cost(150);
13543   format %{ "xorb    $dst, $src\t# byte" %}
13544   ins_encode %{
13545     __ xorb($dst$$Address, $src$$Register);
13546   %}
13547   ins_pipe(ialu_mem_reg);
13548 %}
13549 
13550 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13551 %{
13552   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13553   effect(KILL cr);
13554   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13555 
13556   ins_cost(150);
13557   format %{ "xorl    $dst, $src\t# int" %}
13558   ins_encode %{
13559     __ xorl($dst$$Address, $src$$Register);
13560   %}
13561   ins_pipe(ialu_mem_reg);
13562 %}
13563 
13564 // Xor Memory with Immediate
13565 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13566 %{
13567   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13568   effect(KILL cr);
13569   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13570 
13571   ins_cost(125);
13572   format %{ "xorl    $dst, $src\t# int" %}
13573   ins_encode %{
13574     __ xorl($dst$$Address, $src$$constant);
13575   %}
13576   ins_pipe(ialu_mem_imm);
13577 %}
13578 
13579 
13580 // Long Logical Instructions
13581 
13582 // And Instructions
13583 // And Register with Register
13584 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13585 %{
13586   predicate(!UseAPX);
13587   match(Set dst (AndL dst src));
13588   effect(KILL cr);
13589   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13590 
13591   format %{ "andq    $dst, $src\t# long" %}
13592   ins_encode %{
13593     __ andq($dst$$Register, $src$$Register);
13594   %}
13595   ins_pipe(ialu_reg_reg);
13596 %}
13597 
13598 // And Register with Register using New Data Destination (NDD)
13599 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13600 %{
13601   predicate(UseAPX);
13602   match(Set dst (AndL src1 src2));
13603   effect(KILL cr);
13604   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13605 
13606   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13607   ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13611   ins_pipe(ialu_reg_reg);
13612 %}
13613 
13614 // And Register with Immediate 255
13615 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13616 %{
13617   match(Set dst (AndL src mask));
13618 
13619   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13620   ins_encode %{
13621     // movzbl zeroes out the upper 32-bit and does not need REX.W
13622     __ movzbl($dst$$Register, $src$$Register);
13623   %}
13624   ins_pipe(ialu_reg);
13625 %}
13626 
13627 // And Register with Immediate 65535
13628 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13629 %{
13630   match(Set dst (AndL src mask));
13631 
13632   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13633   ins_encode %{
13634     // movzwl zeroes out the upper 32-bit and does not need REX.W
13635     __ movzwl($dst$$Register, $src$$Register);
13636   %}
13637   ins_pipe(ialu_reg);
13638 %}
13639 
13640 // And Register with Immediate
13641 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13642 %{
13643   predicate(!UseAPX);
13644   match(Set dst (AndL dst src));
13645   effect(KILL cr);
13646   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13647 
13648   format %{ "andq    $dst, $src\t# long" %}
13649   ins_encode %{
13650     __ andq($dst$$Register, $src$$constant);
13651   %}
13652   ins_pipe(ialu_reg);
13653 %}
13654 
13655 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13656 %{
13657   predicate(UseAPX);
13658   match(Set dst (AndL src1 src2));
13659   effect(KILL cr);
13660   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13661 
13662   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13663   ins_encode %{
13664     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13665   %}
13666   ins_pipe(ialu_reg);
13667 %}
13668 
13669 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13670 %{
13671   predicate(UseAPX);
13672   match(Set dst (AndL (LoadL src1) src2));
13673   effect(KILL cr);
13674   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13675 
13676   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13677   ins_encode %{
13678     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13679   %}
13680   ins_pipe(ialu_reg);
13681 %}
13682 
13683 // And Register with Memory
13684 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13685 %{
13686   predicate(!UseAPX);
13687   match(Set dst (AndL dst (LoadL src)));
13688   effect(KILL cr);
13689   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13690 
13691   ins_cost(150);
13692   format %{ "andq    $dst, $src\t# long" %}
13693   ins_encode %{
13694     __ andq($dst$$Register, $src$$Address);
13695   %}
13696   ins_pipe(ialu_reg_mem);
13697 %}
13698 
13699 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13700 %{
13701   predicate(UseAPX);
13702   match(Set dst (AndL src1 (LoadL src2)));
13703   effect(KILL cr);
13704   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13705 
13706   ins_cost(150);
13707   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13708   ins_encode %{
13709     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13710   %}
13711   ins_pipe(ialu_reg_mem);
13712 %}
13713 
13714 // And Memory with Register
13715 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13716 %{
13717   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13718   effect(KILL cr);
13719   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13720 
13721   ins_cost(150);
13722   format %{ "andq    $dst, $src\t# long" %}
13723   ins_encode %{
13724     __ andq($dst$$Address, $src$$Register);
13725   %}
13726   ins_pipe(ialu_mem_reg);
13727 %}
13728 
13729 // And Memory with Immediate
13730 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13731 %{
13732   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13733   effect(KILL cr);
13734   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13735 
13736   ins_cost(125);
13737   format %{ "andq    $dst, $src\t# long" %}
13738   ins_encode %{
13739     __ andq($dst$$Address, $src$$constant);
13740   %}
13741   ins_pipe(ialu_mem_imm);
13742 %}
13743 
13744 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13745 %{
  // con must be a genuine 64-bit immediate whose complement is a power of 2
  // (bit index above 30); for 8/32-bit immediates a plain AND works well enough.
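  // Example (illustrative): con == ~(1L << 40) clears bit 40; andq cannot
  // encode that 64-bit immediate, but btrq $dst, 40 can.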
13748   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13749 
13750   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13751   effect(KILL cr);
13752 
13753   ins_cost(125);
13754   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13755   ins_encode %{
13756     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13757   %}
13758   ins_pipe(ialu_mem_imm);
13759 %}
13760 
13761 // BMI1 instructions
13762 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13763   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13764   predicate(UseBMI1Instructions);
13765   effect(KILL cr);
13766   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13767 
13768   ins_cost(125);
13769   format %{ "andnq  $dst, $src1, $src2" %}
13770 
13771   ins_encode %{
13772     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13773   %}
13774   ins_pipe(ialu_reg_mem);
13775 %}
13776 
13777 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13778   match(Set dst (AndL (XorL src1 minus_1) src2));
13779   predicate(UseBMI1Instructions);
13780   effect(KILL cr);
13781   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13782 
13783   format %{ "andnq  $dst, $src1, $src2" %}
13784 
13785   ins_encode %{
13786     __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13787   %}
13788   ins_pipe(ialu_reg_mem);
13789 %}
13790 
13791 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13792   match(Set dst (AndL (SubL imm_zero src) src));
13793   predicate(UseBMI1Instructions);
13794   effect(KILL cr);
13795   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13796 
13797   format %{ "blsiq  $dst, $src" %}
13798 
13799   ins_encode %{
13800     __ blsiq($dst$$Register, $src$$Register);
13801   %}
13802   ins_pipe(ialu_reg);
13803 %}
13804 
13805 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13806   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13807   predicate(UseBMI1Instructions);
13808   effect(KILL cr);
13809   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13810 
13811   ins_cost(125);
13812   format %{ "blsiq  $dst, $src" %}
13813 
13814   ins_encode %{
13815     __ blsiq($dst$$Register, $src$$Address);
13816   %}
13817   ins_pipe(ialu_reg_mem);
13818 %}
13819 
13820 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13821 %{
13822   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13823   predicate(UseBMI1Instructions);
13824   effect(KILL cr);
13825   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13826 
13827   ins_cost(125);
13828   format %{ "blsmskq $dst, $src" %}
13829 
13830   ins_encode %{
13831     __ blsmskq($dst$$Register, $src$$Address);
13832   %}
13833   ins_pipe(ialu_reg_mem);
13834 %}
13835 
13836 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13837 %{
13838   match(Set dst (XorL (AddL src minus_1) src));
13839   predicate(UseBMI1Instructions);
13840   effect(KILL cr);
13841   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13842 
13843   format %{ "blsmskq $dst, $src" %}
13844 
13845   ins_encode %{
13846     __ blsmskq($dst$$Register, $src$$Register);
13847   %}
13848 
13849   ins_pipe(ialu_reg);
13850 %}
13851 
13852 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13853 %{
13854   match(Set dst (AndL (AddL src minus_1) src) );
13855   predicate(UseBMI1Instructions);
13856   effect(KILL cr);
13857   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13858 
13859   format %{ "blsrq  $dst, $src" %}
13860 
13861   ins_encode %{
13862     __ blsrq($dst$$Register, $src$$Register);
13863   %}
13864 
13865   ins_pipe(ialu_reg);
13866 %}
13867 
13868 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13869 %{
13870   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13871   predicate(UseBMI1Instructions);
13872   effect(KILL cr);
13873   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13874 
13875   ins_cost(125);
13876   format %{ "blsrq  $dst, $src" %}
13877 
13878   ins_encode %{
13879     __ blsrq($dst$$Register, $src$$Address);
13880   %}
13881 
13882   ins_pipe(ialu_reg);
13883 %}
13884 
13885 // Or Instructions
13886 // Or Register with Register
13887 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13888 %{
13889   predicate(!UseAPX);
13890   match(Set dst (OrL dst src));
13891   effect(KILL cr);
13892   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13893 
13894   format %{ "orq     $dst, $src\t# long" %}
13895   ins_encode %{
13896     __ orq($dst$$Register, $src$$Register);
13897   %}
13898   ins_pipe(ialu_reg_reg);
13899 %}
13900 
13901 // Or Register with Register using New Data Destination (NDD)
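// APX NDD forms are EVEX-encoded and write a separate destination, so
//   eorq dst, src1, src2   computes   dst = src1 | src2
// without a preceding "movq dst, src1". The trailing boolean on these
// assembler calls is the no_flags (NF) selector; the rules here pass
// false, so the flags are still updated (hence the KILL cr effect).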
13902 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13903 %{
13904   predicate(UseAPX);
13905   match(Set dst (OrL src1 src2));
13906   effect(KILL cr);
13907   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13908 
13909   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13910   ins_encode %{
13911     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13913   %}
13914   ins_pipe(ialu_reg_reg);
13915 %}
13916 
13917 // Use any_RegP to match R15 (TLS register) without spilling.
13918 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
13919   match(Set dst (OrL dst (CastP2X src)));
13920   effect(KILL cr);
13921   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13922 
13923   format %{ "orq     $dst, $src\t# long" %}
13924   ins_encode %{
13925     __ orq($dst$$Register, $src$$Register);
13926   %}
13927   ins_pipe(ialu_reg_reg);
13928 %}
13929 
13930 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
13931   match(Set dst (OrL src1 (CastP2X src2)));
13932   effect(KILL cr);
13933   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13934 
13935   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13936   ins_encode %{
13937     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13938   %}
13939   ins_pipe(ialu_reg_reg);
13940 %}
13941 
13942 // Or Register with Immediate
13943 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13944 %{
13945   predicate(!UseAPX);
13946   match(Set dst (OrL dst src));
13947   effect(KILL cr);
13948   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13949 
13950   format %{ "orq     $dst, $src\t# long" %}
13951   ins_encode %{
13952     __ orq($dst$$Register, $src$$constant);
13953   %}
13954   ins_pipe(ialu_reg);
13955 %}
13956 
13957 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13958 %{
13959   predicate(UseAPX);
13960   match(Set dst (OrL src1 src2));
13961   effect(KILL cr);
13962   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13963 
13964   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13965   ins_encode %{
13966     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13967   %}
13968   ins_pipe(ialu_reg);
13969 %}
13970 
13971 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13972 %{
13973   predicate(UseAPX);
13974   match(Set dst (OrL src1 src2));
13975   effect(KILL cr);
13976   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13977 
13978   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
13979   ins_encode %{
13980     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
13981   %}
13982   ins_pipe(ialu_reg);
13983 %}
13984 
13985 // Or Memory with Immediate into Register (NDD)
13986 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13987 %{
13988   predicate(UseAPX);
13989   match(Set dst (OrL (LoadL src1) src2));
13990   effect(KILL cr);
13991   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13992 
13993   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13994   ins_encode %{
13995     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
13996   %}
13997   ins_pipe(ialu_reg);
13998 %}
13999 
14000 // Or Register with Memory
14001 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14002 %{
14003   predicate(!UseAPX);
14004   match(Set dst (OrL dst (LoadL src)));
14005   effect(KILL cr);
14006   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14007 
14008   ins_cost(150);
14009   format %{ "orq     $dst, $src\t# long" %}
14010   ins_encode %{
14011     __ orq($dst$$Register, $src$$Address);
14012   %}
14013   ins_pipe(ialu_reg_mem);
14014 %}
14015 
14016 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14017 %{
14018   predicate(UseAPX);
14019   match(Set dst (OrL src1 (LoadL src2)));
14020   effect(KILL cr);
14021   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14022 
14023   ins_cost(150);
14024   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14025   ins_encode %{
14026     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14027   %}
14028   ins_pipe(ialu_reg_mem);
14029 %}
14030 
14031 // Or Memory with Register
14032 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14033 %{
14034   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14035   effect(KILL cr);
14036   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14037 
14038   ins_cost(150);
14039   format %{ "orq     $dst, $src\t# long" %}
14040   ins_encode %{
14041     __ orq($dst$$Address, $src$$Register);
14042   %}
14043   ins_pipe(ialu_mem_reg);
14044 %}
14045 
14046 // Or Memory with Immediate
14047 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14048 %{
14049   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14050   effect(KILL cr);
14051   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14052 
14053   ins_cost(125);
14054   format %{ "orq     $dst, $src\t# long" %}
14055   ins_encode %{
14056     __ orq($dst$$Address, $src$$constant);
14057   %}
14058   ins_pipe(ialu_mem_imm);
14059 %}
14060 
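// Setting one bit in memory: when con is a power of two, the OrL below
// collapses into a single bit-test-and-set. Roughly:
//   mem |= con   <==>   btsq mem, log2(con)   // sets exactly one bit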
14061 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14062 %{
14063   // con should be a pure 64-bit power of 2 immediate
14064   // because AND/OR works well enough for 8/32-bit values.
14065   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14066 
14067   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14068   effect(KILL cr);
14069 
14070   ins_cost(125);
14071   format %{ "btsq    $dst, log2($con)\t# long" %}
14072   ins_encode %{
14073     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14074   %}
14075   ins_pipe(ialu_mem_imm);
14076 %}
14077 
14078 // Xor Instructions
14079 // Xor Register with Register
14080 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14081 %{
14082   predicate(!UseAPX);
14083   match(Set dst (XorL dst src));
14084   effect(KILL cr);
14085   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14086 
14087   format %{ "xorq    $dst, $src\t# long" %}
14088   ins_encode %{
14089     __ xorq($dst$$Register, $src$$Register);
14090   %}
14091   ins_pipe(ialu_reg_reg);
14092 %}
14093 
14094 // Xor Register with Register using New Data Destination (NDD)
14095 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14096 %{
14097   predicate(UseAPX);
14098   match(Set dst (XorL src1 src2));
14099   effect(KILL cr);
14100   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14101 
14102   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14103   ins_encode %{
14104     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14105   %}
14106   ins_pipe(ialu_reg_reg);
14107 %}
14108 
14109 // Xor Register with Immediate -1
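// Since x ^ -1 == ~x, these rules emit notq/enotq, which do not modify
// the flags (no KILL cr needed).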
14110 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14111 %{
14112   predicate(!UseAPX);
14113   match(Set dst (XorL dst imm));
14114 
14115   format %{ "notq   $dst" %}
14116   ins_encode %{
14117      __ notq($dst$$Register);
14118   %}
14119   ins_pipe(ialu_reg);
14120 %}
14121 
14122 instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14123 %{
14124   predicate(UseAPX);
14125   match(Set dst (XorL src imm));
14126 
14127   format %{ "enotq   $dst, $src" %}
14128   ins_encode %{
14129     __ enotq($dst$$Register, $src$$Register);
14130   %}
14131   ins_pipe(ialu_reg);
14132 %}
14133 
14134 // Xor Register with Immediate
14135 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14136 %{
14137   // Strict predicate check so that xorL_rReg_im1 is always selected when immL32 src is -1, independent of cost.
14138   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14139   match(Set dst (XorL dst src));
14140   effect(KILL cr);
14141   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14142 
14143   format %{ "xorq    $dst, $src\t# long" %}
14144   ins_encode %{
14145     __ xorq($dst$$Register, $src$$constant);
14146   %}
14147   ins_pipe(ialu_reg);
14148 %}
14149 
14150 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14151 %{
14152   // Strict predicate check so that xorL_rReg_im1_ndd is always selected when immL32 src2 is -1, independent of cost.
14153   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14154   match(Set dst (XorL src1 src2));
14155   effect(KILL cr);
14156   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14157 
14158   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14159   ins_encode %{
14160     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14161   %}
14162   ins_pipe(ialu_reg);
14163 %}
14164 
14165 // Xor Memory with Immediate into Register (NDD)
14166 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14167 %{
14168   predicate(UseAPX);
14169   match(Set dst (XorL (LoadL src1) src2));
14170   effect(KILL cr);
14171   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14172   ins_cost(150);
14173 
14174   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14175   ins_encode %{
14176     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14177   %}
14178   ins_pipe(ialu_reg);
14179 %}
14180 
14181 // Xor Register with Memory
14182 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14183 %{
14184   predicate(!UseAPX);
14185   match(Set dst (XorL dst (LoadL src)));
14186   effect(KILL cr);
14187   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14188 
14189   ins_cost(150);
14190   format %{ "xorq    $dst, $src\t# long" %}
14191   ins_encode %{
14192     __ xorq($dst$$Register, $src$$Address);
14193   %}
14194   ins_pipe(ialu_reg_mem);
14195 %}
14196 
14197 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14198 %{
14199   predicate(UseAPX);
14200   match(Set dst (XorL src1 (LoadL src2)));
14201   effect(KILL cr);
14202   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14203 
14204   ins_cost(150);
14205   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14206   ins_encode %{
14207     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14208   %}
14209   ins_pipe(ialu_reg_mem);
14210 %}
14211 
14212 // Xor Memory with Register
14213 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14214 %{
14215   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14216   effect(KILL cr);
14217   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14218 
14219   ins_cost(150);
14220   format %{ "xorq    $dst, $src\t# long" %}
14221   ins_encode %{
14222     __ xorq($dst$$Address, $src$$Register);
14223   %}
14224   ins_pipe(ialu_mem_reg);
14225 %}
14226 
14227 // Xor Memory with Immediate
14228 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14229 %{
14230   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14231   effect(KILL cr);
14232   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14233 
14234   ins_cost(125);
14235   format %{ "xorq    $dst, $src\t# long" %}
14236   ins_encode %{
14237     __ xorq($dst$$Address, $src$$constant);
14238   %}
14239   ins_pipe(ialu_mem_imm);
14240 %}
14241 
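// CmpLTMask produces -1 when p < q and 0 otherwise. The expansion is,
// roughly:
//   cmpl   p, q
//   setlt  dst         // dst = (p < q) ? 1 : 0, zero-extended
//   negl   dst         // dst = (p < q) ? -1 : 0
// For the compare-against-zero form below, a single arithmetic right
// shift by 31 smears the sign bit across the register and yields the
// same mask directly.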
14242 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14243 %{
14244   match(Set dst (CmpLTMask p q));
14245   effect(KILL cr);
14246 
14247   ins_cost(400);
14248   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
14249             "setcc   $dst \t# emits setlt + movzbl or setzul for APX"
14250             "negl    $dst" %}
14251   ins_encode %{
14252     __ cmpl($p$$Register, $q$$Register);
14253     __ setcc(Assembler::less, $dst$$Register);
14254     __ negl($dst$$Register);
14255   %}
14256   ins_pipe(pipe_slow);
14257 %}
14258 
14259 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14260 %{
14261   match(Set dst (CmpLTMask dst zero));
14262   effect(KILL cr);
14263 
14264   ins_cost(100);
14265   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14266   ins_encode %{
14267     __ sarl($dst$$Register, 31);
14268   %}
14269   ins_pipe(ialu_reg);
14270 %}
14271 
14272 // Better to save a register than avoid a branch
14273 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14274 %{
14275   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14276   effect(KILL cr);
14277   ins_cost(300);
14278   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14279             "jge     done\n\t"
14280             "addl    $p,$y\n"
14281             "done:   " %}
14282   ins_encode %{
14283     Register Rp = $p$$Register;
14284     Register Rq = $q$$Register;
14285     Register Ry = $y$$Register;
14286     Label done;
14287     __ subl(Rp, Rq);
14288     __ jccb(Assembler::greaterEqual, done);
14289     __ addl(Rp, Ry);
14290     __ bind(done);
14291   %}
14292   ins_pipe(pipe_cmplt);
14293 %}
14294 
14295 // Better to save a register than avoid a branch
14296 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14297 %{
14298   match(Set y (AndI (CmpLTMask p q) y));
14299   effect(KILL cr);
14300 
14301   ins_cost(300);
14302 
14303   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14304             "jl      done\n\t"
14305             "xorl    $y, $y\n"
14306             "done:   " %}
14307   ins_encode %{
14308     Register Rp = $p$$Register;
14309     Register Rq = $q$$Register;
14310     Register Ry = $y$$Register;
14311     Label done;
14312     __ cmpl(Rp, Rq);
14313     __ jccb(Assembler::less, done);
14314     __ xorl(Ry, Ry);
14315     __ bind(done);
14316   %}
14317   ins_pipe(pipe_cmplt);
14318 %}
14319 
14320 
14321 //---------- FP Instructions------------------------------------------------
14322 
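// ucomiss/ucomisd set ZF/PF/CF from the comparison result:
//   unordered (NaN operand): ZF=1, PF=1, CF=1
//   greater:                 ZF=0, PF=0, CF=0
//   less:                    ZF=0, PF=0, CF=1
//   equal:                   ZF=1, PF=0, CF=0
// The pushfq/andq/popfq fixup shown below (emit_cmpfp_fixup) runs only
// when PF indicates an unordered result and rewrites the flags so that a
// NaN operand compares as "less".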
14323 // Really expensive, avoid
14324 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14325 %{
14326   match(Set cr (CmpF src1 src2));
14327 
14328   ins_cost(500);
14329   format %{ "ucomiss $src1, $src2\n\t"
14330             "jnp,s   exit\n\t"
14331             "pushfq\t# saw NaN, set CF\n\t"
14332             "andq    [rsp], #0xffffff2b\n\t"
14333             "popfq\n"
14334     "exit:" %}
14335   ins_encode %{
14336     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14337     emit_cmpfp_fixup(masm);
14338   %}
14339   ins_pipe(pipe_slow);
14340 %}
14341 
14342 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14343   match(Set cr (CmpF src1 src2));
14344 
14345   ins_cost(100);
14346   format %{ "ucomiss $src1, $src2" %}
14347   ins_encode %{
14348     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14349   %}
14350   ins_pipe(pipe_slow);
14351 %}
14352 
14353 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14354   match(Set cr (CmpF src1 (LoadF src2)));
14355 
14356   ins_cost(100);
14357   format %{ "ucomiss $src1, $src2" %}
14358   ins_encode %{
14359     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14360   %}
14361   ins_pipe(pipe_slow);
14362 %}
14363 
14364 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14365   match(Set cr (CmpF src con));
14366   ins_cost(100);
14367   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14368   ins_encode %{
14369     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14370   %}
14371   ins_pipe(pipe_slow);
14372 %}
14373 
14374 // Really expensive, avoid
14375 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14376 %{
14377   match(Set cr (CmpD src1 src2));
14378 
14379   ins_cost(500);
14380   format %{ "ucomisd $src1, $src2\n\t"
14381             "jnp,s   exit\n\t"
14382             "pushfq\t# saw NaN, set CF\n\t"
14383             "andq    [rsp], #0xffffff2b\n\t"
14384             "popfq\n"
14385     "exit:" %}
14386   ins_encode %{
14387     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14388     emit_cmpfp_fixup(masm);
14389   %}
14390   ins_pipe(pipe_slow);
14391 %}
14392 
14393 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14394   match(Set cr (CmpD src1 src2));
14395 
14396   ins_cost(100);
14397   format %{ "ucomisd $src1, $src2" %}
14398   ins_encode %{
14399     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14400   %}
14401   ins_pipe(pipe_slow);
14402 %}
14403 
14404 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14405   match(Set cr (CmpD src1 (LoadD src2)));
14406 
14407   ins_cost(100);
14408   format %{ "ucomisd $src1, $src2" %}
14409   ins_encode %{
14410     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14411   %}
14412   ins_pipe(pipe_slow);
14413 %}
14414 
14415 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14416   match(Set cr (CmpD src con));
14417   ins_cost(100);
14418   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14419   ins_encode %{
14420     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14421   %}
14422   ins_pipe(pipe_slow);
14423 %}
14424 
14425 // Compare into -1,0,1
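// CmpF3/CmpD3 materialize the three-way comparison as an int in
// {-1, 0, 1}: as the format strings show, emit_cmpfp3 maps unordered
// (NaN) and "below" to -1, equality to 0, and everything else to +1.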
14426 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14427 %{
14428   match(Set dst (CmpF3 src1 src2));
14429   effect(KILL cr);
14430 
14431   ins_cost(275);
14432   format %{ "ucomiss $src1, $src2\n\t"
14433             "movl    $dst, #-1\n\t"
14434             "jp,s    done\n\t"
14435             "jb,s    done\n\t"
14436             "setne   $dst\n\t"
14437             "movzbl  $dst, $dst\n"
14438     "done:" %}
14439   ins_encode %{
14440     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14441     emit_cmpfp3(masm, $dst$$Register);
14442   %}
14443   ins_pipe(pipe_slow);
14444 %}
14445 
14446 // Compare into -1,0,1
14447 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14448 %{
14449   match(Set dst (CmpF3 src1 (LoadF src2)));
14450   effect(KILL cr);
14451 
14452   ins_cost(275);
14453   format %{ "ucomiss $src1, $src2\n\t"
14454             "movl    $dst, #-1\n\t"
14455             "jp,s    done\n\t"
14456             "jb,s    done\n\t"
14457             "setne   $dst\n\t"
14458             "movzbl  $dst, $dst\n"
14459     "done:" %}
14460   ins_encode %{
14461     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14462     emit_cmpfp3(masm, $dst$$Register);
14463   %}
14464   ins_pipe(pipe_slow);
14465 %}
14466 
14467 // Compare into -1,0,1
14468 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14469   match(Set dst (CmpF3 src con));
14470   effect(KILL cr);
14471 
14472   ins_cost(275);
14473   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14474             "movl    $dst, #-1\n\t"
14475             "jp,s    done\n\t"
14476             "jb,s    done\n\t"
14477             "setne   $dst\n\t"
14478             "movzbl  $dst, $dst\n"
14479     "done:" %}
14480   ins_encode %{
14481     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14482     emit_cmpfp3(masm, $dst$$Register);
14483   %}
14484   ins_pipe(pipe_slow);
14485 %}
14486 
14487 // Compare into -1,0,1
14488 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14489 %{
14490   match(Set dst (CmpD3 src1 src2));
14491   effect(KILL cr);
14492 
14493   ins_cost(275);
14494   format %{ "ucomisd $src1, $src2\n\t"
14495             "movl    $dst, #-1\n\t"
14496             "jp,s    done\n\t"
14497             "jb,s    done\n\t"
14498             "setne   $dst\n\t"
14499             "movzbl  $dst, $dst\n"
14500     "done:" %}
14501   ins_encode %{
14502     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14503     emit_cmpfp3(masm, $dst$$Register);
14504   %}
14505   ins_pipe(pipe_slow);
14506 %}
14507 
14508 // Compare into -1,0,1
14509 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14510 %{
14511   match(Set dst (CmpD3 src1 (LoadD src2)));
14512   effect(KILL cr);
14513 
14514   ins_cost(275);
14515   format %{ "ucomisd $src1, $src2\n\t"
14516             "movl    $dst, #-1\n\t"
14517             "jp,s    done\n\t"
14518             "jb,s    done\n\t"
14519             "setne   $dst\n\t"
14520             "movzbl  $dst, $dst\n"
14521     "done:" %}
14522   ins_encode %{
14523     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14524     emit_cmpfp3(masm, $dst$$Register);
14525   %}
14526   ins_pipe(pipe_slow);
14527 %}
14528 
14529 // Compare into -1,0,1
14530 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14531   match(Set dst (CmpD3 src con));
14532   effect(KILL cr);
14533 
14534   ins_cost(275);
14535   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14536             "movl    $dst, #-1\n\t"
14537             "jp,s    done\n\t"
14538             "jb,s    done\n\t"
14539             "setne   $dst\n\t"
14540             "movzbl  $dst, $dst\n"
14541     "done:" %}
14542   ins_encode %{
14543     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14544     emit_cmpfp3(masm, $dst$$Register);
14545   %}
14546   ins_pipe(pipe_slow);
14547 %}
14548 
14549 //----------Arithmetic Conversion Instructions---------------------------------
14550 
14551 instruct convF2D_reg_reg(regD dst, regF src)
14552 %{
14553   match(Set dst (ConvF2D src));
14554 
14555   format %{ "cvtss2sd $dst, $src" %}
14556   ins_encode %{
14557     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14558   %}
14559   ins_pipe(pipe_slow); // XXX
14560 %}
14561 
14562 instruct convF2D_reg_mem(regD dst, memory src)
14563 %{
14564   predicate(UseAVX == 0);
14565   match(Set dst (ConvF2D (LoadF src)));
14566 
14567   format %{ "cvtss2sd $dst, $src" %}
14568   ins_encode %{
14569     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14570   %}
14571   ins_pipe(pipe_slow); // XXX
14572 %}
14573 
14574 instruct convD2F_reg_reg(regF dst, regD src)
14575 %{
14576   match(Set dst (ConvD2F src));
14577 
14578   format %{ "cvtsd2ss $dst, $src" %}
14579   ins_encode %{
14580     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14581   %}
14582   ins_pipe(pipe_slow); // XXX
14583 %}
14584 
14585 instruct convD2F_reg_mem(regF dst, memory src)
14586 %{
14587   predicate(UseAVX == 0);
14588   match(Set dst (ConvD2F (LoadD src)));
14589 
14590   format %{ "cvtsd2ss $dst, $src" %}
14591   ins_encode %{
14592     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14593   %}
14594   ins_pipe(pipe_slow); // XXX
14595 %}
14596 
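// cvttss2si/cvttsd2si return the "integer indefinite" value (MIN_INT or
// MIN_LONG) for NaN and out-of-range inputs, so the legacy convertF2I()
// helper emits a compare-and-fixup path to produce the Java-mandated
// result (NaN -> 0, saturation to MIN/MAX); that path clobbers the flags,
// hence the KILL cr. The *_avx10 rules use the AVX10.2 saturating
// converts, which produce the Java result directly and need no fixup.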
14597 // XXX do mem variants
14598 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14599 %{
14600   predicate(!VM_Version::supports_avx10_2());
14601   match(Set dst (ConvF2I src));
14602   effect(KILL cr);
14603   format %{ "convert_f2i $dst, $src" %}
14604   ins_encode %{
14605     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14606   %}
14607   ins_pipe(pipe_slow);
14608 %}
14609 
14610 instruct convF2I_reg_reg_avx10(rRegI dst, regF src)
14611 %{
14612   predicate(VM_Version::supports_avx10_2());
14613   match(Set dst (ConvF2I src));
14614   format %{ "evcvttss2sisl $dst, $src" %}
14615   ins_encode %{
14616     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14617   %}
14618   ins_pipe(pipe_slow);
14619 %}
14620 
14621 instruct convF2I_reg_mem_avx10(rRegI dst, memory src)
14622 %{
14623   predicate(VM_Version::supports_avx10_2());
14624   match(Set dst (ConvF2I (LoadF src)));
14625   format %{ "evcvttss2sisl $dst, $src" %}
14626   ins_encode %{
14627     __ evcvttss2sisl($dst$$Register, $src$$Address);
14628   %}
14629   ins_pipe(pipe_slow);
14630 %}
14631 
14632 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14633 %{
14634   predicate(!VM_Version::supports_avx10_2());
14635   match(Set dst (ConvF2L src));
14636   effect(KILL cr);
14637   format %{ "convert_f2l $dst, $src" %}
14638   ins_encode %{
14639     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14640   %}
14641   ins_pipe(pipe_slow);
14642 %}
14643 
14644 instruct convF2L_reg_reg_avx10(rRegL dst, regF src)
14645 %{
14646   predicate(VM_Version::supports_avx10_2());
14647   match(Set dst (ConvF2L src));
14648   format %{ "evcvttss2sisq $dst, $src" %}
14649   ins_encode %{
14650     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14651   %}
14652   ins_pipe(pipe_slow);
14653 %}
14654 
14655 instruct convF2L_reg_mem_avx10(rRegL dst, memory src)
14656 %{
14657   predicate(VM_Version::supports_avx10_2());
14658   match(Set dst (ConvF2L (LoadF src)));
14659   format %{ "evcvttss2sisq $dst, $src" %}
14660   ins_encode %{
14661     __ evcvttss2sisq($dst$$Register, $src$$Address);
14662   %}
14663   ins_pipe(pipe_slow);
14664 %}
14665 
14666 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14667 %{
14668   predicate(!VM_Version::supports_avx10_2());
14669   match(Set dst (ConvD2I src));
14670   effect(KILL cr);
14671   format %{ "convert_d2i $dst, $src" %}
14672   ins_encode %{
14673     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14674   %}
14675   ins_pipe(pipe_slow);
14676 %}
14677 
14678 instruct convD2I_reg_reg_avx10(rRegI dst, regD src)
14679 %{
14680   predicate(VM_Version::supports_avx10_2());
14681   match(Set dst (ConvD2I src));
14682   format %{ "evcvttsd2sisl $dst, $src" %}
14683   ins_encode %{
14684     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14685   %}
14686   ins_pipe(pipe_slow);
14687 %}
14688 
14689 instruct convD2I_reg_mem_avx10(rRegI dst, memory src)
14690 %{
14691   predicate(VM_Version::supports_avx10_2());
14692   match(Set dst (ConvD2I (LoadD src)));
14693   format %{ "evcvttsd2sisl $dst, $src" %}
14694   ins_encode %{
14695     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14696   %}
14697   ins_pipe(pipe_slow);
14698 %}
14699 
14700 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14701 %{
14702   predicate(!VM_Version::supports_avx10_2());
14703   match(Set dst (ConvD2L src));
14704   effect(KILL cr);
14705   format %{ "convert_d2l $dst, $src" %}
14706   ins_encode %{
14707     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14708   %}
14709   ins_pipe(pipe_slow);
14710 %}
14711 
14712 instruct convD2L_reg_reg_avx10(rRegL dst, regD src)
14713 %{
14714   predicate(VM_Version::supports_avx10_2());
14715   match(Set dst (ConvD2L src));
14716   format %{ "evcvttsd2sisq $dst, $src" %}
14717   ins_encode %{
14718     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14719   %}
14720   ins_pipe(pipe_slow);
14721 %}
14722 
14723 instruct convD2L_reg_mem_avx10(rRegL dst, memory src)
14724 %{
14725   predicate(VM_Version::supports_avx10_2());
14726   match(Set dst (ConvD2L (LoadD src)));
14727   format %{ "evcvttsd2sisq $dst, $src" %}
14728   ins_encode %{
14729     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14730   %}
14731   ins_pipe(pipe_slow);
14732 %}
14733 
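// RoundF/RoundD implement Java's Math.round semantics (floor(x + 0.5),
// with NaN mapping to 0); the round_float/round_double helpers expand to
// a branchy sequence, hence pipe_slow and the extra TEMP registers.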
14734 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14735 %{
14736   match(Set dst (RoundD src));
14737   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14738   format %{ "round_double $dst,$src\t# using $rtmp and $rcx as TEMP" %}
14739   ins_encode %{
14740     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14741   %}
14742   ins_pipe(pipe_slow);
14743 %}
14744 
14745 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14746 %{
14747   match(Set dst (RoundF src));
14748   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14749   format %{ "round_float $dst,$src" %}
14750   ins_encode %{
14751     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14752   %}
14753   ins_pipe(pipe_slow);
14754 %}
14755 
14756 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14757 %{
14758   predicate(!UseXmmI2F);
14759   match(Set dst (ConvI2F src));
14760 
14761   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14762   ins_encode %{
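    // The pxor breaks the false output dependency: (v)cvtsi2ss writes
    // only the low lanes of dst, so zeroing dst first keeps the convert
    // from depending on whichever instruction last wrote that register.
    // The same idiom appears in the i2d, l2f and l2d rules below.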
14763     if (UseAVX > 0) {
14764       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14765     }
14766     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14767   %}
14768   ins_pipe(pipe_slow); // XXX
14769 %}
14770 
14771 instruct convI2F_reg_mem(regF dst, memory src)
14772 %{
14773   predicate(UseAVX == 0);
14774   match(Set dst (ConvI2F (LoadI src)));
14775 
14776   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14777   ins_encode %{
14778     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14779   %}
14780   ins_pipe(pipe_slow); // XXX
14781 %}
14782 
14783 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14784 %{
14785   predicate(!UseXmmI2D);
14786   match(Set dst (ConvI2D src));
14787 
14788   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14789   ins_encode %{
14790     if (UseAVX > 0) {
14791       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14792     }
14793     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14794   %}
14795   ins_pipe(pipe_slow); // XXX
14796 %}
14797 
14798 instruct convI2D_reg_mem(regD dst, memory src)
14799 %{
14800   predicate(UseAVX == 0);
14801   match(Set dst (ConvI2D (LoadI src)));
14802 
14803   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14804   ins_encode %{
14805     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14806   %}
14807   ins_pipe(pipe_slow); // XXX
14808 %}
14809 
14810 instruct convXI2F_reg(regF dst, rRegI src)
14811 %{
14812   predicate(UseXmmI2F);
14813   match(Set dst (ConvI2F src));
14814 
14815   format %{ "movdl $dst, $src\n\t"
14816             "cvtdq2ps $dst, $dst\t# i2f" %}
14817   ins_encode %{
14818     __ movdl($dst$$XMMRegister, $src$$Register);
14819     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14820   %}
14821   ins_pipe(pipe_slow); // XXX
14822 %}
14823 
14824 instruct convXI2D_reg(regD dst, rRegI src)
14825 %{
14826   predicate(UseXmmI2D);
14827   match(Set dst (ConvI2D src));
14828 
14829   format %{ "movdl $dst, $src\n\t"
14830             "cvtdq2pd $dst, $dst\t# i2d" %}
14831   ins_encode %{
14832     __ movdl($dst$$XMMRegister, $src$$Register);
14833     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14834   %}
14835   ins_pipe(pipe_slow); // XXX
14836 %}
14837 
14838 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14839 %{
14840   match(Set dst (ConvL2F src));
14841 
14842   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14843   ins_encode %{
14844     if (UseAVX > 0) {
14845       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14846     }
14847     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14848   %}
14849   ins_pipe(pipe_slow); // XXX
14850 %}
14851 
14852 instruct convL2F_reg_mem(regF dst, memory src)
14853 %{
14854   predicate(UseAVX == 0);
14855   match(Set dst (ConvL2F (LoadL src)));
14856 
14857   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14858   ins_encode %{
14859     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14860   %}
14861   ins_pipe(pipe_slow); // XXX
14862 %}
14863 
14864 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14865 %{
14866   match(Set dst (ConvL2D src));
14867 
14868   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14869   ins_encode %{
14870     if (UseAVX > 0) {
14871       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14872     }
14873     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14874   %}
14875   ins_pipe(pipe_slow); // XXX
14876 %}
14877 
14878 instruct convL2D_reg_mem(regD dst, memory src)
14879 %{
14880   predicate(UseAVX == 0);
14881   match(Set dst (ConvL2D (LoadL src)));
14882 
14883   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14884   ins_encode %{
14885     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14886   %}
14887   ins_pipe(pipe_slow); // XXX
14888 %}
14889 
14890 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14891 %{
14892   match(Set dst (ConvI2L src));
14893 
14894   ins_cost(125);
14895   format %{ "movslq  $dst, $src\t# i2l" %}
14896   ins_encode %{
14897     __ movslq($dst$$Register, $src$$Register);
14898   %}
14899   ins_pipe(ialu_reg_reg);
14900 %}
14901 
14902 // Zero-extend convert int to long
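// On x86-64, writing a 32-bit register implicitly zeroes bits 63:32, so a
// plain movl implements (AndL (ConvI2L x) 0xFFFFFFFF); when dst == src no
// instruction needs to be emitted at all.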
14903 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14904 %{
14905   match(Set dst (AndL (ConvI2L src) mask));
14906 
14907   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
14908   ins_encode %{
14909     if ($dst$$reg != $src$$reg) {
14910       __ movl($dst$$Register, $src$$Register);
14911     }
14912   %}
14913   ins_pipe(ialu_reg_reg);
14914 %}
14915 
14916 // Zero-extend convert int to long
14917 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14918 %{
14919   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14920 
14921   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
14922   ins_encode %{
14923     __ movl($dst$$Register, $src$$Address);
14924   %}
14925   ins_pipe(ialu_reg_mem);
14926 %}
14927 
14928 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14929 %{
14930   match(Set dst (AndL src mask));
14931 
14932   format %{ "movl    $dst, $src\t# zero-extend long" %}
14933   ins_encode %{
14934     __ movl($dst$$Register, $src$$Register);
14935   %}
14936   ins_pipe(ialu_reg_reg);
14937 %}
14938 
14939 instruct convL2I_reg_reg(rRegI dst, rRegL src)
14940 %{
14941   match(Set dst (ConvL2I src));
14942 
14943   format %{ "movl    $dst, $src\t# l2i" %}
14944   ins_encode %{
14945     __ movl($dst$$Register, $src$$Register);
14946   %}
14947   ins_pipe(ialu_reg_reg);
14948 %}
14949 
14950 
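// The Move*2* rules below are raw bit-pattern moves between the integer
// and XMM register files (the lowering of Float.floatToRawIntBits and
// friends); the stack-slot variants cover operands the register allocator
// has placed on the stack.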
14951 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
14952   match(Set dst (MoveF2I src));
14953   effect(DEF dst, USE src);
14954 
14955   ins_cost(125);
14956   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
14957   ins_encode %{
14958     __ movl($dst$$Register, Address(rsp, $src$$disp));
14959   %}
14960   ins_pipe(ialu_reg_mem);
14961 %}
14962 
14963 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
14964   match(Set dst (MoveI2F src));
14965   effect(DEF dst, USE src);
14966 
14967   ins_cost(125);
14968   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
14969   ins_encode %{
14970     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
14971   %}
14972   ins_pipe(pipe_slow);
14973 %}
14974 
14975 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
14976   match(Set dst (MoveD2L src));
14977   effect(DEF dst, USE src);
14978 
14979   ins_cost(125);
14980   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
14981   ins_encode %{
14982     __ movq($dst$$Register, Address(rsp, $src$$disp));
14983   %}
14984   ins_pipe(ialu_reg_mem);
14985 %}
14986 
14987 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
14988   predicate(!UseXmmLoadAndClearUpper);
14989   match(Set dst (MoveL2D src));
14990   effect(DEF dst, USE src);
14991 
14992   ins_cost(125);
14993   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
14994   ins_encode %{
14995     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14996   %}
14997   ins_pipe(pipe_slow);
14998 %}
14999 
15000 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15001   predicate(UseXmmLoadAndClearUpper);
15002   match(Set dst (MoveL2D src));
15003   effect(DEF dst, USE src);
15004 
15005   ins_cost(125);
15006   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
15007   ins_encode %{
15008     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15009   %}
15010   ins_pipe(pipe_slow);
15011 %}
15012 
15013 
15014 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15015   match(Set dst (MoveF2I src));
15016   effect(DEF dst, USE src);
15017 
15018   ins_cost(95); // XXX
15019   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
15020   ins_encode %{
15021     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15022   %}
15023   ins_pipe(pipe_slow);
15024 %}
15025 
15026 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15027   match(Set dst (MoveI2F src));
15028   effect(DEF dst, USE src);
15029 
15030   ins_cost(100);
15031   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
15032   ins_encode %{
15033     __ movl(Address(rsp, $dst$$disp), $src$$Register);
15034   %}
15035   ins_pipe( ialu_mem_reg );
15036 %}
15037 
15038 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15039   match(Set dst (MoveD2L src));
15040   effect(DEF dst, USE src);
15041 
15042   ins_cost(95); // XXX
15043   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
15044   ins_encode %{
15045     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15046   %}
15047   ins_pipe(pipe_slow);
15048 %}
15049 
15050 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15051   match(Set dst (MoveL2D src));
15052   effect(DEF dst, USE src);
15053 
15054   ins_cost(100);
15055   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
15056   ins_encode %{
15057     __ movq(Address(rsp, $dst$$disp), $src$$Register);
15058   %}
15059   ins_pipe(ialu_mem_reg);
15060 %}
15061 
15062 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15063   match(Set dst (MoveF2I src));
15064   effect(DEF dst, USE src);
15065   ins_cost(85);
15066   format %{ "movd    $dst,$src\t# MoveF2I" %}
15067   ins_encode %{
15068     __ movdl($dst$$Register, $src$$XMMRegister);
15069   %}
15070   ins_pipe( pipe_slow );
15071 %}
15072 
15073 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15074   match(Set dst (MoveD2L src));
15075   effect(DEF dst, USE src);
15076   ins_cost(85);
15077   format %{ "movd    $dst,$src\t# MoveD2L" %}
15078   ins_encode %{
15079     __ movdq($dst$$Register, $src$$XMMRegister);
15080   %}
15081   ins_pipe( pipe_slow );
15082 %}
15083 
15084 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15085   match(Set dst (MoveI2F src));
15086   effect(DEF dst, USE src);
15087   ins_cost(100);
15088   format %{ "movd    $dst,$src\t# MoveI2F" %}
15089   ins_encode %{
15090     __ movdl($dst$$XMMRegister, $src$$Register);
15091   %}
15092   ins_pipe( pipe_slow );
15093 %}
15094 
15095 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15096   match(Set dst (MoveL2D src));
15097   effect(DEF dst, USE src);
15098   ins_cost(100);
15099   format %{ "movd    $dst,$src\t# MoveL2D" %}
15100   ins_encode %{
15101      __ movdq($dst$$XMMRegister, $src$$Register);
15102   %}
15103   ins_pipe( pipe_slow );
15104 %}
15105 
15106 
15107 // Fast clearing of an array
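// The ClearArray rules dispatch on three properties:
//   is_large()       - known-large lengths skip the short-length fast path
//   word_copy_only() - the 64-bit init value in rax must be stored word by
//                      word, which rules out the rep-stosb byte variant
//   UseAVX           - AVX512 (UseAVX > 2) targets get variants with a
//                      kReg temp so clear_mem() can use masked stores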
15108 // Small non-constant length ClearArray for non-AVX512 targets.
15109 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15110                   Universe dummy, rFlagsReg cr)
15111 %{
15112   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15113   match(Set dummy (ClearArray (Binary cnt base) val));
15114   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15115 
15116   format %{ $$template
15117     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15118     $$emit$$"jg      LARGE\n\t"
15119     $$emit$$"dec     rcx\n\t"
15120     $$emit$$"js      DONE\t# Zero length\n\t"
15121     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15122     $$emit$$"dec     rcx\n\t"
15123     $$emit$$"jge     LOOP\n\t"
15124     $$emit$$"jmp     DONE\n\t"
15125     $$emit$$"# LARGE:\n\t"
15126     if (UseFastStosb) {
15127        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15128        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15129     } else if (UseXMMForObjInit) {
15130        $$emit$$"movdq   $tmp, $val\n\t"
15131        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15132        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15133        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15134        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15135        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15136        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15137        $$emit$$"add     0x40,rax\n\t"
15138        $$emit$$"# L_zero_64_bytes:\n\t"
15139        $$emit$$"sub     0x8,rcx\n\t"
15140        $$emit$$"jge     L_loop\n\t"
15141        $$emit$$"add     0x4,rcx\n\t"
15142        $$emit$$"jl      L_tail\n\t"
15143        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15144        $$emit$$"add     0x20,rax\n\t"
15145        $$emit$$"sub     0x4,rcx\n\t"
15146        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15147        $$emit$$"add     0x4,rcx\n\t"
15148        $$emit$$"jle     L_end\n\t"
15149        $$emit$$"dec     rcx\n\t"
15150        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15151        $$emit$$"vmovq   xmm0,(rax)\n\t"
15152        $$emit$$"add     0x8,rax\n\t"
15153        $$emit$$"dec     rcx\n\t"
15154        $$emit$$"jge     L_sloop\n\t"
15155        $$emit$$"# L_end:\n\t"
15156     } else {
15157        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15158     }
15159     $$emit$$"# DONE"
15160   %}
15161   ins_encode %{
15162     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15163                  $tmp$$XMMRegister, false, false);
15164   %}
15165   ins_pipe(pipe_slow);
15166 %}
15167 
15168 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15169                             Universe dummy, rFlagsReg cr)
15170 %{
15171   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15172   match(Set dummy (ClearArray (Binary cnt base) val));
15173   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15174 
15175   format %{ $$template
15176     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15177     $$emit$$"jg      LARGE\n\t"
15178     $$emit$$"dec     rcx\n\t"
15179     $$emit$$"js      DONE\t# Zero length\n\t"
15180     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15181     $$emit$$"dec     rcx\n\t"
15182     $$emit$$"jge     LOOP\n\t"
15183     $$emit$$"jmp     DONE\n\t"
15184     $$emit$$"# LARGE:\n\t"
15185     if (UseXMMForObjInit) {
15186        $$emit$$"movdq   $tmp, $val\n\t"
15187        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15188        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15189        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15190        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15191        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15192        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15193        $$emit$$"add     0x40,rax\n\t"
15194        $$emit$$"# L_zero_64_bytes:\n\t"
15195        $$emit$$"sub     0x8,rcx\n\t"
15196        $$emit$$"jge     L_loop\n\t"
15197        $$emit$$"add     0x4,rcx\n\t"
15198        $$emit$$"jl      L_tail\n\t"
15199        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15200        $$emit$$"add     0x20,rax\n\t"
15201        $$emit$$"sub     0x4,rcx\n\t"
15202        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15203        $$emit$$"add     0x4,rcx\n\t"
15204        $$emit$$"jle     L_end\n\t"
15205        $$emit$$"dec     rcx\n\t"
15206        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15207        $$emit$$"vmovq   xmm0,(rax)\n\t"
15208        $$emit$$"add     0x8,rax\n\t"
15209        $$emit$$"dec     rcx\n\t"
15210        $$emit$$"jge     L_sloop\n\t"
15211        $$emit$$"# L_end:\n\t"
15212     } else {
15213        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15214     }
15215     $$emit$$"# DONE"
15216   %}
15217   ins_encode %{
15218     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15219                  $tmp$$XMMRegister, false, true);
15220   %}
15221   ins_pipe(pipe_slow);
15222 %}
15223 
15224 // Small non-constant length ClearArray for AVX512 targets.
15225 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15226                        Universe dummy, rFlagsReg cr)
15227 %{
15228   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15229   match(Set dummy (ClearArray (Binary cnt base) val));
15230   ins_cost(125);
15231   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15232 
15233   format %{ $$template
15234     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15235     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15236     $$emit$$"jg      LARGE\n\t"
15237     $$emit$$"dec     rcx\n\t"
15238     $$emit$$"js      DONE\t# Zero length\n\t"
15239     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15240     $$emit$$"dec     rcx\n\t"
15241     $$emit$$"jge     LOOP\n\t"
15242     $$emit$$"jmp     DONE\n\t"
15243     $$emit$$"# LARGE:\n\t"
15244     if (UseFastStosb) {
15245        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15246        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15247     } else if (UseXMMForObjInit) {
15248        $$emit$$"mov     rdi,rax\n\t"
15249        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15250        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15251        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15252        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15253        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15254        $$emit$$"add     0x40,rax\n\t"
15255        $$emit$$"# L_zero_64_bytes:\n\t"
15256        $$emit$$"sub     0x8,rcx\n\t"
15257        $$emit$$"jge     L_loop\n\t"
15258        $$emit$$"add     0x4,rcx\n\t"
15259        $$emit$$"jl      L_tail\n\t"
15260        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15261        $$emit$$"add     0x20,rax\n\t"
15262        $$emit$$"sub     0x4,rcx\n\t"
15263        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15264        $$emit$$"add     0x4,rcx\n\t"
15265        $$emit$$"jle     L_end\n\t"
15266        $$emit$$"dec     rcx\n\t"
15267        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15268        $$emit$$"vmovq   xmm0,(rax)\n\t"
15269        $$emit$$"add     0x8,rax\n\t"
15270        $$emit$$"dec     rcx\n\t"
15271        $$emit$$"jge     L_sloop\n\t"
15272        $$emit$$"# L_end:\n\t"
15273     } else {
15274        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15275     }
15276     $$emit$$"# DONE"
15277   %}
15278   ins_encode %{
15279     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15280                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15281   %}
15282   ins_pipe(pipe_slow);
15283 %}
15284 
15285 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15286                                  Universe dummy, rFlagsReg cr)
15287 %{
15288   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15289   match(Set dummy (ClearArray (Binary cnt base) val));
15290   ins_cost(125);
15291   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15292 
15293   format %{ $$template
15294     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15295     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15296     $$emit$$"jg      LARGE\n\t"
15297     $$emit$$"dec     rcx\n\t"
15298     $$emit$$"js      DONE\t# Zero length\n\t"
15299     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15300     $$emit$$"dec     rcx\n\t"
15301     $$emit$$"jge     LOOP\n\t"
15302     $$emit$$"jmp     DONE\n\t"
15303     $$emit$$"# LARGE:\n\t"
15304     if (UseFastStosb) {
15305        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15306        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15307     } else if (UseXMMForObjInit) {
15308        $$emit$$"mov     rdi,rax\n\t"
15309        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15310        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15311        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15312        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15313        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15314        $$emit$$"add     0x40,rax\n\t"
15315        $$emit$$"# L_zero_64_bytes:\n\t"
15316        $$emit$$"sub     0x8,rcx\n\t"
15317        $$emit$$"jge     L_loop\n\t"
15318        $$emit$$"add     0x4,rcx\n\t"
15319        $$emit$$"jl      L_tail\n\t"
15320        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15321        $$emit$$"add     0x20,rax\n\t"
15322        $$emit$$"sub     0x4,rcx\n\t"
15323        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15324        $$emit$$"add     0x4,rcx\n\t"
15325        $$emit$$"jle     L_end\n\t"
15326        $$emit$$"dec     rcx\n\t"
15327        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15328        $$emit$$"vmovq   xmm0,(rax)\n\t"
15329        $$emit$$"add     0x8,rax\n\t"
15330        $$emit$$"dec     rcx\n\t"
15331        $$emit$$"jge     L_sloop\n\t"
15332        $$emit$$"# L_end:\n\t"
15333     } else {
15334        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15335     }
15336     $$emit$$"# DONE"
15337   %}
15338   ins_encode %{
15339     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15340                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15341   %}
15342   ins_pipe(pipe_slow);
15343 %}
15344 
15345 // Large non-constant length ClearArray for non-AVX512 targets.
15346 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15347                         Universe dummy, rFlagsReg cr)
15348 %{
15349   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15350   match(Set dummy (ClearArray (Binary cnt base) val));
15351   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15352 
15353   format %{ $$template
15354     if (UseFastStosb) {
15355        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15356        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15357     } else if (UseXMMForObjInit) {
15358        $$emit$$"movdq   $tmp, $val\n\t"
15359        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15360        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15361        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15362        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15363        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15364        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15365        $$emit$$"add     0x40,rax\n\t"
15366        $$emit$$"# L_zero_64_bytes:\n\t"
15367        $$emit$$"sub     0x8,rcx\n\t"
15368        $$emit$$"jge     L_loop\n\t"
15369        $$emit$$"add     0x4,rcx\n\t"
15370        $$emit$$"jl      L_tail\n\t"
15371        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15372        $$emit$$"add     0x20,rax\n\t"
15373        $$emit$$"sub     0x4,rcx\n\t"
15374        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15375        $$emit$$"add     0x4,rcx\n\t"
15376        $$emit$$"jle     L_end\n\t"
15377        $$emit$$"dec     rcx\n\t"
15378        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15379        $$emit$$"vmovq   xmm0,(rax)\n\t"
15380        $$emit$$"add     0x8,rax\n\t"
15381        $$emit$$"dec     rcx\n\t"
15382        $$emit$$"jge     L_sloop\n\t"
15383        $$emit$$"# L_end:\n\t"
15384     } else {
15385        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15386     }
15387   %}
15388   ins_encode %{
15389     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15390                  $tmp$$XMMRegister, true, false);
15391   %}
15392   ins_pipe(pipe_slow);
15393 %}
15394 
15395 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15396                                   Universe dummy, rFlagsReg cr)
15397 %{
15398   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15399   match(Set dummy (ClearArray (Binary cnt base) val));
15400   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15401 
15402   format %{ $$template
15403     if (UseXMMForObjInit) {
15404        $$emit$$"movdq   $tmp, $val\n\t"
15405        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15406        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15407        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15408        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15409        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15410        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15411        $$emit$$"add     0x40,rax\n\t"
15412        $$emit$$"# L_zero_64_bytes:\n\t"
15413        $$emit$$"sub     0x8,rcx\n\t"
15414        $$emit$$"jge     L_loop\n\t"
15415        $$emit$$"add     0x4,rcx\n\t"
15416        $$emit$$"jl      L_tail\n\t"
15417        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15418        $$emit$$"add     0x20,rax\n\t"
15419        $$emit$$"sub     0x4,rcx\n\t"
15420        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15421        $$emit$$"add     0x4,rcx\n\t"
15422        $$emit$$"jle     L_end\n\t"
15423        $$emit$$"dec     rcx\n\t"
15424        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15425        $$emit$$"vmovq   xmm0,(rax)\n\t"
15426        $$emit$$"add     0x8,rax\n\t"
15427        $$emit$$"dec     rcx\n\t"
15428        $$emit$$"jge     L_sloop\n\t"
15429        $$emit$$"# L_end:\n\t"
15430     } else {
15431        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15432     }
15433   %}
15434   ins_encode %{
15435     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15436                  $tmp$$XMMRegister, true, true);
15437   %}
15438   ins_pipe(pipe_slow);
15439 %}
15440 
15441 // Large non-constant length ClearArray for AVX512 targets.
15442 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15443                              Universe dummy, rFlagsReg cr)
15444 %{
15445   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15446   match(Set dummy (ClearArray (Binary cnt base) val));
15447   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15448 
15449   format %{ $$template
15450     if (UseFastStosb) {
15451        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15452        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15453        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15454     } else if (UseXMMForObjInit) {
15455        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15456        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15457        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15458        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15459        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15460        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15461        $$emit$$"add     0x40,rax\n\t"
15462        $$emit$$"# L_zero_64_bytes:\n\t"
15463        $$emit$$"sub     0x8,rcx\n\t"
15464        $$emit$$"jge     L_loop\n\t"
15465        $$emit$$"add     0x4,rcx\n\t"
15466        $$emit$$"jl      L_tail\n\t"
15467        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15468        $$emit$$"add     0x20,rax\n\t"
15469        $$emit$$"sub     0x4,rcx\n\t"
15470        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15471        $$emit$$"add     0x4,rcx\n\t"
15472        $$emit$$"jle     L_end\n\t"
15473        $$emit$$"dec     rcx\n\t"
15474        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15475        $$emit$$"vmovq   xmm0,(rax)\n\t"
15476        $$emit$$"add     0x8,rax\n\t"
15477        $$emit$$"dec     rcx\n\t"
15478        $$emit$$"jge     L_sloop\n\t"
15479        $$emit$$"# L_end:\n\t"
15480     } else {
15481        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15482        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15483     }
15484   %}
15485   ins_encode %{
15486     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15487                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15488   %}
15489   ins_pipe(pipe_slow);
15490 %}
15491 
15492 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15493                                        Universe dummy, rFlagsReg cr)
15494 %{
15495   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15496   match(Set dummy (ClearArray (Binary cnt base) val));
15497   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15498 
15499   format %{ $$template
15500     if (UseXMMForObjInit) {
15505        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15506        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15507        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15508        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15509        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15510        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15511        $$emit$$"add     0x40,rax\n\t"
15512        $$emit$$"# L_zero_64_bytes:\n\t"
15513        $$emit$$"sub     0x8,rcx\n\t"
15514        $$emit$$"jge     L_loop\n\t"
15515        $$emit$$"add     0x4,rcx\n\t"
15516        $$emit$$"jl      L_tail\n\t"
15517        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15518        $$emit$$"add     0x20,rax\n\t"
15519        $$emit$$"sub     0x4,rcx\n\t"
15520        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15521        $$emit$$"add     0x4,rcx\n\t"
15522        $$emit$$"jle     L_end\n\t"
15523        $$emit$$"dec     rcx\n\t"
15524        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15525        $$emit$$"vmovq   xmm0,(rax)\n\t"
15526        $$emit$$"add     0x8,rax\n\t"
15527        $$emit$$"dec     rcx\n\t"
15528        $$emit$$"jge     L_sloop\n\t"
15529        $$emit$$"# L_end:\n\t"
15530     } else {
15531        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15532        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15533     }
15534   %}
15535   ins_encode %{
15536     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15537                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15538   %}
15539   ins_pipe(pipe_slow);
15540 %}
15541 
15542 // Small constant length ClearArray for AVX512 targets.
15543 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15544 %{
15545   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15546             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15547   match(Set dummy (ClearArray (Binary cnt base) val));
15548   ins_cost(100);
15549   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15550   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15551   ins_encode %{
15552     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15553   %}
15554   ins_pipe(pipe_slow);
15555 %}
15556 
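// String compare instructions. The StrComp encoding picks the element
// widths of the two operands: LL = both Latin-1 (byte[]), UU = both UTF-16
// (char[]), LU/UL = mixed. Each shape comes in a plain SSE/AVX flavor and
// an *_evex flavor that adds a kReg mask temp when AVX-512 VL+BW is
// available. Roughly, at the Java level (illustrative only; the actual
// intrinsic binding lives in the JDK libraries):
//   StringLatin1.compareTo(a, b)   -> StrComp ... LL
//   StringUTF16.compareTo(a, b)    -> StrComp ... UU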
15557 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15558                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15559 %{
15560   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15561   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15562   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15563 
15564   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15565   ins_encode %{
15566     __ string_compare($str1$$Register, $str2$$Register,
15567                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15568                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15569   %}
15570   ins_pipe( pipe_slow );
15571 %}
15572 
15573 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15574                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15575 %{
15576   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15577   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15578   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15579 
15580   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15581   ins_encode %{
15582     __ string_compare($str1$$Register, $str2$$Register,
15583                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15584                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15585   %}
15586   ins_pipe( pipe_slow );
15587 %}
15588 
15589 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15590                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15591 %{
15592   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15593   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15594   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15595 
15596   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15597   ins_encode %{
15598     __ string_compare($str1$$Register, $str2$$Register,
15599                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15600                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15601   %}
15602   ins_pipe( pipe_slow );
15603 %}
15604 
15605 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15606                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15607 %{
15608   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15609   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15610   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15611 
15612   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15613   ins_encode %{
15614     __ string_compare($str1$$Register, $str2$$Register,
15615                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15616                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15617   %}
15618   ins_pipe( pipe_slow );
15619 %}
15620 
15621 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15622                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15623 %{
15624   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15625   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15626   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15627 
15628   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15629   ins_encode %{
15630     __ string_compare($str1$$Register, $str2$$Register,
15631                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15632                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15633   %}
15634   ins_pipe( pipe_slow );
15635 %}
15636 
15637 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15638                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15639 %{
15640   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15641   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15642   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15643 
15644   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15645   ins_encode %{
15646     __ string_compare($str1$$Register, $str2$$Register,
15647                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15648                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15649   %}
15650   ins_pipe( pipe_slow );
15651 %}
15652 
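// Note the swapped register assignment in the UL variants below: str1/cnt1
// live in rsi/rdx rather than rdi/rcx, and the encodings pass the arguments
// to string_compare as (str2, str1, cnt2, cnt1) so the masm routine sees
// them in its expected order.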
15653 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15654                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15655 %{
15656   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15657   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15658   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15659 
15660   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15661   ins_encode %{
15662     __ string_compare($str2$$Register, $str1$$Register,
15663                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15664                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15665   %}
15666   ins_pipe( pipe_slow );
15667 %}
15668 
15669 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15670                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15671 %{
15672   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15673   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15674   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15675 
15676   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15677   ins_encode %{
15678     __ string_compare($str2$$Register, $str1$$Register,
15679                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15680                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15681   %}
15682   ins_pipe( pipe_slow );
15683 %}
15684 
15685 // fast search of substring with known size.
15686 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15687                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15688 %{
15689   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15690   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15691   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15692 
15693   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15694   ins_encode %{
15695     int icnt2 = (int)$int_cnt2$$constant;
15696     if (icnt2 >= 16) {
15697       // IndexOf for constant substrings with size >= 16 elements
15698       // which don't need to be loaded through stack.
15699       __ string_indexofC8($str1$$Register, $str2$$Register,
15700                           $cnt1$$Register, $cnt2$$Register,
15701                           icnt2, $result$$Register,
15702                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15703     } else {
15704       // Small strings are loaded through stack if they cross page boundary.
15705       __ string_indexof($str1$$Register, $str2$$Register,
15706                         $cnt1$$Register, $cnt2$$Register,
15707                         icnt2, $result$$Register,
15708                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15709     }
15710   %}
15711   ins_pipe( pipe_slow );
15712 %}
15713 
15714 // fast search of substring with known size.
15715 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15716                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15717 %{
15718   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15719   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15720   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15721 
15722   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15723   ins_encode %{
15724     int icnt2 = (int)$int_cnt2$$constant;
15725     if (icnt2 >= 8) {
15726       // IndexOf for constant substrings with size >= 8 elements
15727       // which don't need to be loaded through stack.
15728       __ string_indexofC8($str1$$Register, $str2$$Register,
15729                           $cnt1$$Register, $cnt2$$Register,
15730                           icnt2, $result$$Register,
15731                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15732     } else {
15733       // Small strings are loaded through stack if they cross page boundary.
15734       __ string_indexof($str1$$Register, $str2$$Register,
15735                         $cnt1$$Register, $cnt2$$Register,
15736                         icnt2, $result$$Register,
15737                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15738     }
15739   %}
15740   ins_pipe( pipe_slow );
15741 %}
15742 
15743 // fast search of substring with known size.
15744 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15745                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15746 %{
15747   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15748   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15749   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15750 
15751   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15752   ins_encode %{
15753     int icnt2 = (int)$int_cnt2$$constant;
15754     if (icnt2 >= 8) {
15755       // IndexOf for constant substrings with size >= 8 elements
15756       // which don't need to be loaded through stack.
15757       __ string_indexofC8($str1$$Register, $str2$$Register,
15758                           $cnt1$$Register, $cnt2$$Register,
15759                           icnt2, $result$$Register,
15760                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15761     } else {
15762       // Small strings are loaded through stack if they cross page boundary.
15763       __ string_indexof($str1$$Register, $str2$$Register,
15764                         $cnt1$$Register, $cnt2$$Register,
15765                         icnt2, $result$$Register,
15766                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15767     }
15768   %}
15769   ins_pipe( pipe_slow );
15770 %}
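// Variable-length variants: the substring length is not a compile-time
// constant here, so (-1) is passed for int_cnt2 and string_indexof reads
// the actual count from $cnt2 at runtime.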
15771 
15772 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15773                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15774 %{
15775   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15776   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15777   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15778 
15779   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15780   ins_encode %{
15781     __ string_indexof($str1$$Register, $str2$$Register,
15782                       $cnt1$$Register, $cnt2$$Register,
15783                       (-1), $result$$Register,
15784                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15785   %}
15786   ins_pipe( pipe_slow );
15787 %}
15788 
15789 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15790                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15791 %{
15792   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15793   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15794   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15795 
15796   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15797   ins_encode %{
15798     __ string_indexof($str1$$Register, $str2$$Register,
15799                       $cnt1$$Register, $cnt2$$Register,
15800                       (-1), $result$$Register,
15801                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15802   %}
15803   ins_pipe( pipe_slow );
15804 %}
15805 
15806 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15807                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15808 %{
15809   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15810   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15811   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15812 
15813   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15814   ins_encode %{
15815     __ string_indexof($str1$$Register, $str2$$Register,
15816                       $cnt1$$Register, $cnt2$$Register,
15817                       (-1), $result$$Register,
15818                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15819   %}
15820   ins_pipe( pipe_slow );
15821 %}
15822 
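// IndexOf a single char: the U variant below scans UTF-16 (2-byte)
// elements, the L variant scans Latin-1 (1-byte) elements.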
15823 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15824                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15825 %{
15826   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15827   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15828   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15829   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15830   ins_encode %{
15831     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15832                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15833   %}
15834   ins_pipe( pipe_slow );
15835 %}
15836 
15837 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15838                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15839 %{
15840   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15841   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15842   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15843   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15844   ins_encode %{
15845     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15846                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15847   %}
15848   ins_pipe( pipe_slow );
15849 %}
15850 
15851 // fast string equals
15852 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15853                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15854 %{
15855   predicate(!VM_Version::supports_avx512vlbw());
15856   match(Set result (StrEquals (Binary str1 str2) cnt));
15857   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15858 
15859   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15860   ins_encode %{
15861     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15862                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15863                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15864   %}
15865   ins_pipe( pipe_slow );
15866 %}
15867 
15868 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15869                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15870 %{
15871   predicate(VM_Version::supports_avx512vlbw());
15872   match(Set result (StrEquals (Binary str1 str2) cnt));
15873   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15874 
15875   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15876   ins_encode %{
15877     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15878                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15879                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15880   %}
15881   ins_pipe( pipe_slow );
15882 %}
15883 
15884 // fast array equals
15885 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15886                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15887 %{
15888   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15889   match(Set result (AryEq ary1 ary2));
15890   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15891 
15892   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15893   ins_encode %{
15894     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15895                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15896                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15897   %}
15898   ins_pipe( pipe_slow );
15899 %}
15900 
15901 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15902                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15903 %{
15904   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15905   match(Set result (AryEq ary1 ary2));
15906   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15907 
15908   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15909   ins_encode %{
15910     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15911                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15912                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15913   %}
15914   ins_pipe( pipe_slow );
15915 %}
15916 
15917 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15918                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15919 %{
15920   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15921   match(Set result (AryEq ary1 ary2));
15922   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15923 
15924   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15925   ins_encode %{
15926     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15927                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15928                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15929   %}
15930   ins_pipe( pipe_slow );
15931 %}
15932 
15933 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15934                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15935 %{
15936   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15937   match(Set result (AryEq ary1 ary2));
15938   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15939 
15940   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15941   ins_encode %{
15942     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15943                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15944                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15945   %}
15946   ins_pipe( pipe_slow );
15947 %}
15948 
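// Vectorized hash code (this typically backs the
// ArraysSupport.vectorizedHashCode intrinsic used by Arrays.hashCode and
// String.hashCode). The thirteen vector temps hold the unrolled
// accumulators and cached powers of 31; see MacroAssembler::arrays_hashcode
// for the exact register usage.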
15949 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15950                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15951                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15952                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15953                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15954 %{
15955   predicate(UseAVX >= 2);
15956   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15957   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15958          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15959          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15960          USE basic_type, KILL cr);
15961 
15962   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
15963   ins_encode %{
15964     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15965                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15966                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15967                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15968                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15969                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15970                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15971   %}
15972   ins_pipe( pipe_slow );
15973 %}
15974 
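// Count leading positive bytes, roughly: result = index of the first byte
// with the sign bit set, or $len if there is none (used by the
// StringCoding.countPositives intrinsic).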
15975 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15976                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15977 %{
15978   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15979   match(Set result (CountPositives ary1 len));
15980   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15981 
15982   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15983   ins_encode %{
15984     __ count_positives($ary1$$Register, $len$$Register,
15985                        $result$$Register, $tmp3$$Register,
15986                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15987   %}
15988   ins_pipe( pipe_slow );
15989 %}
15990 
15991 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15992                               legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15993 %{
15994   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15995   match(Set result (CountPositives ary1 len));
15996   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15997 
15998   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15999   ins_encode %{
16000     __ count_positives($ary1$$Register, $len$$Register,
16001                        $result$$Register, $tmp3$$Register,
16002                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
16003   %}
16004   ins_pipe( pipe_slow );
16005 %}
16006 
16007 // fast char[] to byte[] compression
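// (Compression succeeds only while every char fits in a byte; the result
// register reports how far compression got -- see
// MacroAssembler::char_array_compress for the exact contract.)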
16008 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16009                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16010   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16011   match(Set result (StrCompressedCopy src (Binary dst len)));
16012   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
16013          USE_KILL len, KILL tmp5, KILL cr);
16014 
16015   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
16016   ins_encode %{
16017     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16018                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16019                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16020                            knoreg, knoreg);
16021   %}
16022   ins_pipe( pipe_slow );
16023 %}
16024 
16025 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16026                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16027   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16028   match(Set result (StrCompressedCopy src (Binary dst len)));
16029   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
16030          USE_KILL len, KILL tmp5, KILL cr);
16031 
16032   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
16033   ins_encode %{
16034     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16035                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16036                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16037                            $ktmp1$$KRegister, $ktmp2$$KRegister);
16038   %}
16039   ins_pipe( pipe_slow );
16040 %}

16041 // fast byte[] to char[] inflation
16042 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16043                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
16044   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16045   match(Set dummy (StrInflatedCopy src (Binary dst len)));
16046   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16047 
16048   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
16049   ins_encode %{
16050     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16051                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
16052   %}
16053   ins_pipe( pipe_slow );
16054 %}
16055 
16056 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16057                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
16058   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16059   match(Set dummy (StrInflatedCopy src (Binary dst len)));
16060   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16061 
16062   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
16063   ins_encode %{
16064     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16065                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
16066   %}
16067   ins_pipe( pipe_slow );
16068 %}
16069 
16070 // encode char[] to byte[] in ISO_8859_1
16071 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16072                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16073                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16074   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
16075   match(Set result (EncodeISOArray src (Binary dst len)));
16076   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16077 
16078   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16079   ins_encode %{
16080     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16081                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16082                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
16083   %}
16084   ins_pipe( pipe_slow );
16085 %}
16086 
16087 // encode char[] to byte[] in ASCII
16088 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16089                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16090                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16091   predicate(((EncodeISOArrayNode*)n)->is_ascii());
16092   match(Set result (EncodeISOArray src (Binary dst len)));
16093   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16094 
16095   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16096   ins_encode %{
16097     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16098                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16099                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16100   %}
16101   ins_pipe( pipe_slow );
16102 %}
16103 
16104 //----------Overflow Math Instructions-----------------------------------------
16105 
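// These match the OverflowAdd/Sub/Mul ideal nodes; the ALU instruction sets
// OF and a following branch or cmov consumes the overflow condition. They
// typically originate from intrinsics such as Math.addExact. Illustrative
// Java-level source (assumed, not the literal binding):
//   int r = Math.addExact(a, b);  // deoptimizes/throws on signed overflow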
16106 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16107 %{
16108   match(Set cr (OverflowAddI op1 op2));
16109   effect(DEF cr, USE_KILL op1, USE op2);
16110 
16111   format %{ "addl    $op1, $op2\t# overflow check int" %}
16112 
16113   ins_encode %{
16114     __ addl($op1$$Register, $op2$$Register);
16115   %}
16116   ins_pipe(ialu_reg_reg);
16117 %}
16118 
16119 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16120 %{
16121   match(Set cr (OverflowAddI op1 op2));
16122   effect(DEF cr, USE_KILL op1, USE op2);
16123 
16124   format %{ "addl    $op1, $op2\t# overflow check int" %}
16125 
16126   ins_encode %{
16127     __ addl($op1$$Register, $op2$$constant);
16128   %}
16129   ins_pipe(ialu_reg_reg);
16130 %}
16131 
16132 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16133 %{
16134   match(Set cr (OverflowAddL op1 op2));
16135   effect(DEF cr, USE_KILL op1, USE op2);
16136 
16137   format %{ "addq    $op1, $op2\t# overflow check long" %}
16138   ins_encode %{
16139     __ addq($op1$$Register, $op2$$Register);
16140   %}
16141   ins_pipe(ialu_reg_reg);
16142 %}
16143 
16144 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16145 %{
16146   match(Set cr (OverflowAddL op1 op2));
16147   effect(DEF cr, USE_KILL op1, USE op2);
16148 
16149   format %{ "addq    $op1, $op2\t# overflow check long" %}
16150   ins_encode %{
16151     __ addq($op1$$Register, $op2$$constant);
16152   %}
16153   ins_pipe(ialu_reg_reg);
16154 %}
16155 
16156 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16157 %{
16158   match(Set cr (OverflowSubI op1 op2));
16159 
16160   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16161   ins_encode %{
16162     __ cmpl($op1$$Register, $op2$$Register);
16163   %}
16164   ins_pipe(ialu_reg_reg);
16165 %}
16166 
16167 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16168 %{
16169   match(Set cr (OverflowSubI op1 op2));
16170 
16171   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16172   ins_encode %{
16173     __ cmpl($op1$$Register, $op2$$constant);
16174   %}
16175   ins_pipe(ialu_reg_reg);
16176 %}
16177 
16178 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16179 %{
16180   match(Set cr (OverflowSubL op1 op2));
16181 
16182   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16183   ins_encode %{
16184     __ cmpq($op1$$Register, $op2$$Register);
16185   %}
16186   ins_pipe(ialu_reg_reg);
16187 %}
16188 
16189 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16190 %{
16191   match(Set cr (OverflowSubL op1 op2));
16192 
16193   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16194   ins_encode %{
16195     __ cmpq($op1$$Register, $op2$$constant);
16196   %}
16197   ins_pipe(ialu_reg_reg);
16198 %}
16199 
16200 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16201 %{
16202   match(Set cr (OverflowSubI zero op2));
16203   effect(DEF cr, USE_KILL op2);
16204 
16205   format %{ "negl    $op2\t# overflow check int" %}
16206   ins_encode %{
16207     __ negl($op2$$Register);
16208   %}
16209   ins_pipe(ialu_reg_reg);
16210 %}
16211 
16212 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16213 %{
16214   match(Set cr (OverflowSubL zero op2));
16215   effect(DEF cr, USE_KILL op2);
16216 
16217   format %{ "negq    $op2\t# overflow check long" %}
16218   ins_encode %{
16219     __ negq($op2$$Register);
16220   %}
16221   ins_pipe(ialu_reg_reg);
16222 %}
16223 
16224 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16225 %{
16226   match(Set cr (OverflowMulI op1 op2));
16227   effect(DEF cr, USE_KILL op1, USE op2);
16228 
16229   format %{ "imull    $op1, $op2\t# overflow check int" %}
16230   ins_encode %{
16231     __ imull($op1$$Register, $op2$$Register);
16232   %}
16233   ins_pipe(ialu_reg_reg_alu0);
16234 %}
16235 
16236 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16237 %{
16238   match(Set cr (OverflowMulI op1 op2));
16239   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16240 
16241   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
16242   ins_encode %{
16243     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16244   %}
16245   ins_pipe(ialu_reg_reg_alu0);
16246 %}
16247 
16248 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16249 %{
16250   match(Set cr (OverflowMulL op1 op2));
16251   effect(DEF cr, USE_KILL op1, USE op2);
16252 
16253   format %{ "imulq    $op1, $op2\t# overflow check long" %}
16254   ins_encode %{
16255     __ imulq($op1$$Register, $op2$$Register);
16256   %}
16257   ins_pipe(ialu_reg_reg_alu0);
16258 %}
16259 
16260 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16261 %{
16262   match(Set cr (OverflowMulL op1 op2));
16263   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16264 
16265   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
16266   ins_encode %{
16267     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16268   %}
16269   ins_pipe(ialu_reg_reg_alu0);
16270 %}
16271 
16272 
16273 //----------Control Flow Instructions------------------------------------------
16274 // Signed compare Instructions
16275 
16276 // XXX more variants!!
16277 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16278 %{
16279   match(Set cr (CmpI op1 op2));
16280   effect(DEF cr, USE op1, USE op2);
16281 
16282   format %{ "cmpl    $op1, $op2" %}
16283   ins_encode %{
16284     __ cmpl($op1$$Register, $op2$$Register);
16285   %}
16286   ins_pipe(ialu_cr_reg_reg);
16287 %}
16288 
16289 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16290 %{
16291   match(Set cr (CmpI op1 op2));
16292 
16293   format %{ "cmpl    $op1, $op2" %}
16294   ins_encode %{
16295     __ cmpl($op1$$Register, $op2$$constant);
16296   %}
16297   ins_pipe(ialu_cr_reg_imm);
16298 %}
16299 
16300 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16301 %{
16302   match(Set cr (CmpI op1 (LoadI op2)));
16303 
16304   ins_cost(500); // XXX
16305   format %{ "cmpl    $op1, $op2" %}
16306   ins_encode %{
16307     __ cmpl($op1$$Register, $op2$$Address);
16308   %}
16309   ins_pipe(ialu_cr_reg_mem);
16310 %}
16311 
16312 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16313 %{
16314   match(Set cr (CmpI src zero));
16315 
16316   format %{ "testl   $src, $src" %}
16317   ins_encode %{
16318     __ testl($src$$Register, $src$$Register);
16319   %}
16320   ins_pipe(ialu_cr_reg_imm);
16321 %}
16322 
16323 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16324 %{
16325   match(Set cr (CmpI (AndI src con) zero));
16326 
16327   format %{ "testl   $src, $con" %}
16328   ins_encode %{
16329     __ testl($src$$Register, $con$$constant);
16330   %}
16331   ins_pipe(ialu_cr_reg_imm);
16332 %}
16333 
16334 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16335 %{
16336   match(Set cr (CmpI (AndI src1 src2) zero));
16337 
16338   format %{ "testl   $src1, $src2" %}
16339   ins_encode %{
16340     __ testl($src1$$Register, $src2$$Register);
16341   %}
16342   ins_pipe(ialu_cr_reg_imm);
16343 %}
16344 
16345 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16346 %{
16347   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16348 
16349   format %{ "testl   $src, $mem" %}
16350   ins_encode %{
16351     __ testl($src$$Register, $mem$$Address);
16352   %}
16353   ins_pipe(ialu_cr_reg_mem);
16354 %}
16355 
16356 // Unsigned compare Instructions; really, same as signed except they
16357 // produce an rFlagsRegU instead of rFlagsReg.
16358 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16359 %{
16360   match(Set cr (CmpU op1 op2));
16361 
16362   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16363   ins_encode %{
16364     __ cmpl($op1$$Register, $op2$$Register);
16365   %}
16366   ins_pipe(ialu_cr_reg_reg);
16367 %}
16368 
16369 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16370 %{
16371   match(Set cr (CmpU op1 op2));
16372 
16373   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16374   ins_encode %{
16375     __ cmpl($op1$$Register, $op2$$constant);
16376   %}
16377   ins_pipe(ialu_cr_reg_imm);
16378 %}
16379 
16380 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16381 %{
16382   match(Set cr (CmpU op1 (LoadI op2)));
16383 
16384   ins_cost(500); // XXX
16385   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16386   ins_encode %{
16387     __ cmpl($op1$$Register, $op2$$Address);
16388   %}
16389   ins_pipe(ialu_cr_reg_mem);
16390 %}
16391 
16392 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16393 %{
16394   match(Set cr (CmpU src zero));
16395 
16396   format %{ "testl   $src, $src\t# unsigned" %}
16397   ins_encode %{
16398     __ testl($src$$Register, $src$$Register);
16399   %}
16400   ins_pipe(ialu_cr_reg_imm);
16401 %}
16402 
16403 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16404 %{
16405   match(Set cr (CmpP op1 op2));
16406 
16407   format %{ "cmpq    $op1, $op2\t# ptr" %}
16408   ins_encode %{
16409     __ cmpq($op1$$Register, $op2$$Register);
16410   %}
16411   ins_pipe(ialu_cr_reg_reg);
16412 %}
16413 
16414 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16415 %{
16416   match(Set cr (CmpP op1 (LoadP op2)));
16417   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16418 
16419   ins_cost(500); // XXX
16420   format %{ "cmpq    $op1, $op2\t# ptr" %}
16421   ins_encode %{
16422     __ cmpq($op1$$Register, $op2$$Address);
16423   %}
16424   ins_pipe(ialu_cr_reg_mem);
16425 %}
16426 
16427 // XXX this is generalized by compP_rReg_mem???
16428 // Compare raw pointer (used in out-of-heap check).
16429 // Only works because non-oop pointers must be raw pointers
16430 // and raw pointers have no anti-dependencies.
16431 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16432 %{
16433   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16434             n->in(2)->as_Load()->barrier_data() == 0);
16435   match(Set cr (CmpP op1 (LoadP op2)));
16436 
16437   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16438   ins_encode %{
16439     __ cmpq($op1$$Register, $op2$$Address);
16440   %}
16441   ins_pipe(ialu_cr_reg_mem);
16442 %}
16443 
16444 // This will generate a signed flags result. This should be OK since
16445 // any compare to a zero should be eq/neq.
16446 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16447 %{
16448   match(Set cr (CmpP src zero));
16449 
16450   format %{ "testq   $src, $src\t# ptr" %}
16451   ins_encode %{
16452     __ testq($src$$Register, $src$$Register);
16453   %}
16454   ins_pipe(ialu_cr_reg_imm);
16455 %}
16456 
16457 // This will generate a signed flags result. This should be OK since
16458 // any compare to a zero should be eq/neq.
16459 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16460 %{
16461   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16462             n->in(1)->as_Load()->barrier_data() == 0);
16463   match(Set cr (CmpP (LoadP op) zero));
16464 
16465   ins_cost(500); // XXX
16466   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16467   ins_encode %{
16468     __ testq($op$$Address, 0xFFFFFFFF);
16469   %}
16470   ins_pipe(ialu_cr_reg_imm);
16471 %}
16472 
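// When the compressed-oops base is null, r12 (the heap-base register) holds
// zero, so a pointer null test against memory can be done as a cmpq with
// r12 instead of encoding an immediate.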
16473 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16474 %{
16475   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16476             n->in(1)->as_Load()->barrier_data() == 0);
16477   match(Set cr (CmpP (LoadP mem) zero));
16478 
16479   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16480   ins_encode %{
16481     __ cmpq(r12, $mem$$Address);
16482   %}
16483   ins_pipe(ialu_cr_reg_mem);
16484 %}
16485 
16486 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16487 %{
16488   match(Set cr (CmpN op1 op2));
16489 
16490   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16491   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16492   ins_pipe(ialu_cr_reg_reg);
16493 %}
16494 
16495 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16496 %{
16497   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16498   match(Set cr (CmpN src (LoadN mem)));
16499 
16500   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16501   ins_encode %{
16502     __ cmpl($src$$Register, $mem$$Address);
16503   %}
16504   ins_pipe(ialu_cr_reg_mem);
16505 %}
16506 
16507 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16508   match(Set cr (CmpN op1 op2));
16509 
16510   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16511   ins_encode %{
16512     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16513   %}
16514   ins_pipe(ialu_cr_reg_imm);
16515 %}
16516 
16517 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16518 %{
16519   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16520   match(Set cr (CmpN src (LoadN mem)));
16521 
16522   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16523   ins_encode %{
16524     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16525   %}
16526   ins_pipe(ialu_cr_reg_mem);
16527 %}
16528 
16529 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16530   match(Set cr (CmpN op1 op2));
16531 
16532   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16533   ins_encode %{
16534     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16535   %}
16536   ins_pipe(ialu_cr_reg_imm);
16537 %}
16538 
16539 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16540 %{
16541   predicate(!UseCompactObjectHeaders);
16542   match(Set cr (CmpN src (LoadNKlass mem)));
16543 
16544   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16545   ins_encode %{
16546     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16547   %}
16548   ins_pipe(ialu_cr_reg_mem);
16549 %}
16550 
16551 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16552   match(Set cr (CmpN src zero));
16553 
16554   format %{ "testl   $src, $src\t# compressed ptr" %}
16555   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16556   ins_pipe(ialu_cr_reg_imm);
16557 %}
16558 
16559 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16560 %{
16561   predicate(CompressedOops::base() != nullptr &&
16562             n->in(1)->as_Load()->barrier_data() == 0);
16563   match(Set cr (CmpN (LoadN mem) zero));
16564 
16565   ins_cost(500); // XXX
16566   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16567   ins_encode %{
16568     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16569   %}
16570   ins_pipe(ialu_cr_reg_mem);
16571 %}
16572 
16573 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16574 %{
16575   predicate(CompressedOops::base() == nullptr &&
16576             n->in(1)->as_Load()->barrier_data() == 0);
16577   match(Set cr (CmpN (LoadN mem) zero));
16578 
16579   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16580   ins_encode %{
16581     __ cmpl(r12, $mem$$Address);
16582   %}
16583   ins_pipe(ialu_cr_reg_mem);
16584 %}
16585 
16586 // Yanked all unsigned pointer compare operations.
16587 // Pointer compares are done with CmpP which is already unsigned.
16588 
16589 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16590 %{
16591   match(Set cr (CmpL op1 op2));
16592 
16593   format %{ "cmpq    $op1, $op2" %}
16594   ins_encode %{
16595     __ cmpq($op1$$Register, $op2$$Register);
16596   %}
16597   ins_pipe(ialu_cr_reg_reg);
16598 %}
16599 
16600 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16601 %{
16602   match(Set cr (CmpL op1 op2));
16603 
16604   format %{ "cmpq    $op1, $op2" %}
16605   ins_encode %{
16606     __ cmpq($op1$$Register, $op2$$constant);
16607   %}
16608   ins_pipe(ialu_cr_reg_imm);
16609 %}
16610 
16611 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16612 %{
16613   match(Set cr (CmpL op1 (LoadL op2)));
16614 
16615   format %{ "cmpq    $op1, $op2" %}
16616   ins_encode %{
16617     __ cmpq($op1$$Register, $op2$$Address);
16618   %}
16619   ins_pipe(ialu_cr_reg_mem);
16620 %}
16621 
16622 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16623 %{
16624   match(Set cr (CmpL src zero));
16625 
16626   format %{ "testq   $src, $src" %}
16627   ins_encode %{
16628     __ testq($src$$Register, $src$$Register);
16629   %}
16630   ins_pipe(ialu_cr_reg_imm);
16631 %}
16632 
16633 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16634 %{
16635   match(Set cr (CmpL (AndL src con) zero));
16636 
16637   format %{ "testq   $src, $con\t# long" %}
16638   ins_encode %{
16639     __ testq($src$$Register, $con$$constant);
16640   %}
16641   ins_pipe(ialu_cr_reg_imm);
16642 %}
16643 
16644 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16645 %{
16646   match(Set cr (CmpL (AndL src1 src2) zero));
16647 
16648   format %{ "testq   $src1, $src2\t# long" %}
16649   ins_encode %{
16650     __ testq($src1$$Register, $src2$$Register);
16651   %}
16652   ins_pipe(ialu_cr_reg_imm);
16653 %}
16654 
16655 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16656 %{
16657   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16658 
16659   format %{ "testq   $src, $mem" %}
16660   ins_encode %{
16661     __ testq($src$$Register, $mem$$Address);
16662   %}
16663   ins_pipe(ialu_cr_reg_mem);
16664 %}
16665 
16666 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16667 %{
16668   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16669 
16670   format %{ "testq   $src, $mem" %}
16671   ins_encode %{
16672     __ testq($src$$Register, $mem$$Address);
16673   %}
16674   ins_pipe(ialu_cr_reg_mem);
16675 %}
16676 
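// Three-way compares (CmpU3/CmpL3/CmpUL3) materialize -1/0/+1 in a GPR,
// e.g. for Integer.compareUnsigned, Long.compare and Long.compareUnsigned;
// each costs a compare, one short branch and a setcc.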
16677 // Manifest a CmpU result in an integer register.  Very painful.
16678 // This is the test to avoid.
16679 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16680 %{
16681   match(Set dst (CmpU3 src1 src2));
16682   effect(KILL flags);
16683 
16684   ins_cost(275); // XXX
16685   format %{ "cmpl    $src1, $src2\t# CmpL3\n\t"
16686             "movl    $dst, -1\n\t"
16687             "jb,u    done\n\t"
16688             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16689     "done:" %}
16690   ins_encode %{
16691     Label done;
16692     __ cmpl($src1$$Register, $src2$$Register);
16693     __ movl($dst$$Register, -1);
16694     __ jccb(Assembler::below, done);
16695     __ setcc(Assembler::notZero, $dst$$Register);
16696     __ bind(done);
16697   %}
16698   ins_pipe(pipe_slow);
16699 %}
16700 
16701 // Manifest a CmpL result in an integer register.  Very painful.
16702 // This is the test to avoid.
16703 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16704 %{
16705   match(Set dst (CmpL3 src1 src2));
16706   effect(KILL flags);
16707 
16708   ins_cost(275); // XXX
16709   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16710             "movl    $dst, -1\n\t"
16711             "jl,s    done\n\t"
16712             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16713     "done:" %}
16714   ins_encode %{
16715     Label done;
16716     __ cmpq($src1$$Register, $src2$$Register);
16717     __ movl($dst$$Register, -1);
16718     __ jccb(Assembler::less, done);
16719     __ setcc(Assembler::notZero, $dst$$Register);
16720     __ bind(done);
16721   %}
16722   ins_pipe(pipe_slow);
16723 %}
16724 
16725 // Manifest a CmpUL result in an integer register.  Very painful.
16726 // This is the test to avoid.
16727 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16728 %{
16729   match(Set dst (CmpUL3 src1 src2));
16730   effect(KILL flags);
16731 
16732   ins_cost(275); // XXX
16733   format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
16734             "movl    $dst, -1\n\t"
16735             "jb,u    done\n\t"
16736             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16737     "done:" %}
16738   ins_encode %{
16739     Label done;
16740     __ cmpq($src1$$Register, $src2$$Register);
16741     __ movl($dst$$Register, -1);
16742     __ jccb(Assembler::below, done);
16743     __ setcc(Assembler::notZero, $dst$$Register);
16744     __ bind(done);
16745   %}
16746   ins_pipe(pipe_slow);
16747 %}
16748 
16749 // Unsigned long compare Instructions; really, same as signed long except they
16750 // produce an rFlagsRegU instead of rFlagsReg.
16751 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16752 %{
16753   match(Set cr (CmpUL op1 op2));
16754 
16755   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16756   ins_encode %{
16757     __ cmpq($op1$$Register, $op2$$Register);
16758   %}
16759   ins_pipe(ialu_cr_reg_reg);
16760 %}
16761 
16762 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16763 %{
16764   match(Set cr (CmpUL op1 op2));
16765 
16766   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16767   ins_encode %{
16768     __ cmpq($op1$$Register, $op2$$constant);
16769   %}
16770   ins_pipe(ialu_cr_reg_imm);
16771 %}
16772 
16773 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16774 %{
16775   match(Set cr (CmpUL op1 (LoadL op2)));
16776 
16777   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16778   ins_encode %{
16779     __ cmpq($op1$$Register, $op2$$Address);
16780   %}
16781   ins_pipe(ialu_cr_reg_mem);
16782 %}
16783 
16784 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16785 %{
16786   match(Set cr (CmpUL src zero));
16787 
16788   format %{ "testq   $src, $src\t# unsigned" %}
16789   ins_encode %{
16790     __ testq($src$$Register, $src$$Register);
16791   %}
16792   ins_pipe(ialu_cr_reg_imm);
16793 %}
16794 
16795 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16796 %{
16797   match(Set cr (CmpI (LoadB mem) imm));
16798 
16799   ins_cost(125);
16800   format %{ "cmpb    $mem, $imm" %}
16801   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16802   ins_pipe(ialu_cr_reg_mem);
16803 %}
16804 
16805 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16806 %{
16807   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16808 
16809   ins_cost(125);
16810   format %{ "testb   $mem, $imm\t# ubyte" %}
16811   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16812   ins_pipe(ialu_cr_reg_mem);
16813 %}
16814 
16815 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16816 %{
16817   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16818 
16819   ins_cost(125);
16820   format %{ "testb   $mem, $imm\t# byte" %}
16821   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16822   ins_pipe(ialu_cr_reg_mem);
16823 %}
16824 
16825 //----------Max and Min--------------------------------------------------------
16826 // Min Instructions
16827 
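// x86 has no scalar integer min/max instruction, so MinI and MaxI expand into
// a compare followed by a conditional move (see the expand blocks below).
// Roughly, for min:
//   cmpl    dst, src
//   cmovlgt dst, src      // dst = (dst > src) ? src : dst  ==  min(dst, src)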
16828 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16829 %{
16830   predicate(!UseAPX);
16831   effect(USE_DEF dst, USE src, USE cr);
16832 
16833   format %{ "cmovlgt $dst, $src\t# min" %}
16834   ins_encode %{
16835     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16836   %}
16837   ins_pipe(pipe_cmov_reg);
16838 %}
16839 
16840 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16841 %{
16842   predicate(UseAPX);
16843   effect(DEF dst, USE src1, USE src2, USE cr);
16844 
16845   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16846   ins_encode %{
16847     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16848   %}
16849   ins_pipe(pipe_cmov_reg);
16850 %}
16851 
16852 instruct minI_rReg(rRegI dst, rRegI src)
16853 %{
16854   predicate(!UseAPX);
16855   match(Set dst (MinI dst src));
16856 
16857   ins_cost(200);
16858   expand %{
16859     rFlagsReg cr;
16860     compI_rReg(cr, dst, src);
16861     cmovI_reg_g(dst, src, cr);
16862   %}
16863 %}
16864 
16865 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16866 %{
16867   predicate(UseAPX);
16868   match(Set dst (MinI src1 src2));
16869   effect(DEF dst, USE src1, USE src2);
16870 
16871   ins_cost(200);
16872   expand %{
16873     rFlagsReg cr;
16874     compI_rReg(cr, src1, src2);
16875     cmovI_reg_g_ndd(dst, src1, src2, cr);
16876   %}
16877 %}
16878 
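// Max Instructions
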
16879 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16880 %{
16881   predicate(!UseAPX);
16882   effect(USE_DEF dst, USE src, USE cr);
16883 
16884   format %{ "cmovllt $dst, $src\t# max" %}
16885   ins_encode %{
16886     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16887   %}
16888   ins_pipe(pipe_cmov_reg);
16889 %}
16890 
16891 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16892 %{
16893   predicate(UseAPX);
16894   effect(DEF dst, USE src1, USE src2, USE cr);
16895 
16896   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16897   ins_encode %{
16898     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16899   %}
16900   ins_pipe(pipe_cmov_reg);
16901 %}
16902 
16903 instruct maxI_rReg(rRegI dst, rRegI src)
16904 %{
16905   predicate(!UseAPX);
16906   match(Set dst (MaxI dst src));
16907 
16908   ins_cost(200);
16909   expand %{
16910     rFlagsReg cr;
16911     compI_rReg(cr, dst, src);
16912     cmovI_reg_l(dst, src, cr);
16913   %}
16914 %}
16915 
16916 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16917 %{
16918   predicate(UseAPX);
16919   match(Set dst (MaxI src1 src2));
16920   effect(DEF dst, USE src1, USE src2);
16921 
16922   ins_cost(200);
16923   expand %{
16924     rFlagsReg cr;
16925     compI_rReg(cr, src1, src2);
16926     cmovI_reg_l_ndd(dst, src1, src2, cr);
16927   %}
16928 %}
16929 
16930 // ============================================================================
16931 // Branch Instructions
16932 
16933 // Jump Direct - Label defines a relative address from JMP+1
16934 instruct jmpDir(label labl)
16935 %{
16936   match(Goto);
16937   effect(USE labl);
16938 
16939   ins_cost(300);
16940   format %{ "jmp     $labl" %}
16941   size(5);
16942   ins_encode %{
16943     Label* L = $labl$$label;
16944     __ jmp(*L, false); // Always long jump
16945   %}
16946   ins_pipe(pipe_jmp);
16947 %}
16948 
16949 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16950 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16951 %{
16952   match(If cop cr);
16953   effect(USE labl);
16954 
16955   ins_cost(300);
16956   format %{ "j$cop     $labl" %}
16957   size(6);
16958   ins_encode %{
16959     Label* L = $labl$$label;
16960     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16961   %}
16962   ins_pipe(pipe_jcc);
16963 %}
16964 
16965 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16966 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16967 %{
16968   match(CountedLoopEnd cop cr);
16969   effect(USE labl);
16970 
16971   ins_cost(300);
16972   format %{ "j$cop     $labl\t# loop end" %}
16973   size(6);
16974   ins_encode %{
16975     Label* L = $labl$$label;
16976     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16977   %}
16978   ins_pipe(pipe_jcc);
16979 %}
16980 
16981 // Jump Direct Conditional - using unsigned comparison
16982 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16983   match(If cop cmp);
16984   effect(USE labl);
16985 
16986   ins_cost(300);
16987   format %{ "j$cop,u   $labl" %}
16988   size(6);
16989   ins_encode %{
16990     Label* L = $labl$$label;
16991     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16992   %}
16993   ins_pipe(pipe_jcc);
16994 %}
16995 
16996 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16997   match(If cop cmp);
16998   effect(USE labl);
16999 
17000   ins_cost(200);
17001   format %{ "j$cop,u   $labl" %}
17002   size(6);
17003   ins_encode %{
17004     Label* L = $labl$$label;
17005     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17006   %}
17007   ins_pipe(pipe_jcc);
17008 %}
17009 
17010 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17011   match(If cop cmp);
17012   effect(USE labl);
17013 
17014   ins_cost(200);
17015   format %{ $$template
17016     if ($cop$$cmpcode == Assembler::notEqual) {
17017       $$emit$$"jp,u    $labl\n\t"
17018       $$emit$$"j$cop,u   $labl"
17019     } else {
17020       $$emit$$"jp,u    done\n\t"
17021       $$emit$$"j$cop,u   $labl\n\t"
17022       $$emit$$"done:"
17023     }
17024   %}
17025   ins_encode %{
17026     Label* l = $labl$$label;
17027     if ($cop$$cmpcode == Assembler::notEqual) {
17028       __ jcc(Assembler::parity, *l, false);
17029       __ jcc(Assembler::notEqual, *l, false);
17030     } else if ($cop$$cmpcode == Assembler::equal) {
17031       Label done;
17032       __ jccb(Assembler::parity, done);
17033       __ jcc(Assembler::equal, *l, false);
17034       __ bind(done);
17035     } else {
17036        ShouldNotReachHere();
17037     }
17038   %}
17039   ins_pipe(pipe_jcc);
17040 %}
17041 
17042 // ============================================================================
17043 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
17044 // superklass array for an instance of the superklass.  Set a hidden
17045 // internal cache on a hit (cache is checked with exposed code in
17046 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
17047 // encoding ALSO sets flags.
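//
// In C-like pseudocode the scan below is roughly:
//   Klass** p = sub->secondary_supers()->data();
//   for (int i = sub->secondary_supers()->length(); i > 0; i--) {
//     if (*p++ == super) {                  // repne scasq hit
//       sub->secondary_super_cache = super; // remember for the fast path
//       return 0;                           // hit: result zero, flags Z
//     }
//   }
//   return nonzero;                         // miss: flags NZ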
17048 
17049 instruct partialSubtypeCheck(rdi_RegP result,
17050                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
17051                              rFlagsReg cr)
17052 %{
17053   match(Set result (PartialSubtypeCheck sub super));
17054   predicate(!UseSecondarySupersTable);
17055   effect(KILL rcx, KILL cr);
17056 
17057   ins_cost(1100);  // slightly larger than the next version
17058   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
17059             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
17060             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
17061             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
17062             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
17063             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
17064             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
17065     "miss:\t" %}
17066 
17067   ins_encode %{
17068     Label miss;
17069     // NB: Callers may assume that, when $result is a valid register,
17070     // check_klass_subtype_slow_path_linear sets it to a nonzero
17071     // value.
17072     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
17073                                             $rcx$$Register, $result$$Register,
17074                                             nullptr, &miss,
17075                                             /*set_cond_codes:*/ true);
17076     __ xorptr($result$$Register, $result$$Register);
17077     __ bind(miss);
17078   %}
17079 
17080   ins_pipe(pipe_slow);
17081 %}
17082 
17083 // ============================================================================
17084 // Two versions of hashtable-based partialSubtypeCheck, both used when
17085 // we need to search for a super class in the secondary supers array.
17086 // The first is used when we don't know _a priori_ the class being
17087 // searched for. The second, far more common, is used when we do know:
17088 // this is used for instanceof, checkcast, and any case where C2 can
17089 // determine it by constant propagation.
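// For example, instanceof and checkcast against a loaded class typically give
// C2 the super klass as a constant (the second form below), while
// Class::isInstance or Class::isAssignableFrom leave it known only at run
// time (the first form).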
17090 
17091 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17092                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17093                                        rFlagsReg cr)
17094 %{
17095   match(Set result (PartialSubtypeCheck sub super));
17096   predicate(UseSecondarySupersTable);
17097   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17098 
17099   ins_cost(1000);
17100   format %{ "partialSubtypeCheck $result, $sub, $super" %}
17101 
17102   ins_encode %{
17103     __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
17104                                          $temp3$$Register, $temp4$$Register, $result$$Register);
17105   %}
17106 
17107   ins_pipe(pipe_slow);
17108 %}
17109 
17110 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17111                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17112                                        rFlagsReg cr)
17113 %{
17114   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17115   predicate(UseSecondarySupersTable);
17116   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17117 
17118   ins_cost(700);  // smaller than the versions above
17119   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17120 
17121   ins_encode %{
17122     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17123     if (InlineSecondarySupersTest) {
17124       __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17125                                        $temp3$$Register, $temp4$$Register, $result$$Register,
17126                                        super_klass_slot);
17127     } else {
17128       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17129     }
17130   %}
17131 
17132   ins_pipe(pipe_slow);
17133 %}
17134 
17135 // ============================================================================
17136 // Branch Instructions -- short offset versions
17137 //
17138 // These instructions are used to replace jumps of a long offset (the default
17139 // match) with jumps of a shorter offset.  These instructions are all tagged
17140 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17141 // match rules in general matching.  Instead, the ADLC generates a conversion
17142 // method in the MachNode which can be used to do in-place replacement of the
17143 // long variant with the shorter variant.  The compiler will determine if a
17144 // branch can be taken by the is_short_branch_offset() predicate in the machine
17145 // specific code section of the file.
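//
// For reference: the long forms above encode as jmp rel32 (0xE9, 5 bytes) and
// jcc rel32 (0x0F 0x8x, 6 bytes); the short forms below encode as jmp rel8
// (0xEB) and jcc rel8 (0x7x), 2 bytes each -- hence size(5)/size(6) above
// versus size(2) here.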
17146 
17147 // Jump Direct - Label defines a relative address from JMP+1
17148 instruct jmpDir_short(label labl) %{
17149   match(Goto);
17150   effect(USE labl);
17151 
17152   ins_cost(300);
17153   format %{ "jmp,s   $labl" %}
17154   size(2);
17155   ins_encode %{
17156     Label* L = $labl$$label;
17157     __ jmpb(*L);
17158   %}
17159   ins_pipe(pipe_jmp);
17160   ins_short_branch(1);
17161 %}
17162 
17163 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17164 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17165   match(If cop cr);
17166   effect(USE labl);
17167 
17168   ins_cost(300);
17169   format %{ "j$cop,s   $labl" %}
17170   size(2);
17171   ins_encode %{
17172     Label* L = $labl$$label;
17173     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17174   %}
17175   ins_pipe(pipe_jcc);
17176   ins_short_branch(1);
17177 %}
17178 
17179 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17180 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17181   match(CountedLoopEnd cop cr);
17182   effect(USE labl);
17183 
17184   ins_cost(300);
17185   format %{ "j$cop,s   $labl\t# loop end" %}
17186   size(2);
17187   ins_encode %{
17188     Label* L = $labl$$label;
17189     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17190   %}
17191   ins_pipe(pipe_jcc);
17192   ins_short_branch(1);
17193 %}
17194 
17195 // Jump Direct Conditional - using unsigned comparison
17196 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17197   match(If cop cmp);
17198   effect(USE labl);
17199 
17200   ins_cost(300);
17201   format %{ "j$cop,us  $labl" %}
17202   size(2);
17203   ins_encode %{
17204     Label* L = $labl$$label;
17205     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17206   %}
17207   ins_pipe(pipe_jcc);
17208   ins_short_branch(1);
17209 %}
17210 
17211 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17212   match(If cop cmp);
17213   effect(USE labl);
17214 
17215   ins_cost(300);
17216   format %{ "j$cop,us  $labl" %}
17217   size(2);
17218   ins_encode %{
17219     Label* L = $labl$$label;
17220     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17221   %}
17222   ins_pipe(pipe_jcc);
17223   ins_short_branch(1);
17224 %}
17225 
17226 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17227   match(If cop cmp);
17228   effect(USE labl);
17229 
17230   ins_cost(300);
17231   format %{ $$template
17232     if ($cop$$cmpcode == Assembler::notEqual) {
17233       $$emit$$"jp,u,s  $labl\n\t"
17234       $$emit$$"j$cop,u,s  $labl"
17235     } else {
17236       $$emit$$"jp,u,s  done\n\t"
17237       $$emit$$"j$cop,u,s  $labl\n\t"
17238       $$emit$$"done:"
17239     }
17240   %}
17241   size(4);
17242   ins_encode %{
17243     Label* l = $labl$$label;
17244     if ($cop$$cmpcode == Assembler::notEqual) {
17245       __ jccb(Assembler::parity, *l);
17246       __ jccb(Assembler::notEqual, *l);
17247     } else if ($cop$$cmpcode == Assembler::equal) {
17248       Label done;
17249       __ jccb(Assembler::parity, done);
17250       __ jccb(Assembler::equal, *l);
17251       __ bind(done);
17252     } else {
17253        ShouldNotReachHere();
17254     }
17255   %}
17256   ins_pipe(pipe_jcc);
17257   ins_short_branch(1);
17258 %}
17259 
17260 // ============================================================================
17261 // inlined locking and unlocking
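//
// FastLock and FastUnlock only produce condition codes: roughly, ZF is set
// when the inline fast path succeeds and cleared on contention, in which case
// the branch emitted after this node falls into the slow path that calls the
// runtime to inflate and enter (or exit) the monitor.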
17262 
17263 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17264   match(Set cr (FastLock object box));
17265   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17266   ins_cost(300);
17267   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17268   ins_encode %{
17269     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17270   %}
17271   ins_pipe(pipe_slow);
17272 %}
17273 
17274 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17275   match(Set cr (FastUnlock object rax_reg));
17276   effect(TEMP tmp, USE_KILL rax_reg);
17277   ins_cost(300);
17278   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17279   ins_encode %{
17280     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17281   %}
17282   ins_pipe(pipe_slow);
17283 %}
17284 
17285 
17286 // ============================================================================
17287 // Safepoint Instructions
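// The poll reads a word from the thread-local polling page whose address is
// held in $poll. To bring threads to a safepoint, the VM arms the poll by
// making that page unreadable; the testl below then faults and the signal
// handler dispatches the thread to the safepoint handler. The loaded value
// itself is ignored.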
17288 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17289 %{
17290   match(SafePoint poll);
17291   effect(KILL cr, USE poll);
17292 
17293   format %{ "testl   rax, [$poll]\t"
17294             "# Safepoint: poll for GC" %}
17295   ins_cost(125);
17296   ins_encode %{
17297     __ relocate(relocInfo::poll_type);
17298     address pre_pc = __ pc();
17299     __ testl(rax, Address($poll$$Register, 0));
17300     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17301   %}
17302   ins_pipe(ialu_reg_mem);
17303 %}
17304 
17305 instruct mask_all_evexL(kReg dst, rRegL src) %{
17306   match(Set dst (MaskAll src));
17307   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17308   ins_encode %{
17309     int mask_len = Matcher::vector_length(this);
17310     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17311   %}
17312   ins_pipe( pipe_slow );
17313 %}
17314 
17315 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17316   predicate(Matcher::vector_length(n) > 32);
17317   match(Set dst (MaskAll src));
17318   effect(TEMP tmp);
17319   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17320   ins_encode %{
17321     int mask_len = Matcher::vector_length(this);
17322     __ movslq($tmp$$Register, $src$$Register);
17323     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17324   %}
17325   ins_pipe( pipe_slow );
17326 %}
17327 
17328 // ============================================================================
17329 // Procedure Call/Return Instructions
17330 // Call Java Static Instruction
17331 // Note: If this code changes, the corresponding ret_addr_offset() and
17332 //       compute_padding() functions will have to be adjusted.
17333 instruct CallStaticJavaDirect(method meth) %{
17334   match(CallStaticJava);
17335   effect(USE meth);
17336 
17337   ins_cost(300);
17338   format %{ "call,static " %}
17339   opcode(0xE8); /* E8 cd */
17340   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17341   ins_pipe(pipe_slow);
17342   ins_alignment(4);
17343 %}
17344 
17345 // Call Java Dynamic Instruction
17346 // Note: If this code changes, the corresponding ret_addr_offset() and
17347 //       compute_padding() functions will have to be adjusted.
17348 instruct CallDynamicJavaDirect(method meth)
17349 %{
17350   match(CallDynamicJava);
17351   effect(USE meth);
17352 
17353   ins_cost(300);
17354   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17355             "call,dynamic " %}
17356   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17357   ins_pipe(pipe_slow);
17358   ins_alignment(4);
17359 %}
17360 
17361 // Call Runtime Instruction
17362 instruct CallRuntimeDirect(method meth)
17363 %{
17364   match(CallRuntime);
17365   effect(USE meth);
17366 
17367   ins_cost(300);
17368   format %{ "call,runtime " %}
17369   ins_encode(clear_avx, Java_To_Runtime(meth));
17370   ins_pipe(pipe_slow);
17371 %}
17372 
17373 // Call runtime without safepoint
17374 instruct CallLeafDirect(method meth)
17375 %{
17376   match(CallLeaf);
17377   effect(USE meth);
17378 
17379   ins_cost(300);
17380   format %{ "call_leaf,runtime " %}
17381   ins_encode(clear_avx, Java_To_Runtime(meth));
17382   ins_pipe(pipe_slow);
17383 %}
17384 
17385 // Call runtime without safepoint and with vector arguments
17386 instruct CallLeafDirectVector(method meth)
17387 %{
17388   match(CallLeafVector);
17389   effect(USE meth);
17390 
17391   ins_cost(300);
17392   format %{ "call_leaf,vector " %}
17393   ins_encode(Java_To_Runtime(meth));
17394   ins_pipe(pipe_slow);
17395 %}
17396 
17397 // Call runtime without safepoint
17398 // entry point is null, target holds the address to call
17399 instruct CallLeafNoFPInDirect(rRegP target)
17400 %{
17401   predicate(n->as_Call()->entry_point() == nullptr);
17402   match(CallLeafNoFP target);
17403 
17404   ins_cost(300);
17405   format %{ "call_leaf_nofp,runtime indirect " %}
17406   ins_encode %{
17407      __ call($target$$Register);
17408   %}
17409 
17410   ins_pipe(pipe_slow);
17411 %}
17412 
17413 // Call runtime without safepoint
17414 instruct CallLeafNoFPDirect(method meth)
17415 %{
17416   predicate(n->as_Call()->entry_point() != nullptr);
17417   match(CallLeafNoFP);
17418   effect(USE meth);
17419 
17420   ins_cost(300);
17421   format %{ "call_leaf_nofp,runtime " %}
17422   ins_encode(clear_avx, Java_To_Runtime(meth));
17423   ins_pipe(pipe_slow);
17424 %}
17425 
17426 // Return Instruction
17427 // Remove the return address & jump to it.
17428 // Notice: We always emit a nop after a ret to make sure there is room
17429 // for safepoint patching
17430 instruct Ret()
17431 %{
17432   match(Return);
17433 
17434   format %{ "ret" %}
17435   ins_encode %{
17436     __ ret(0);
17437   %}
17438   ins_pipe(pipe_jmp);
17439 %}
17440 
17441 // Tail Call; Jump from runtime stub to Java code.
17442 // Also known as an 'interprocedural jump'.
17443 // Target of jump will eventually return to caller.
17444 // TailJump below removes the return address.
17445 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17446 // emitted just above the TailCall which has reset rbp to the caller state.
17447 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17448 %{
17449   match(TailCall jump_target method_ptr);
17450 
17451   ins_cost(300);
17452   format %{ "jmp     $jump_target\t# rbx holds method" %}
17453   ins_encode %{
17454     __ jmp($jump_target$$Register);
17455   %}
17456   ins_pipe(pipe_jmp);
17457 %}
17458 
17459 // Tail Jump; remove the return address; jump to target.
17460 // TailCall above leaves the return address around.
17461 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17462 %{
17463   match(TailJump jump_target ex_oop);
17464 
17465   ins_cost(300);
17466   format %{ "popq    rdx\t# pop return address\n\t"
17467             "jmp     $jump_target" %}
17468   ins_encode %{
17469     __ popq(as_Register(RDX_enc));
17470     __ jmp($jump_target$$Register);
17471   %}
17472   ins_pipe(pipe_jmp);
17473 %}
17474 
17475 // Forward exception.
17476 instruct ForwardExceptionjmp()
17477 %{
17478   match(ForwardException);
17479 
17480   format %{ "jmp     forward_exception_stub" %}
17481   ins_encode %{
17482     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17483   %}
17484   ins_pipe(pipe_jmp);
17485 %}
17486 
17487 // Create exception oop: created by stack-crawling runtime code.
17488 // Created exception is now available to this handler, and is set up
17489 // just prior to jumping to this handler.  No code emitted.
17490 instruct CreateException(rax_RegP ex_oop)
17491 %{
17492   match(Set ex_oop (CreateEx));
17493 
17494   size(0);
17495   // use the following format syntax
17496   format %{ "# exception oop is in rax; no code emitted" %}
17497   ins_encode();
17498   ins_pipe(empty);
17499 %}
17500 
17501 // Rethrow exception:
17502 // The exception oop will come in the first argument position.
17503 // Then JUMP (not call) to the rethrow stub code.
17504 instruct RethrowException()
17505 %{
17506   match(Rethrow);
17507 
17508   // use the following format syntax
17509   format %{ "jmp     rethrow_stub" %}
17510   ins_encode %{
17511     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17512   %}
17513   ins_pipe(pipe_jmp);
17514 %}
17515 
17516 // ============================================================================
17517 // This name is KNOWN by the ADLC and cannot be changed.
17518 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17519 // for this guy.
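// R15 is globally reserved to hold the current JavaThread, so "loading" TLS
// needs no code at all -- hence size(0) below.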
17520 instruct tlsLoadP(r15_RegP dst) %{
17521   match(Set dst (ThreadLocal));
17522   effect(DEF dst);
17523 
17524   size(0);
17525   format %{ "# TLS is in R15" %}
17526   ins_encode( /*empty encoding*/ );
17527   ins_pipe(ialu_reg_reg);
17528 %}
17529 
17530 instruct addF_reg(regF dst, regF src) %{
17531   predicate(UseAVX == 0);
17532   match(Set dst (AddF dst src));
17533 
17534   format %{ "addss   $dst, $src" %}
17535   ins_cost(150);
17536   ins_encode %{
17537     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17538   %}
17539   ins_pipe(pipe_slow);
17540 %}
17541 
17542 instruct addF_mem(regF dst, memory src) %{
17543   predicate(UseAVX == 0);
17544   match(Set dst (AddF dst (LoadF src)));
17545 
17546   format %{ "addss   $dst, $src" %}
17547   ins_cost(150);
17548   ins_encode %{
17549     __ addss($dst$$XMMRegister, $src$$Address);
17550   %}
17551   ins_pipe(pipe_slow);
17552 %}
17553 
17554 instruct addF_imm(regF dst, immF con) %{
17555   predicate(UseAVX == 0);
17556   match(Set dst (AddF dst con));
17557   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17558   ins_cost(150);
17559   ins_encode %{
17560     __ addss($dst$$XMMRegister, $constantaddress($con));
17561   %}
17562   ins_pipe(pipe_slow);
17563 %}
17564 
17565 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17566   predicate(UseAVX > 0);
17567   match(Set dst (AddF src1 src2));
17568 
17569   format %{ "vaddss  $dst, $src1, $src2" %}
17570   ins_cost(150);
17571   ins_encode %{
17572     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17573   %}
17574   ins_pipe(pipe_slow);
17575 %}
17576 
17577 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17578   predicate(UseAVX > 0);
17579   match(Set dst (AddF src1 (LoadF src2)));
17580 
17581   format %{ "vaddss  $dst, $src1, $src2" %}
17582   ins_cost(150);
17583   ins_encode %{
17584     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17585   %}
17586   ins_pipe(pipe_slow);
17587 %}
17588 
17589 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17590   predicate(UseAVX > 0);
17591   match(Set dst (AddF src con));
17592 
17593   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17594   ins_cost(150);
17595   ins_encode %{
17596     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17597   %}
17598   ins_pipe(pipe_slow);
17599 %}
17600 
17601 instruct addD_reg(regD dst, regD src) %{
17602   predicate(UseAVX == 0);
17603   match(Set dst (AddD dst src));
17604 
17605   format %{ "addsd   $dst, $src" %}
17606   ins_cost(150);
17607   ins_encode %{
17608     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17609   %}
17610   ins_pipe(pipe_slow);
17611 %}
17612 
17613 instruct addD_mem(regD dst, memory src) %{
17614   predicate(UseAVX == 0);
17615   match(Set dst (AddD dst (LoadD src)));
17616 
17617   format %{ "addsd   $dst, $src" %}
17618   ins_cost(150);
17619   ins_encode %{
17620     __ addsd($dst$$XMMRegister, $src$$Address);
17621   %}
17622   ins_pipe(pipe_slow);
17623 %}
17624 
17625 instruct addD_imm(regD dst, immD con) %{
17626   predicate(UseAVX == 0);
17627   match(Set dst (AddD dst con));
17628   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17629   ins_cost(150);
17630   ins_encode %{
17631     __ addsd($dst$$XMMRegister, $constantaddress($con));
17632   %}
17633   ins_pipe(pipe_slow);
17634 %}
17635 
17636 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17637   predicate(UseAVX > 0);
17638   match(Set dst (AddD src1 src2));
17639 
17640   format %{ "vaddsd  $dst, $src1, $src2" %}
17641   ins_cost(150);
17642   ins_encode %{
17643     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17644   %}
17645   ins_pipe(pipe_slow);
17646 %}
17647 
17648 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17649   predicate(UseAVX > 0);
17650   match(Set dst (AddD src1 (LoadD src2)));
17651 
17652   format %{ "vaddsd  $dst, $src1, $src2" %}
17653   ins_cost(150);
17654   ins_encode %{
17655     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17656   %}
17657   ins_pipe(pipe_slow);
17658 %}
17659 
17660 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17661   predicate(UseAVX > 0);
17662   match(Set dst (AddD src con));
17663 
17664   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17665   ins_cost(150);
17666   ins_encode %{
17667     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17668   %}
17669   ins_pipe(pipe_slow);
17670 %}
17671 
17672 instruct subF_reg(regF dst, regF src) %{
17673   predicate(UseAVX == 0);
17674   match(Set dst (SubF dst src));
17675 
17676   format %{ "subss   $dst, $src" %}
17677   ins_cost(150);
17678   ins_encode %{
17679     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17680   %}
17681   ins_pipe(pipe_slow);
17682 %}
17683 
17684 instruct subF_mem(regF dst, memory src) %{
17685   predicate(UseAVX == 0);
17686   match(Set dst (SubF dst (LoadF src)));
17687 
17688   format %{ "subss   $dst, $src" %}
17689   ins_cost(150);
17690   ins_encode %{
17691     __ subss($dst$$XMMRegister, $src$$Address);
17692   %}
17693   ins_pipe(pipe_slow);
17694 %}
17695 
17696 instruct subF_imm(regF dst, immF con) %{
17697   predicate(UseAVX == 0);
17698   match(Set dst (SubF dst con));
17699   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17700   ins_cost(150);
17701   ins_encode %{
17702     __ subss($dst$$XMMRegister, $constantaddress($con));
17703   %}
17704   ins_pipe(pipe_slow);
17705 %}
17706 
17707 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17708   predicate(UseAVX > 0);
17709   match(Set dst (SubF src1 src2));
17710 
17711   format %{ "vsubss  $dst, $src1, $src2" %}
17712   ins_cost(150);
17713   ins_encode %{
17714     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17715   %}
17716   ins_pipe(pipe_slow);
17717 %}
17718 
17719 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17720   predicate(UseAVX > 0);
17721   match(Set dst (SubF src1 (LoadF src2)));
17722 
17723   format %{ "vsubss  $dst, $src1, $src2" %}
17724   ins_cost(150);
17725   ins_encode %{
17726     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17727   %}
17728   ins_pipe(pipe_slow);
17729 %}
17730 
17731 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17732   predicate(UseAVX > 0);
17733   match(Set dst (SubF src con));
17734 
17735   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17736   ins_cost(150);
17737   ins_encode %{
17738     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17739   %}
17740   ins_pipe(pipe_slow);
17741 %}
17742 
17743 instruct subD_reg(regD dst, regD src) %{
17744   predicate(UseAVX == 0);
17745   match(Set dst (SubD dst src));
17746 
17747   format %{ "subsd   $dst, $src" %}
17748   ins_cost(150);
17749   ins_encode %{
17750     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17751   %}
17752   ins_pipe(pipe_slow);
17753 %}
17754 
17755 instruct subD_mem(regD dst, memory src) %{
17756   predicate(UseAVX == 0);
17757   match(Set dst (SubD dst (LoadD src)));
17758 
17759   format %{ "subsd   $dst, $src" %}
17760   ins_cost(150);
17761   ins_encode %{
17762     __ subsd($dst$$XMMRegister, $src$$Address);
17763   %}
17764   ins_pipe(pipe_slow);
17765 %}
17766 
17767 instruct subD_imm(regD dst, immD con) %{
17768   predicate(UseAVX == 0);
17769   match(Set dst (SubD dst con));
17770   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17771   ins_cost(150);
17772   ins_encode %{
17773     __ subsd($dst$$XMMRegister, $constantaddress($con));
17774   %}
17775   ins_pipe(pipe_slow);
17776 %}
17777 
17778 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17779   predicate(UseAVX > 0);
17780   match(Set dst (SubD src1 src2));
17781 
17782   format %{ "vsubsd  $dst, $src1, $src2" %}
17783   ins_cost(150);
17784   ins_encode %{
17785     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17786   %}
17787   ins_pipe(pipe_slow);
17788 %}
17789 
17790 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17791   predicate(UseAVX > 0);
17792   match(Set dst (SubD src1 (LoadD src2)));
17793 
17794   format %{ "vsubsd  $dst, $src1, $src2" %}
17795   ins_cost(150);
17796   ins_encode %{
17797     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17798   %}
17799   ins_pipe(pipe_slow);
17800 %}
17801 
17802 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17803   predicate(UseAVX > 0);
17804   match(Set dst (SubD src con));
17805 
17806   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17807   ins_cost(150);
17808   ins_encode %{
17809     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17810   %}
17811   ins_pipe(pipe_slow);
17812 %}
17813 
17814 instruct mulF_reg(regF dst, regF src) %{
17815   predicate(UseAVX == 0);
17816   match(Set dst (MulF dst src));
17817 
17818   format %{ "mulss   $dst, $src" %}
17819   ins_cost(150);
17820   ins_encode %{
17821     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17822   %}
17823   ins_pipe(pipe_slow);
17824 %}
17825 
17826 instruct mulF_mem(regF dst, memory src) %{
17827   predicate(UseAVX == 0);
17828   match(Set dst (MulF dst (LoadF src)));
17829 
17830   format %{ "mulss   $dst, $src" %}
17831   ins_cost(150);
17832   ins_encode %{
17833     __ mulss($dst$$XMMRegister, $src$$Address);
17834   %}
17835   ins_pipe(pipe_slow);
17836 %}
17837 
17838 instruct mulF_imm(regF dst, immF con) %{
17839   predicate(UseAVX == 0);
17840   match(Set dst (MulF dst con));
17841   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17842   ins_cost(150);
17843   ins_encode %{
17844     __ mulss($dst$$XMMRegister, $constantaddress($con));
17845   %}
17846   ins_pipe(pipe_slow);
17847 %}
17848 
17849 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17850   predicate(UseAVX > 0);
17851   match(Set dst (MulF src1 src2));
17852 
17853   format %{ "vmulss  $dst, $src1, $src2" %}
17854   ins_cost(150);
17855   ins_encode %{
17856     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17857   %}
17858   ins_pipe(pipe_slow);
17859 %}
17860 
17861 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17862   predicate(UseAVX > 0);
17863   match(Set dst (MulF src1 (LoadF src2)));
17864 
17865   format %{ "vmulss  $dst, $src1, $src2" %}
17866   ins_cost(150);
17867   ins_encode %{
17868     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17869   %}
17870   ins_pipe(pipe_slow);
17871 %}
17872 
17873 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17874   predicate(UseAVX > 0);
17875   match(Set dst (MulF src con));
17876 
17877   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17878   ins_cost(150);
17879   ins_encode %{
17880     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17881   %}
17882   ins_pipe(pipe_slow);
17883 %}
17884 
17885 instruct mulD_reg(regD dst, regD src) %{
17886   predicate(UseAVX == 0);
17887   match(Set dst (MulD dst src));
17888 
17889   format %{ "mulsd   $dst, $src" %}
17890   ins_cost(150);
17891   ins_encode %{
17892     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17893   %}
17894   ins_pipe(pipe_slow);
17895 %}
17896 
17897 instruct mulD_mem(regD dst, memory src) %{
17898   predicate(UseAVX == 0);
17899   match(Set dst (MulD dst (LoadD src)));
17900 
17901   format %{ "mulsd   $dst, $src" %}
17902   ins_cost(150);
17903   ins_encode %{
17904     __ mulsd($dst$$XMMRegister, $src$$Address);
17905   %}
17906   ins_pipe(pipe_slow);
17907 %}
17908 
17909 instruct mulD_imm(regD dst, immD con) %{
17910   predicate(UseAVX == 0);
17911   match(Set dst (MulD dst con));
17912   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17913   ins_cost(150);
17914   ins_encode %{
17915     __ mulsd($dst$$XMMRegister, $constantaddress($con));
17916   %}
17917   ins_pipe(pipe_slow);
17918 %}
17919 
17920 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17921   predicate(UseAVX > 0);
17922   match(Set dst (MulD src1 src2));
17923 
17924   format %{ "vmulsd  $dst, $src1, $src2" %}
17925   ins_cost(150);
17926   ins_encode %{
17927     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17928   %}
17929   ins_pipe(pipe_slow);
17930 %}
17931 
17932 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17933   predicate(UseAVX > 0);
17934   match(Set dst (MulD src1 (LoadD src2)));
17935 
17936   format %{ "vmulsd  $dst, $src1, $src2" %}
17937   ins_cost(150);
17938   ins_encode %{
17939     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17940   %}
17941   ins_pipe(pipe_slow);
17942 %}
17943 
17944 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17945   predicate(UseAVX > 0);
17946   match(Set dst (MulD src con));
17947 
17948   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17949   ins_cost(150);
17950   ins_encode %{
17951     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17952   %}
17953   ins_pipe(pipe_slow);
17954 %}
17955 
17956 instruct divF_reg(regF dst, regF src) %{
17957   predicate(UseAVX == 0);
17958   match(Set dst (DivF dst src));
17959 
17960   format %{ "divss   $dst, $src" %}
17961   ins_cost(150);
17962   ins_encode %{
17963     __ divss($dst$$XMMRegister, $src$$XMMRegister);
17964   %}
17965   ins_pipe(pipe_slow);
17966 %}
17967 
17968 instruct divF_mem(regF dst, memory src) %{
17969   predicate(UseAVX == 0);
17970   match(Set dst (DivF dst (LoadF src)));
17971 
17972   format %{ "divss   $dst, $src" %}
17973   ins_cost(150);
17974   ins_encode %{
17975     __ divss($dst$$XMMRegister, $src$$Address);
17976   %}
17977   ins_pipe(pipe_slow);
17978 %}
17979 
17980 instruct divF_imm(regF dst, immF con) %{
17981   predicate(UseAVX == 0);
17982   match(Set dst (DivF dst con));
17983   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17984   ins_cost(150);
17985   ins_encode %{
17986     __ divss($dst$$XMMRegister, $constantaddress($con));
17987   %}
17988   ins_pipe(pipe_slow);
17989 %}
17990 
17991 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17992   predicate(UseAVX > 0);
17993   match(Set dst (DivF src1 src2));
17994 
17995   format %{ "vdivss  $dst, $src1, $src2" %}
17996   ins_cost(150);
17997   ins_encode %{
17998     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17999   %}
18000   ins_pipe(pipe_slow);
18001 %}
18002 
18003 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
18004   predicate(UseAVX > 0);
18005   match(Set dst (DivF src1 (LoadF src2)));
18006 
18007   format %{ "vdivss  $dst, $src1, $src2" %}
18008   ins_cost(150);
18009   ins_encode %{
18010     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18011   %}
18012   ins_pipe(pipe_slow);
18013 %}
18014 
18015 instruct divF_reg_imm(regF dst, regF src, immF con) %{
18016   predicate(UseAVX > 0);
18017   match(Set dst (DivF src con));
18018 
18019   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18020   ins_cost(150);
18021   ins_encode %{
18022     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18023   %}
18024   ins_pipe(pipe_slow);
18025 %}
18026 
18027 instruct divD_reg(regD dst, regD src) %{
18028   predicate(UseAVX == 0);
18029   match(Set dst (DivD dst src));
18030 
18031   format %{ "divsd   $dst, $src" %}
18032   ins_cost(150);
18033   ins_encode %{
18034     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
18035   %}
18036   ins_pipe(pipe_slow);
18037 %}
18038 
18039 instruct divD_mem(regD dst, memory src) %{
18040   predicate(UseAVX == 0);
18041   match(Set dst (DivD dst (LoadD src)));
18042 
18043   format %{ "divsd   $dst, $src" %}
18044   ins_cost(150);
18045   ins_encode %{
18046     __ divsd($dst$$XMMRegister, $src$$Address);
18047   %}
18048   ins_pipe(pipe_slow);
18049 %}
18050 
18051 instruct divD_imm(regD dst, immD con) %{
18052   predicate(UseAVX == 0);
18053   match(Set dst (DivD dst con));
18054   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18055   ins_cost(150);
18056   ins_encode %{
18057     __ divsd($dst$$XMMRegister, $constantaddress($con));
18058   %}
18059   ins_pipe(pipe_slow);
18060 %}
18061 
18062 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
18063   predicate(UseAVX > 0);
18064   match(Set dst (DivD src1 src2));
18065 
18066   format %{ "vdivsd  $dst, $src1, $src2" %}
18067   ins_cost(150);
18068   ins_encode %{
18069     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18070   %}
18071   ins_pipe(pipe_slow);
18072 %}
18073 
18074 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
18075   predicate(UseAVX > 0);
18076   match(Set dst (DivD src1 (LoadD src2)));
18077 
18078   format %{ "vdivsd  $dst, $src1, $src2" %}
18079   ins_cost(150);
18080   ins_encode %{
18081     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18082   %}
18083   ins_pipe(pipe_slow);
18084 %}
18085 
18086 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18087   predicate(UseAVX > 0);
18088   match(Set dst (DivD src con));
18089 
18090   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18091   ins_cost(150);
18092   ins_encode %{
18093     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18094   %}
18095   ins_pipe(pipe_slow);
18096 %}
18097 
18098 instruct absF_reg(regF dst) %{
18099   predicate(UseAVX == 0);
18100   match(Set dst (AbsF dst));
18101   ins_cost(150);
18102   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
18103   ins_encode %{
18104     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18105   %}
18106   ins_pipe(pipe_slow);
18107 %}
18108 
18109 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18110   predicate(UseAVX > 0);
18111   match(Set dst (AbsF src));
18112   ins_cost(150);
18113   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18114   ins_encode %{
18115     int vlen_enc = Assembler::AVX_128bit;
18116     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18117               ExternalAddress(float_signmask()), vlen_enc);
18118   %}
18119   ins_pipe(pipe_slow);
18120 %}
18121 
18122 instruct absD_reg(regD dst) %{
18123   predicate(UseAVX == 0);
18124   match(Set dst (AbsD dst));
18125   ins_cost(150);
18126   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
18127             "# abs double by sign masking" %}
18128   ins_encode %{
18129     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18130   %}
18131   ins_pipe(pipe_slow);
18132 %}
18133 
18134 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18135   predicate(UseAVX > 0);
18136   match(Set dst (AbsD src));
18137   ins_cost(150);
18138   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
18139             "# abs double by sign masking" %}
18140   ins_encode %{
18141     int vlen_enc = Assembler::AVX_128bit;
18142     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18143               ExternalAddress(double_signmask()), vlen_enc);
18144   %}
18145   ins_pipe(pipe_slow);
18146 %}
18147 
18148 instruct negF_reg(regF dst) %{
18149   predicate(UseAVX == 0);
18150   match(Set dst (NegF dst));
18151   ins_cost(150);
18152   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
18153   ins_encode %{
18154     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18155   %}
18156   ins_pipe(pipe_slow);
18157 %}
18158 
18159 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18160   predicate(UseAVX > 0);
18161   match(Set dst (NegF src));
18162   ins_cost(150);
18163   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18164   ins_encode %{
18165     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18166                  ExternalAddress(float_signflip()));
18167   %}
18168   ins_pipe(pipe_slow);
18169 %}
18170 
18171 instruct negD_reg(regD dst) %{
18172   predicate(UseAVX == 0);
18173   match(Set dst (NegD dst));
18174   ins_cost(150);
18175   format %{ "xorpd   $dst, [0x8000000000000000]\t"
18176             "# neg double by sign flipping" %}
18177   ins_encode %{
18178     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18179   %}
18180   ins_pipe(pipe_slow);
18181 %}
18182 
18183 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18184   predicate(UseAVX > 0);
18185   match(Set dst (NegD src));
18186   ins_cost(150);
18187   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
18188             "# neg double by sign flipping" %}
18189   ins_encode %{
18190     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18191                  ExternalAddress(double_signflip()));
18192   %}
18193   ins_pipe(pipe_slow);
18194 %}
18195 
18196 // The sqrtss instruction needs its destination register to be pre-initialized for best performance.
18197 // Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
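// (The likely reason: sqrtss/sqrtsd write only the low element and merge the
// upper bits of dst, so a dst holding an unrelated value adds a false
// dependency; with dst == src the dependency is on the input anyway.)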
18198 instruct sqrtF_reg(regF dst) %{
18199   match(Set dst (SqrtF dst));
18200   format %{ "sqrtss  $dst, $dst" %}
18201   ins_encode %{
18202     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18203   %}
18204   ins_pipe(pipe_slow);
18205 %}
18206 
18207 // The sqrtsd instruction needs its destination register to be pre-initialized for best performance.
18208 // Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
18209 instruct sqrtD_reg(regD dst) %{
18210   match(Set dst (SqrtD dst));
18211   format %{ "sqrtsd  $dst, $dst" %}
18212   ins_encode %{
18213     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18214   %}
18215   ins_pipe(pipe_slow);
18216 %}
18217 
18218 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18219   effect(TEMP tmp);
18220   match(Set dst (ConvF2HF src));
18221   ins_cost(125);
18222   format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18223   ins_encode %{
18224     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18225   %}
18226   ins_pipe( pipe_slow );
18227 %}
18228 
18229 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18230   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18231   effect(TEMP ktmp, TEMP rtmp);
18232   match(Set mem (StoreC mem (ConvF2HF src)));
18233   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18234   ins_encode %{
18235     __ movl($rtmp$$Register, 0x1);
18236     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18237     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18238   %}
18239   ins_pipe( pipe_slow );
18240 %}
18241 
18242 instruct vconvF2HF(vec dst, vec src) %{
18243   match(Set dst (VectorCastF2HF src));
18244   format %{ "vector_conv_F2HF $dst $src" %}
18245   ins_encode %{
18246     int vlen_enc = vector_length_encoding(this, $src);
18247     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18248   %}
18249   ins_pipe( pipe_slow );
18250 %}
18251 
18252 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18253   predicate(n->as_StoreVector()->memory_size() >= 16);
18254   match(Set mem (StoreVector mem (VectorCastF2HF src)));
18255   format %{ "vcvtps2ph $mem,$src" %}
18256   ins_encode %{
18257     int vlen_enc = vector_length_encoding(this, $src);
18258     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18259   %}
18260   ins_pipe( pipe_slow );
18261 %}
18262 
18263 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18264   match(Set dst (ConvHF2F src));
18265   format %{ "vcvtph2ps $dst,$src" %}
18266   ins_encode %{
18267     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18268   %}
18269   ins_pipe( pipe_slow );
18270 %}
18271 
18272 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18273   match(Set dst (VectorCastHF2F (LoadVector mem)));
18274   format %{ "vcvtph2ps $dst,$mem" %}
18275   ins_encode %{
18276     int vlen_enc = vector_length_encoding(this);
18277     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18278   %}
18279   ins_pipe( pipe_slow );
18280 %}
18281 
18282 instruct vconvHF2F(vec dst, vec src) %{
18283   match(Set dst (VectorCastHF2F src));
18284   ins_cost(125);
18285   format %{ "vector_conv_HF2F $dst,$src" %}
18286   ins_encode %{
18287     int vlen_enc = vector_length_encoding(this);
18288     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18289   %}
18290   ins_pipe( pipe_slow );
18291 %}
18292 
18293 // ---------------------------------------- VectorReinterpret ------------------------------------
18294 instruct reinterpret_mask(kReg dst) %{
18295   predicate(n->bottom_type()->isa_vectmask() &&
18296             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18297   match(Set dst (VectorReinterpret dst));
18298   ins_cost(125);
18299   format %{ "vector_reinterpret $dst\t!" %}
18300   ins_encode %{
18301     // empty
18302   %}
18303   ins_pipe( pipe_slow );
18304 %}
18305 
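// An opmask cannot be reinterpreted at a different lane width directly in the
// k-registers. In effect, the rules below expand the mask into a vector of
// all-ones/all-zeroes lanes at the source width (evpmovm2w/d/q) and re-extract
// it at byte granularity (evpmovb2m), duplicating each source mask bit across
// the bytes of its lane.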
18306 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18307   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18308             n->bottom_type()->isa_vectmask() &&
18309             n->in(1)->bottom_type()->isa_vectmask() &&
18310             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
18311             n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src differ in lane width but have the same size in bytes
18312   match(Set dst (VectorReinterpret src));
18313   effect(TEMP xtmp);
18314   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18315   ins_encode %{
18316      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18317      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18318      assert(src_sz == dst_sz, "src and dst size mismatch");
18319      int vlen_enc = vector_length_encoding(src_sz);
18320      __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18321      __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18322   %}
18323   ins_pipe( pipe_slow );
18324 %}
18325 
18326 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18327   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18328             n->bottom_type()->isa_vectmask() &&
18329             n->in(1)->bottom_type()->isa_vectmask() &&
18330             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18331              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
18332             n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src differ in lane width but have the same size in bytes
18333   match(Set dst (VectorReinterpret src));
18334   effect(TEMP xtmp);
18335   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18336   ins_encode %{
18337      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18338      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18339      assert(src_sz == dst_sz, "src and dst size mismatch");
18340      int vlen_enc = vector_length_encoding(src_sz);
18341      __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18342      __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18343   %}
18344   ins_pipe( pipe_slow );
18345 %}
18346 
18347 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18348   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18349             n->bottom_type()->isa_vectmask() &&
18350             n->in(1)->bottom_type()->isa_vectmask() &&
18351             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18352              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // same vector size in bytes
18354   match(Set dst (VectorReinterpret src));
18355   effect(TEMP xtmp);
18356   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18357   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18364   %}
18365   ins_pipe( pipe_slow );
18366 %}
18367 
18368 instruct reinterpret(vec dst) %{
18369   predicate(!n->bottom_type()->isa_vectmask() &&
18370             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18371   match(Set dst (VectorReinterpret dst));
18372   ins_cost(125);
18373   format %{ "vector_reinterpret $dst\t!" %}
18374   ins_encode %{
18375     // empty
18376   %}
18377   ins_pipe( pipe_slow );
18378 %}
18379 
18380 instruct reinterpret_expand(vec dst, vec src) %{
18381   predicate(UseAVX == 0 &&
18382             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18383   match(Set dst (VectorReinterpret src));
18384   ins_cost(125);
18385   effect(TEMP dst);
18386   format %{ "vector_reinterpret_expand $dst,$src" %}
18387   ins_encode %{
18388     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18389     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18390 
18391     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18392     if (src_vlen_in_bytes == 4) {
18393       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18394     } else {
18395       assert(src_vlen_in_bytes == 8, "");
18396       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18397     }
18398     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18399   %}
18400   ins_pipe( pipe_slow );
18401 %}
18402 
18403 instruct vreinterpret_expand4(legVec dst, vec src) %{
18404   predicate(UseAVX > 0 &&
18405             !n->bottom_type()->isa_vectmask() &&
18406             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18407             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18408   match(Set dst (VectorReinterpret src));
18409   ins_cost(125);
18410   format %{ "vector_reinterpret_expand $dst,$src" %}
18411   ins_encode %{
18412     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18413   %}
18414   ins_pipe( pipe_slow );
18415 %}
18416 
18417 
18418 instruct vreinterpret_expand(legVec dst, vec src) %{
18419   predicate(UseAVX > 0 &&
18420             !n->bottom_type()->isa_vectmask() &&
18421             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18422             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18423   match(Set dst (VectorReinterpret src));
18424   ins_cost(125);
18425   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18426   ins_encode %{
18427     switch (Matcher::vector_length_in_bytes(this, $src)) {
18428       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18429       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18430       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18431       default: ShouldNotReachHere();
18432     }
18433   %}
18434   ins_pipe( pipe_slow );
18435 %}
18436 
18437 instruct reinterpret_shrink(vec dst, legVec src) %{
18438   predicate(!n->bottom_type()->isa_vectmask() &&
18439             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18440   match(Set dst (VectorReinterpret src));
18441   ins_cost(125);
18442   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18443   ins_encode %{
18444     switch (Matcher::vector_length_in_bytes(this)) {
18445       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18446       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18447       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18448       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18449       default: ShouldNotReachHere();
18450     }
18451   %}
18452   ins_pipe( pipe_slow );
18453 %}
18454 
18455 // ----------------------------------------------------------------------------------------------------
18456 
18457 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18458   match(Set dst (RoundDoubleMode src rmode));
18459   format %{ "roundsd $dst,$src" %}
18460   ins_cost(150);
18461   ins_encode %{
18462     assert(UseSSE >= 4, "required");
18463     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18464       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18465     }
18466     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18467   %}
18468   ins_pipe(pipe_slow);
18469 %}
18470 
18471 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18472   match(Set dst (RoundDoubleMode con rmode));
18473   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18474   ins_cost(150);
18475   ins_encode %{
18476     assert(UseSSE >= 4, "required");
18477     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18478   %}
18479   ins_pipe(pipe_slow);
18480 %}
18481 
18482 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18483   predicate(Matcher::vector_length(n) < 8);
18484   match(Set dst (RoundDoubleModeV src rmode));
18485   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18486   ins_encode %{
18487     assert(UseAVX > 0, "required");
18488     int vlen_enc = vector_length_encoding(this);
18489     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18490   %}
18491   ins_pipe( pipe_slow );
18492 %}
18493 
18494 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18495   predicate(Matcher::vector_length(n) == 8);
18496   match(Set dst (RoundDoubleModeV src rmode));
18497   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18498   ins_encode %{
18499     assert(UseAVX > 2, "required");
18500     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18501   %}
18502   ins_pipe( pipe_slow );
18503 %}
18504 
18505 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18506   predicate(Matcher::vector_length(n) < 8);
18507   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18508   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18509   ins_encode %{
18510     assert(UseAVX > 0, "required");
18511     int vlen_enc = vector_length_encoding(this);
18512     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18513   %}
18514   ins_pipe( pipe_slow );
18515 %}
18516 
18517 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18518   predicate(Matcher::vector_length(n) == 8);
18519   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18520   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18521   ins_encode %{
18522     assert(UseAVX > 2, "required");
18523     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18524   %}
18525   ins_pipe( pipe_slow );
18526 %}
18527 
18528 instruct onspinwait() %{
18529   match(OnSpinWait);
18530   ins_cost(200);
18531 
18532   format %{
18533     $$template
18534     $$emit$$"pause\t! membar_onspinwait"
18535   %}
18536   ins_encode %{
18537     __ pause();
18538   %}
18539   ins_pipe(pipe_slow);
18540 %}
18541 
18542 // a * b + c
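// The FMA rules below emit a fused multiply-add: the product feeds the add
// unrounded, matching the Math.fma contract, which is why they are guarded
// by UseFMA instead of being lowered to a separate multiply and add.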
18543 instruct fmaD_reg(regD a, regD b, regD c) %{
18544   match(Set c (FmaD  c (Binary a b)));
18545   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18546   ins_cost(150);
18547   ins_encode %{
18548     assert(UseFMA, "Needs FMA instructions support.");
18549     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18550   %}
18551   ins_pipe( pipe_slow );
18552 %}
18553 
18554 // a * b + c
18555 instruct fmaF_reg(regF a, regF b, regF c) %{
18556   match(Set c (FmaF  c (Binary a b)));
18557   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18558   ins_cost(150);
18559   ins_encode %{
18560     assert(UseFMA, "Needs FMA instructions support.");
18561     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18562   %}
18563   ins_pipe( pipe_slow );
18564 %}
18565 
18566 // ====================VECTOR INSTRUCTIONS=====================================
18567 
18568 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18569 instruct MoveVec2Leg(legVec dst, vec src) %{
18570   match(Set dst src);
18571   format %{ "" %}
18572   ins_encode %{
18573     ShouldNotReachHere();
18574   %}
18575   ins_pipe( fpu_reg_reg );
18576 %}
18577 
18578 instruct MoveLeg2Vec(vec dst, legVec src) %{
18579   match(Set dst src);
18580   format %{ "" %}
18581   ins_encode %{
18582     ShouldNotReachHere();
18583   %}
18584   ins_pipe( fpu_reg_reg );
18585 %}
18586 
18587 // ============================================================================
18588 
18589 // Load vectors generic operand pattern
18590 instruct loadV(vec dst, memory mem) %{
18591   match(Set dst (LoadVector mem));
18592   ins_cost(125);
18593   format %{ "load_vector $dst,$mem" %}
18594   ins_encode %{
18595     BasicType bt = Matcher::vector_element_basic_type(this);
18596     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18597   %}
18598   ins_pipe( pipe_slow );
18599 %}
18600 
18601 // Store vectors generic operand pattern.
18602 instruct storeV(memory mem, vec src) %{
18603   match(Set mem (StoreVector mem src));
18604   ins_cost(145);
18605   format %{ "store_vector $mem,$src\n\t" %}
18606   ins_encode %{
18607     switch (Matcher::vector_length_in_bytes(this, $src)) {
18608       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18609       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18610       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18611       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18612       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18613       default: ShouldNotReachHere();
18614     }
18615   %}
18616   ins_pipe( pipe_slow );
18617 %}
18618 
18619 // ---------------------------------------- Gather ------------------------------------
18620 
18621 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
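// Hardware gathers exist only for 32- and 64-bit elements, so the rules are
// split by element size: the (ev)gather rules below use vgather/evgather
// directly, while the subword (BYTE/SHORT) variants assemble the result from
// scalar loads (vgather8b/vgather_subword in the C2 macro assembler).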
18622 
18623 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18624   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18625             Matcher::vector_length_in_bytes(n) <= 32);
18626   match(Set dst (LoadVectorGather mem idx));
18627   effect(TEMP dst, TEMP tmp, TEMP mask);
18628   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18629   ins_encode %{
18630     int vlen_enc = vector_length_encoding(this);
18631     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18632     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18633     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18634     __ lea($tmp$$Register, $mem$$Address);
18635     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18636   %}
18637   ins_pipe( pipe_slow );
18638 %}
18639 
18640 
18641 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18642   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18643             !is_subword_type(Matcher::vector_element_basic_type(n)));
18644   match(Set dst (LoadVectorGather mem idx));
18645   effect(TEMP dst, TEMP tmp, TEMP ktmp);
18646   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
18647   ins_encode %{
18648     int vlen_enc = vector_length_encoding(this);
18649     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18650     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18651     __ lea($tmp$$Register, $mem$$Address);
18652     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18653   %}
18654   ins_pipe( pipe_slow );
18655 %}
18656 
18657 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18658   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18659             !is_subword_type(Matcher::vector_element_basic_type(n)));
18660   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18661   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
18662   format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
18663   ins_encode %{
18664     assert(UseAVX > 2, "sanity");
18665     int vlen_enc = vector_length_encoding(this);
18666     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18667     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is copied to a temporary.
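    // Roughly, for a 256-bit T_INT gather this expands to something like
    // (register names are illustrative):
    //   kmovwl       k_tmp, k_mask               // working copy; k_mask stays live
    //   vpxor        ymm_dst, ymm_dst, ymm_dst   // masked-off lanes must read as zero
    //   lea          r_tmp, [mem]
    //   evpgatherdd  ymm_dst{k_tmp}, [r_tmp + ymm_idx*4]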
18670     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18671     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18672     __ lea($tmp$$Register, $mem$$Address);
18673     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18674   %}
18675   ins_pipe( pipe_slow );
18676 %}
18677 
18678 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18679   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18680   match(Set dst (LoadVectorGather mem idx_base));
18681   effect(TEMP tmp, TEMP rtmp);
18682   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18683   ins_encode %{
18684     int vlen_enc = vector_length_encoding(this);
18685     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18686     __ lea($tmp$$Register, $mem$$Address);
18687     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18688   %}
18689   ins_pipe( pipe_slow );
18690 %}
18691 
18692 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18693                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18694   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18695   match(Set dst (LoadVectorGather mem idx_base));
18696   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18697   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18698   ins_encode %{
18699     int vlen_enc = vector_length_encoding(this);
18700     int vector_len = Matcher::vector_length(this);
18701     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18702     __ lea($tmp$$Register, $mem$$Address);
18703     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18704     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18705                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18706   %}
18707   ins_pipe( pipe_slow );
18708 %}
18709 
18710 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18711   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18712   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18713   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18714   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18715   ins_encode %{
18716     int vlen_enc = vector_length_encoding(this);
18717     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18718     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18719     __ lea($tmp$$Register, $mem$$Address);
18720     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18721     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18722   %}
18723   ins_pipe( pipe_slow );
18724 %}
18725 
18726 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18727                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18728   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18729   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18730   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18731   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18732   ins_encode %{
18733     int vlen_enc = vector_length_encoding(this);
18734     int vector_len = Matcher::vector_length(this);
18735     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18736     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18737     __ lea($tmp$$Register, $mem$$Address);
18738     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18739     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18740     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18741                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18742   %}
18743   ins_pipe( pipe_slow );
18744 %}
18745 
18746 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18747   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18748   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18749   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18750   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18751   ins_encode %{
18752     int vlen_enc = vector_length_encoding(this);
18753     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18754     __ lea($tmp$$Register, $mem$$Address);
18755     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18756     if (elem_bt == T_SHORT) {
18757       __ movl($mask_idx$$Register, 0x55555555);
18758       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18759     }
18760     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18761     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18762   %}
18763   ins_pipe( pipe_slow );
18764 %}
18765 
18766 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18767                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18768   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18769   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18770   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18771   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18772   ins_encode %{
18773     int vlen_enc = vector_length_encoding(this);
18774     int vector_len = Matcher::vector_length(this);
18775     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18776     __ lea($tmp$$Register, $mem$$Address);
18777     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18778     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18779     if (elem_bt == T_SHORT) {
18780       __ movl($mask_idx$$Register, 0x55555555);
18781       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18782     }
18783     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18784     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18785                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18786   %}
18787   ins_pipe( pipe_slow );
18788 %}
18789 
18790 // ====================Scatter=======================================
18791 
18792 // Scatter INT, LONG, FLOAT, DOUBLE
18793 
18794 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18795   predicate(UseAVX > 2);
18796   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18797   effect(TEMP tmp, TEMP ktmp);
18798   format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
18799   ins_encode %{
18800     int vlen_enc = vector_length_encoding(this, $src);
18801     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18802 
18803     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18804     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18805 
18806     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18807     __ lea($tmp$$Register, $mem$$Address);
18808     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18809   %}
18810   ins_pipe( pipe_slow );
18811 %}
18812 
18813 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18814   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18815   effect(TEMP tmp, TEMP ktmp);
18816   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18817   ins_encode %{
18818     int vlen_enc = vector_length_encoding(this, $src);
18819     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18820     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18821     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The scatter instruction partially updates the opmask register used
    // for predication, hence the mask operand is copied to a temporary.
18824     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18825     __ lea($tmp$$Register, $mem$$Address);
18826     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18827   %}
18828   ins_pipe( pipe_slow );
18829 %}
18830 
18831 // ====================REPLICATE=======================================
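// Replication broadcasts a scalar into every vector lane. With AVX2 and above
// this is a single (v/ev)pbroadcast* or vbroadcasts*; pre-AVX2 code falls back
// to shuffle sequences (punpckl*, pshuflw, pshufd) that smear the low element
// across the register.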
18832 
18833 // Replicate byte scalar to be vector
18834 instruct vReplB_reg(vec dst, rRegI src) %{
18835   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18836   match(Set dst (Replicate src));
18837   format %{ "replicateB $dst,$src" %}
18838   ins_encode %{
18839     uint vlen = Matcher::vector_length(this);
18840     if (UseAVX >= 2) {
18841       int vlen_enc = vector_length_encoding(this);
18842       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18843         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18844         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18845       } else {
18846         __ movdl($dst$$XMMRegister, $src$$Register);
18847         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18848       }
18849     } else {
      assert(UseAVX < 2, "");
18851       __ movdl($dst$$XMMRegister, $src$$Register);
18852       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18853       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18854       if (vlen >= 16) {
18855         assert(vlen == 16, "");
18856         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18857       }
18858     }
18859   %}
18860   ins_pipe( pipe_slow );
18861 %}
18862 
18863 instruct ReplB_mem(vec dst, memory mem) %{
18864   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18865   match(Set dst (Replicate (LoadB mem)));
18866   format %{ "replicateB $dst,$mem" %}
18867   ins_encode %{
18868     int vlen_enc = vector_length_encoding(this);
18869     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18870   %}
18871   ins_pipe( pipe_slow );
18872 %}
18873 
18874 // ====================ReplicateS=======================================
18875 
18876 instruct vReplS_reg(vec dst, rRegI src) %{
18877   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18878   match(Set dst (Replicate src));
18879   format %{ "replicateS $dst,$src" %}
18880   ins_encode %{
18881     uint vlen = Matcher::vector_length(this);
18882     int vlen_enc = vector_length_encoding(this);
18883     if (UseAVX >= 2) {
18884       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18885         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18886         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18887       } else {
18888         __ movdl($dst$$XMMRegister, $src$$Register);
18889         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18890       }
18891     } else {
18892       assert(UseAVX < 2, "");
18893       __ movdl($dst$$XMMRegister, $src$$Register);
18894       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18895       if (vlen >= 8) {
18896         assert(vlen == 8, "");
18897         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18898       }
18899     }
18900   %}
18901   ins_pipe( pipe_slow );
18902 %}
18903 
18904 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18905   match(Set dst (Replicate con));
18906   effect(TEMP rtmp);
18907   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18908   ins_encode %{
18909     int vlen_enc = vector_length_encoding(this);
18910     BasicType bt = Matcher::vector_element_basic_type(this);
18911     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18912     __ movl($rtmp$$Register, $con$$constant);
18913     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18914   %}
18915   ins_pipe( pipe_slow );
18916 %}
18917 
18918 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18919   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18920   match(Set dst (Replicate src));
18921   effect(TEMP rtmp);
18922   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18923   ins_encode %{
18924     int vlen_enc = vector_length_encoding(this);
18925     __ vmovw($rtmp$$Register, $src$$XMMRegister);
18926     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18927   %}
18928   ins_pipe( pipe_slow );
18929 %}
18930 
18931 instruct ReplS_mem(vec dst, memory mem) %{
18932   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18933   match(Set dst (Replicate (LoadS mem)));
18934   format %{ "replicateS $dst,$mem" %}
18935   ins_encode %{
18936     int vlen_enc = vector_length_encoding(this);
18937     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18938   %}
18939   ins_pipe( pipe_slow );
18940 %}
18941 
18942 // ====================ReplicateI=======================================
18943 
18944 instruct ReplI_reg(vec dst, rRegI src) %{
18945   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18946   match(Set dst (Replicate src));
18947   format %{ "replicateI $dst,$src" %}
18948   ins_encode %{
18949     uint vlen = Matcher::vector_length(this);
18950     int vlen_enc = vector_length_encoding(this);
18951     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18952       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18953     } else if (VM_Version::supports_avx2()) {
18954       __ movdl($dst$$XMMRegister, $src$$Register);
18955       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18956     } else {
18957       __ movdl($dst$$XMMRegister, $src$$Register);
18958       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18959     }
18960   %}
18961   ins_pipe( pipe_slow );
18962 %}
18963 
18964 instruct ReplI_mem(vec dst, memory mem) %{
18965   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18966   match(Set dst (Replicate (LoadI mem)));
18967   format %{ "replicateI $dst,$mem" %}
18968   ins_encode %{
18969     int vlen_enc = vector_length_encoding(this);
18970     if (VM_Version::supports_avx2()) {
18971       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18972     } else if (VM_Version::supports_avx()) {
18973       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18974     } else {
18975       __ movdl($dst$$XMMRegister, $mem$$Address);
18976       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18977     }
18978   %}
18979   ins_pipe( pipe_slow );
18980 %}
18981 
18982 instruct ReplI_imm(vec dst, immI con) %{
18983   predicate(Matcher::is_non_long_integral_vector(n));
18984   match(Set dst (Replicate con));
18985   format %{ "replicateI $dst,$con" %}
18986   ins_encode %{
18987     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18988                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18989                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
18990     BasicType bt = Matcher::vector_element_basic_type(this);
18991     int vlen = Matcher::vector_length_in_bytes(this);
18992     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18993   %}
18994   ins_pipe( pipe_slow );
18995 %}
18996 
18997 // Replicate scalar zero to be vector
18998 instruct ReplI_zero(vec dst, immI_0 zero) %{
18999   predicate(Matcher::is_non_long_integral_vector(n));
19000   match(Set dst (Replicate zero));
19001   format %{ "replicateI $dst,$zero" %}
19002   ins_encode %{
19003     int vlen_enc = vector_length_encoding(this);
19004     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19005       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19006     } else {
19007       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19008     }
19009   %}
19010   ins_pipe( fpu_reg_reg );
19011 %}
19012 
19013 instruct ReplI_M1(vec dst, immI_M1 con) %{
19014   predicate(Matcher::is_non_long_integral_vector(n));
19015   match(Set dst (Replicate con));
19016   format %{ "vallones $dst" %}
19017   ins_encode %{
19018     int vector_len = vector_length_encoding(this);
19019     __ vallones($dst$$XMMRegister, vector_len);
19020   %}
19021   ins_pipe( pipe_slow );
19022 %}
19023 
19024 // ====================ReplicateL=======================================
19025 
19026 // Replicate long (8 byte) scalar to be vector
19027 instruct ReplL_reg(vec dst, rRegL src) %{
19028   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19029   match(Set dst (Replicate src));
19030   format %{ "replicateL $dst,$src" %}
19031   ins_encode %{
19032     int vlen = Matcher::vector_length(this);
19033     int vlen_enc = vector_length_encoding(this);
19034     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19035       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
19036     } else if (VM_Version::supports_avx2()) {
19037       __ movdq($dst$$XMMRegister, $src$$Register);
19038       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19039     } else {
19040       __ movdq($dst$$XMMRegister, $src$$Register);
19041       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19042     }
19043   %}
19044   ins_pipe( pipe_slow );
19045 %}
19046 
19047 instruct ReplL_mem(vec dst, memory mem) %{
19048   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19049   match(Set dst (Replicate (LoadL mem)));
19050   format %{ "replicateL $dst,$mem" %}
19051   ins_encode %{
19052     int vlen_enc = vector_length_encoding(this);
19053     if (VM_Version::supports_avx2()) {
19054       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
19055     } else if (VM_Version::supports_sse3()) {
19056       __ movddup($dst$$XMMRegister, $mem$$Address);
19057     } else {
19058       __ movq($dst$$XMMRegister, $mem$$Address);
19059       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19060     }
19061   %}
19062   ins_pipe( pipe_slow );
19063 %}
19064 
19065 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
19066 instruct ReplL_imm(vec dst, immL con) %{
19067   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19068   match(Set dst (Replicate con));
19069   format %{ "replicateL $dst,$con" %}
19070   ins_encode %{
19071     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19072     int vlen = Matcher::vector_length_in_bytes(this);
19073     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
19074   %}
19075   ins_pipe( pipe_slow );
19076 %}
19077 
19078 instruct ReplL_zero(vec dst, immL0 zero) %{
19079   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19080   match(Set dst (Replicate zero));
19081   format %{ "replicateL $dst,$zero" %}
19082   ins_encode %{
19083     int vlen_enc = vector_length_encoding(this);
19084     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19085       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19086     } else {
19087       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19088     }
19089   %}
19090   ins_pipe( fpu_reg_reg );
19091 %}
19092 
19093 instruct ReplL_M1(vec dst, immL_M1 con) %{
19094   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19095   match(Set dst (Replicate con));
19096   format %{ "vallones $dst" %}
19097   ins_encode %{
19098     int vector_len = vector_length_encoding(this);
19099     __ vallones($dst$$XMMRegister, vector_len);
19100   %}
19101   ins_pipe( pipe_slow );
19102 %}
19103 
19104 // ====================ReplicateF=======================================
19105 
19106 instruct vReplF_reg(vec dst, vlRegF src) %{
19107   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19108   match(Set dst (Replicate src));
19109   format %{ "replicateF $dst,$src" %}
19110   ins_encode %{
19111     uint vlen = Matcher::vector_length(this);
19112     int vlen_enc = vector_length_encoding(this);
19113     if (vlen <= 4) {
19114       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19115     } else if (VM_Version::supports_avx2()) {
19116       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19117     } else {
19118       assert(vlen == 8, "sanity");
19119       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19120       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19121     }
19122   %}
19123   ins_pipe( pipe_slow );
19124 %}
19125 
19126 instruct ReplF_reg(vec dst, vlRegF src) %{
19127   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19128   match(Set dst (Replicate src));
19129   format %{ "replicateF $dst,$src" %}
19130   ins_encode %{
19131     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19132   %}
19133   ins_pipe( pipe_slow );
19134 %}
19135 
19136 instruct ReplF_mem(vec dst, memory mem) %{
19137   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19138   match(Set dst (Replicate (LoadF mem)));
19139   format %{ "replicateF $dst,$mem" %}
19140   ins_encode %{
19141     int vlen_enc = vector_length_encoding(this);
19142     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19143   %}
19144   ins_pipe( pipe_slow );
19145 %}
19146 
19147 // Replicate float scalar immediate to be vector by loading from const table.
19148 instruct ReplF_imm(vec dst, immF con) %{
19149   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19150   match(Set dst (Replicate con));
19151   format %{ "replicateF $dst,$con" %}
19152   ins_encode %{
19153     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19154                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19155     int vlen = Matcher::vector_length_in_bytes(this);
19156     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19157   %}
19158   ins_pipe( pipe_slow );
19159 %}
19160 
19161 instruct ReplF_zero(vec dst, immF0 zero) %{
19162   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19163   match(Set dst (Replicate zero));
19164   format %{ "replicateF $dst,$zero" %}
19165   ins_encode %{
19166     int vlen_enc = vector_length_encoding(this);
19167     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19168       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19169     } else {
19170       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19171     }
19172   %}
19173   ins_pipe( fpu_reg_reg );
19174 %}
19175 
19176 // ====================ReplicateD=======================================
19177 
19178 // Replicate double (8 bytes) scalar to be vector
19179 instruct vReplD_reg(vec dst, vlRegD src) %{
19180   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19181   match(Set dst (Replicate src));
19182   format %{ "replicateD $dst,$src" %}
19183   ins_encode %{
19184     uint vlen = Matcher::vector_length(this);
19185     int vlen_enc = vector_length_encoding(this);
19186     if (vlen <= 2) {
19187       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19188     } else if (VM_Version::supports_avx2()) {
19189       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19190     } else {
19191       assert(vlen == 4, "sanity");
19192       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19193       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19194     }
19195   %}
19196   ins_pipe( pipe_slow );
19197 %}
19198 
19199 instruct ReplD_reg(vec dst, vlRegD src) %{
19200   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19201   match(Set dst (Replicate src));
19202   format %{ "replicateD $dst,$src" %}
19203   ins_encode %{
19204     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19205   %}
19206   ins_pipe( pipe_slow );
19207 %}
19208 
19209 instruct ReplD_mem(vec dst, memory mem) %{
19210   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19211   match(Set dst (Replicate (LoadD mem)));
19212   format %{ "replicateD $dst,$mem" %}
19213   ins_encode %{
19214     if (Matcher::vector_length(this) >= 4) {
19215       int vlen_enc = vector_length_encoding(this);
19216       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19217     } else {
19218       __ movddup($dst$$XMMRegister, $mem$$Address);
19219     }
19220   %}
19221   ins_pipe( pipe_slow );
19222 %}
19223 
19224 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19225 instruct ReplD_imm(vec dst, immD con) %{
19226   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19227   match(Set dst (Replicate con));
19228   format %{ "replicateD $dst,$con" %}
19229   ins_encode %{
19230     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19231     int vlen = Matcher::vector_length_in_bytes(this);
19232     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19233   %}
19234   ins_pipe( pipe_slow );
19235 %}
19236 
19237 instruct ReplD_zero(vec dst, immD0 zero) %{
19238   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19239   match(Set dst (Replicate zero));
19240   format %{ "replicateD $dst,$zero" %}
19241   ins_encode %{
19242     int vlen_enc = vector_length_encoding(this);
19243     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19244       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19245     } else {
19246       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19247     }
19248   %}
19249   ins_pipe( fpu_reg_reg );
19250 %}
19251 
19252 // ====================VECTOR INSERT=======================================
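// There is no scalar insert into vectors wider than 128 bits, so the >128-bit
// rules below extract the 128-bit lane holding the target element, insert into
// that lane with a scalar pinsr*/insertps, and write the lane back: x_idx
// selects the element within the lane, y_idx selects the lane.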
19253 
19254 instruct insert(vec dst, rRegI val, immU8 idx) %{
19255   predicate(Matcher::vector_length_in_bytes(n) < 32);
19256   match(Set dst (VectorInsert (Binary dst val) idx));
19257   format %{ "vector_insert $dst,$val,$idx" %}
19258   ins_encode %{
19259     assert(UseSSE >= 4, "required");
19260     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19261 
19262     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19263 
19264     assert(is_integral_type(elem_bt), "");
19265     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19266 
19267     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19268   %}
19269   ins_pipe( pipe_slow );
19270 %}
19271 
19272 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19273   predicate(Matcher::vector_length_in_bytes(n) == 32);
19274   match(Set dst (VectorInsert (Binary src val) idx));
19275   effect(TEMP vtmp);
19276   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19277   ins_encode %{
19278     int vlen_enc = Assembler::AVX_256bit;
19279     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19280     int elem_per_lane = 16/type2aelembytes(elem_bt);
19281     int log2epr = log2(elem_per_lane);
19282 
19283     assert(is_integral_type(elem_bt), "sanity");
19284     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19285 
19286     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19287     uint y_idx = ($idx$$constant >> log2epr) & 1;
19288     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19289     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19290     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19291   %}
19292   ins_pipe( pipe_slow );
19293 %}
19294 
19295 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19296   predicate(Matcher::vector_length_in_bytes(n) == 64);
19297   match(Set dst (VectorInsert (Binary src val) idx));
19298   effect(TEMP vtmp);
19299   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19300   ins_encode %{
19301     assert(UseAVX > 2, "sanity");
19302 
19303     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19304     int elem_per_lane = 16/type2aelembytes(elem_bt);
19305     int log2epr = log2(elem_per_lane);
19306 
19307     assert(is_integral_type(elem_bt), "");
19308     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19309 
19310     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19311     uint y_idx = ($idx$$constant >> log2epr) & 3;
19312     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19313     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19314     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19315   %}
19316   ins_pipe( pipe_slow );
19317 %}
19318 
19319 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19320   predicate(Matcher::vector_length(n) == 2);
19321   match(Set dst (VectorInsert (Binary dst val) idx));
19322   format %{ "vector_insert $dst,$val,$idx" %}
19323   ins_encode %{
19324     assert(UseSSE >= 4, "required");
19325     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19326     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19327 
19328     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19329   %}
19330   ins_pipe( pipe_slow );
19331 %}
19332 
19333 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19334   predicate(Matcher::vector_length(n) == 4);
19335   match(Set dst (VectorInsert (Binary src val) idx));
19336   effect(TEMP vtmp);
19337   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19338   ins_encode %{
19339     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19340     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19341 
19342     uint x_idx = $idx$$constant & right_n_bits(1);
19343     uint y_idx = ($idx$$constant >> 1) & 1;
19344     int vlen_enc = Assembler::AVX_256bit;
19345     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19346     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19347     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19348   %}
19349   ins_pipe( pipe_slow );
19350 %}
19351 
19352 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19353   predicate(Matcher::vector_length(n) == 8);
19354   match(Set dst (VectorInsert (Binary src val) idx));
19355   effect(TEMP vtmp);
19356   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19357   ins_encode %{
19358     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19359     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19360 
19361     uint x_idx = $idx$$constant & right_n_bits(1);
19362     uint y_idx = ($idx$$constant >> 1) & 3;
19363     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19364     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19365     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19366   %}
19367   ins_pipe( pipe_slow );
19368 %}
19369 
19370 instruct insertF(vec dst, regF val, immU8 idx) %{
19371   predicate(Matcher::vector_length(n) < 8);
19372   match(Set dst (VectorInsert (Binary dst val) idx));
19373   format %{ "vector_insert $dst,$val,$idx" %}
19374   ins_encode %{
19375     assert(UseSSE >= 4, "sanity");
19376 
19377     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19378     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19379 
19380     uint x_idx = $idx$$constant & right_n_bits(2);
19381     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19382   %}
19383   ins_pipe( pipe_slow );
19384 %}
19385 
19386 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19387   predicate(Matcher::vector_length(n) >= 8);
19388   match(Set dst (VectorInsert (Binary src val) idx));
19389   effect(TEMP vtmp);
19390   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19391   ins_encode %{
19392     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19393     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19394 
19395     int vlen = Matcher::vector_length(this);
19396     uint x_idx = $idx$$constant & right_n_bits(2);
19397     if (vlen == 8) {
19398       uint y_idx = ($idx$$constant >> 2) & 1;
19399       int vlen_enc = Assembler::AVX_256bit;
19400       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19401       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19402       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19403     } else {
19404       assert(vlen == 16, "sanity");
19405       uint y_idx = ($idx$$constant >> 2) & 3;
19406       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19407       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19408       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19409     }
19410   %}
19411   ins_pipe( pipe_slow );
19412 %}
19413 
19414 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19415   predicate(Matcher::vector_length(n) == 2);
19416   match(Set dst (VectorInsert (Binary dst val) idx));
19417   effect(TEMP tmp);
19418   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19419   ins_encode %{
19420     assert(UseSSE >= 4, "sanity");
19421     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19422     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19423 
19424     __ movq($tmp$$Register, $val$$XMMRegister);
19425     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19426   %}
19427   ins_pipe( pipe_slow );
19428 %}
19429 
19430 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19431   predicate(Matcher::vector_length(n) == 4);
19432   match(Set dst (VectorInsert (Binary src val) idx));
19433   effect(TEMP vtmp, TEMP tmp);
19434   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19435   ins_encode %{
19436     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19437     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19438 
19439     uint x_idx = $idx$$constant & right_n_bits(1);
19440     uint y_idx = ($idx$$constant >> 1) & 1;
19441     int vlen_enc = Assembler::AVX_256bit;
19442     __ movq($tmp$$Register, $val$$XMMRegister);
19443     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19444     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19445     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19446   %}
19447   ins_pipe( pipe_slow );
19448 %}
19449 
19450 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19451   predicate(Matcher::vector_length(n) == 8);
19452   match(Set dst (VectorInsert (Binary src val) idx));
19453   effect(TEMP tmp, TEMP vtmp);
19454   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19455   ins_encode %{
19456     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19457     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19458 
19459     uint x_idx = $idx$$constant & right_n_bits(1);
19460     uint y_idx = ($idx$$constant >> 1) & 3;
19461     __ movq($tmp$$Register, $val$$XMMRegister);
19462     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19463     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19464     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19465   %}
19466   ins_pipe( pipe_slow );
19467 %}
19468 
19469 // ====================REDUCTION ARITHMETIC=======================================
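// A reduction combines every lane of the vector input (src2) with the scalar
// accumulator (src1). Integer reductions typically fold the upper half of the
// vector onto the lower half until a single element remains, then combine it
// with src1; strictly ordered FP reductions instead accumulate lane by lane to
// preserve rounding order (see reduceI/reduceL/reduce_fp in the C2 macro
// assembler).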
19470 
19471 // =======================Int Reduction==========================================
19472 
19473 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19474   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19475   match(Set dst (AddReductionVI src1 src2));
19476   match(Set dst (MulReductionVI src1 src2));
19477   match(Set dst (AndReductionV  src1 src2));
19478   match(Set dst ( OrReductionV  src1 src2));
19479   match(Set dst (XorReductionV  src1 src2));
19480   match(Set dst (MinReductionV  src1 src2));
19481   match(Set dst (MaxReductionV  src1 src2));
19482   effect(TEMP vtmp1, TEMP vtmp2);
19483   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19484   ins_encode %{
19485     int opcode = this->ideal_Opcode();
19486     int vlen = Matcher::vector_length(this, $src2);
19487     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19488   %}
19489   ins_pipe( pipe_slow );
19490 %}
19491 
19492 // =======================Long Reduction==========================================
19493 
19494 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19495   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19496   match(Set dst (AddReductionVL src1 src2));
19497   match(Set dst (MulReductionVL src1 src2));
19498   match(Set dst (AndReductionV  src1 src2));
19499   match(Set dst ( OrReductionV  src1 src2));
19500   match(Set dst (XorReductionV  src1 src2));
19501   match(Set dst (MinReductionV  src1 src2));
19502   match(Set dst (MaxReductionV  src1 src2));
19503   effect(TEMP vtmp1, TEMP vtmp2);
19504   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19505   ins_encode %{
19506     int opcode = this->ideal_Opcode();
19507     int vlen = Matcher::vector_length(this, $src2);
19508     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19509   %}
19510   ins_pipe( pipe_slow );
19511 %}
19512 
19513 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19514   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19515   match(Set dst (AddReductionVL src1 src2));
19516   match(Set dst (MulReductionVL src1 src2));
19517   match(Set dst (AndReductionV  src1 src2));
19518   match(Set dst ( OrReductionV  src1 src2));
19519   match(Set dst (XorReductionV  src1 src2));
19520   match(Set dst (MinReductionV  src1 src2));
19521   match(Set dst (MaxReductionV  src1 src2));
19522   effect(TEMP vtmp1, TEMP vtmp2);
19523   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19524   ins_encode %{
19525     int opcode = this->ideal_Opcode();
19526     int vlen = Matcher::vector_length(this, $src2);
19527     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19528   %}
19529   ins_pipe( pipe_slow );
19530 %}
19531 
19532 // =======================Float Reduction==========================================
19533 
19534 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19535   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19536   match(Set dst (AddReductionVF dst src));
19537   match(Set dst (MulReductionVF dst src));
19538   effect(TEMP dst, TEMP vtmp);
19539   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19540   ins_encode %{
19541     int opcode = this->ideal_Opcode();
19542     int vlen = Matcher::vector_length(this, $src);
19543     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19544   %}
19545   ins_pipe( pipe_slow );
19546 %}
19547 
19548 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19549   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19550   match(Set dst (AddReductionVF dst src));
19551   match(Set dst (MulReductionVF dst src));
19552   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19553   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19554   ins_encode %{
19555     int opcode = this->ideal_Opcode();
19556     int vlen = Matcher::vector_length(this, $src);
19557     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19558   %}
19559   ins_pipe( pipe_slow );
19560 %}
19561 
19562 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19563   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19564   match(Set dst (AddReductionVF dst src));
19565   match(Set dst (MulReductionVF dst src));
19566   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19567   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19568   ins_encode %{
19569     int opcode = this->ideal_Opcode();
19570     int vlen = Matcher::vector_length(this, $src);
19571     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19572   %}
19573   ins_pipe( pipe_slow );
19574 %}
19575 
19576 
19577 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19578   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19579   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19580   // src1 contains reduction identity
19581   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19582   match(Set dst (AddReductionVF src1 src2));
19583   match(Set dst (MulReductionVF src1 src2));
19584   effect(TEMP dst);
19585   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19586   ins_encode %{
19587     int opcode = this->ideal_Opcode();
19588     int vlen = Matcher::vector_length(this, $src2);
19589     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19590   %}
19591   ins_pipe( pipe_slow );
19592 %}
19593 
19594 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19595   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19596   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19597   // src1 contains reduction identity
19598   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19599   match(Set dst (AddReductionVF src1 src2));
19600   match(Set dst (MulReductionVF src1 src2));
19601   effect(TEMP dst, TEMP vtmp);
19602   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19603   ins_encode %{
19604     int opcode = this->ideal_Opcode();
19605     int vlen = Matcher::vector_length(this, $src2);
19606     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19607   %}
19608   ins_pipe( pipe_slow );
19609 %}
19610 
19611 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19612   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19613   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19614   // src1 contains reduction identity
19615   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19616   match(Set dst (AddReductionVF src1 src2));
19617   match(Set dst (MulReductionVF src1 src2));
19618   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19619   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19620   ins_encode %{
19621     int opcode = this->ideal_Opcode();
19622     int vlen = Matcher::vector_length(this, $src2);
19623     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19624   %}
19625   ins_pipe( pipe_slow );
19626 %}
19627 
19628 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19629   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19630   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19631   // src1 contains reduction identity
19632   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19633   match(Set dst (AddReductionVF src1 src2));
19634   match(Set dst (MulReductionVF src1 src2));
19635   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19636   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19637   ins_encode %{
19638     int opcode = this->ideal_Opcode();
19639     int vlen = Matcher::vector_length(this, $src2);
19640     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19641   %}
19642   ins_pipe( pipe_slow );
19643 %}
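
// Why strict and unordered float rules coexist: a reduction produced by
// auto-vectorizing a Java loop must return the bit-identical result of the
// sequential loop, so the strict rules accumulate one lane at a time into
// $dst (which is both input and output). The Vector API permits
// re-association, so the unordered rules may fold lanes pairwise and seed
// the result with the identity passed in $src1. Illustrative sketches (not
// the actual expansions):
//
//   // Strict (sequential) order:
//   float strict_reduce(float dst, const float* src, int vlen) {
//     for (int i = 0; i < vlen; i++) dst += src[i];  // one lane at a time
//     return dst;
//   }
//
//   // Unordered: pairwise fold in log2(vlen) steps, then add the identity.
//   float unordered_reduce(float identity, float* src, int vlen) {
//     for (int stride = vlen / 2; stride > 0; stride /= 2) {
//       for (int i = 0; i < stride; i++) src[i] += src[i + stride];
//     }
//     return identity + src[0];
//   }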
19644 
19645 // =======================Double Reduction==========================================
19646 
19647 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19648   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19649   match(Set dst (AddReductionVD dst src));
19650   match(Set dst (MulReductionVD dst src));
19651   effect(TEMP dst, TEMP vtmp);
19652   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19653   ins_encode %{
19654     int opcode = this->ideal_Opcode();
19655     int vlen = Matcher::vector_length(this, $src);
19656     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
19658   ins_pipe( pipe_slow );
19659 %}
19660 
19661 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19662   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19663   match(Set dst (AddReductionVD dst src));
19664   match(Set dst (MulReductionVD dst src));
19665   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19666   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19667   ins_encode %{
19668     int opcode = this->ideal_Opcode();
19669     int vlen = Matcher::vector_length(this, $src);
19670     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19671   %}
19672   ins_pipe( pipe_slow );
19673 %}
19674 
19675 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19676   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19677   match(Set dst (AddReductionVD dst src));
19678   match(Set dst (MulReductionVD dst src));
19679   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19680   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19681   ins_encode %{
19682     int opcode = this->ideal_Opcode();
19683     int vlen = Matcher::vector_length(this, $src);
19684     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19685   %}
19686   ins_pipe( pipe_slow );
19687 %}
19688 
19689 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19690   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19691   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19692   // src1 contains reduction identity
19693   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19694   match(Set dst (AddReductionVD src1 src2));
19695   match(Set dst (MulReductionVD src1 src2));
19696   effect(TEMP dst);
19697   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19698   ins_encode %{
19699     int opcode = this->ideal_Opcode();
19700     int vlen = Matcher::vector_length(this, $src2);
19701     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
19703   ins_pipe( pipe_slow );
19704 %}
19705 
19706 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19707   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19708   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19709   // src1 contains reduction identity
19710   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19711   match(Set dst (AddReductionVD src1 src2));
19712   match(Set dst (MulReductionVD src1 src2));
19713   effect(TEMP dst, TEMP vtmp);
19714   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19715   ins_encode %{
19716     int opcode = this->ideal_Opcode();
19717     int vlen = Matcher::vector_length(this, $src2);
19718     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19719   %}
19720   ins_pipe( pipe_slow );
19721 %}
19722 
19723 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19724   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19725   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19726   // src1 contains reduction identity
19727   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19728   match(Set dst (AddReductionVD src1 src2));
19729   match(Set dst (MulReductionVD src1 src2));
19730   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19731   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19732   ins_encode %{
19733     int opcode = this->ideal_Opcode();
19734     int vlen = Matcher::vector_length(this, $src2);
19735     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19736   %}
19737   ins_pipe( pipe_slow );
19738 %}
19739 
19740 // =======================Byte Reduction==========================================
19741 
19742 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19743   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19744   match(Set dst (AddReductionVI src1 src2));
19745   match(Set dst (AndReductionV  src1 src2));
19746   match(Set dst ( OrReductionV  src1 src2));
19747   match(Set dst (XorReductionV  src1 src2));
19748   match(Set dst (MinReductionV  src1 src2));
19749   match(Set dst (MaxReductionV  src1 src2));
19750   effect(TEMP vtmp1, TEMP vtmp2);
19751   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19752   ins_encode %{
19753     int opcode = this->ideal_Opcode();
19754     int vlen = Matcher::vector_length(this, $src2);
19755     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19756   %}
19757   ins_pipe( pipe_slow );
19758 %}
19759 
19760 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19761   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19762   match(Set dst (AddReductionVI src1 src2));
19763   match(Set dst (AndReductionV  src1 src2));
19764   match(Set dst ( OrReductionV  src1 src2));
19765   match(Set dst (XorReductionV  src1 src2));
19766   match(Set dst (MinReductionV  src1 src2));
19767   match(Set dst (MaxReductionV  src1 src2));
19768   effect(TEMP vtmp1, TEMP vtmp2);
19769   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19770   ins_encode %{
19771     int opcode = this->ideal_Opcode();
19772     int vlen = Matcher::vector_length(this, $src2);
19773     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19774   %}
19775   ins_pipe( pipe_slow );
19776 %}
19777 
19778 // =======================Short Reduction==========================================
19779 
19780 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19781   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19782   match(Set dst (AddReductionVI src1 src2));
19783   match(Set dst (MulReductionVI src1 src2));
19784   match(Set dst (AndReductionV  src1 src2));
19785   match(Set dst ( OrReductionV  src1 src2));
19786   match(Set dst (XorReductionV  src1 src2));
19787   match(Set dst (MinReductionV  src1 src2));
19788   match(Set dst (MaxReductionV  src1 src2));
19789   effect(TEMP vtmp1, TEMP vtmp2);
19790   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19791   ins_encode %{
19792     int opcode = this->ideal_Opcode();
19793     int vlen = Matcher::vector_length(this, $src2);
19794     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19795   %}
19796   ins_pipe( pipe_slow );
19797 %}
19798 
19799 // =======================Mul Reduction==========================================
19800 
19801 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19802   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19803             Matcher::vector_length(n->in(2)) <= 32); // src2
19804   match(Set dst (MulReductionVI src1 src2));
19805   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19806   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19807   ins_encode %{
19808     int opcode = this->ideal_Opcode();
19809     int vlen = Matcher::vector_length(this, $src2);
19810     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19811   %}
19812   ins_pipe( pipe_slow );
19813 %}
19814 
19815 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19816   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19817             Matcher::vector_length(n->in(2)) == 64); // src2
19818   match(Set dst (MulReductionVI src1 src2));
19819   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19820   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19821   ins_encode %{
19822     int opcode = this->ideal_Opcode();
19823     int vlen = Matcher::vector_length(this, $src2);
19824     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19825   %}
19826   ins_pipe( pipe_slow );
19827 %}
19828 
//--------------------Min/Max Float Reduction --------------------
19831 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19832                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19833   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19834             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19835              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19836             Matcher::vector_length(n->in(2)) == 2);
19837   match(Set dst (MinReductionV src1 src2));
19838   match(Set dst (MaxReductionV src1 src2));
19839   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19840   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19841   ins_encode %{
19842     assert(UseAVX > 0, "sanity");
19843 
19844     int opcode = this->ideal_Opcode();
19845     int vlen = Matcher::vector_length(this, $src2);
19846     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19847                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19848   %}
19849   ins_pipe( pipe_slow );
19850 %}
19851 
19852 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19853                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19854   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19855             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19856              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19857             Matcher::vector_length(n->in(2)) >= 4);
19858   match(Set dst (MinReductionV src1 src2));
19859   match(Set dst (MaxReductionV src1 src2));
19860   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19861   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19862   ins_encode %{
19863     assert(UseAVX > 0, "sanity");
19864 
19865     int opcode = this->ideal_Opcode();
19866     int vlen = Matcher::vector_length(this, $src2);
19867     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19868                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19869   %}
19870   ins_pipe( pipe_slow );
19871 %}
19872 
19873 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19874                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19875   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19876             Matcher::vector_length(n->in(2)) == 2);
19877   match(Set dst (MinReductionV dst src));
19878   match(Set dst (MaxReductionV dst src));
19879   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19880   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19881   ins_encode %{
19882     assert(UseAVX > 0, "sanity");
19883 
19884     int opcode = this->ideal_Opcode();
19885     int vlen = Matcher::vector_length(this, $src);
19886     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19887                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19888   %}
19889   ins_pipe( pipe_slow );
19890 %}
19891 
19892 
19893 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19894                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19895   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19896             Matcher::vector_length(n->in(2)) >= 4);
19897   match(Set dst (MinReductionV dst src));
19898   match(Set dst (MaxReductionV dst src));
19899   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19900   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19901   ins_encode %{
19902     assert(UseAVX > 0, "sanity");
19903 
19904     int opcode = this->ideal_Opcode();
19905     int vlen = Matcher::vector_length(this, $src);
19906     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19907                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19908   %}
19909   ins_pipe( pipe_slow );
19910 %}
19911 
19912 instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{
19913   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19914             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19915              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19916             Matcher::vector_length(n->in(2)) == 2);
19917   match(Set dst (MinReductionV src1 src2));
19918   match(Set dst (MaxReductionV src1 src2));
19919   effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19921   ins_encode %{
19922     int opcode = this->ideal_Opcode();
19923     int vlen = Matcher::vector_length(this, $src2);
19924     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19925                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19926   %}
19927   ins_pipe( pipe_slow );
19928 %}
19929 
19930 instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19931   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19932             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19933              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19934             Matcher::vector_length(n->in(2)) >= 4);
19935   match(Set dst (MinReductionV src1 src2));
19936   match(Set dst (MaxReductionV src1 src2));
19937   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19939   ins_encode %{
19940     int opcode = this->ideal_Opcode();
19941     int vlen = Matcher::vector_length(this, $src2);
19942     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19943                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19944   %}
19945   ins_pipe( pipe_slow );
19946 %}
19947 
19948 instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{
19949   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19950             Matcher::vector_length(n->in(2)) == 2);
19951   match(Set dst (MinReductionV dst src));
19952   match(Set dst (MaxReductionV dst src));
19953   effect(TEMP dst, TEMP xtmp1);
19954   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19955   ins_encode %{
19956     int opcode = this->ideal_Opcode();
19957     int vlen = Matcher::vector_length(this, $src);
19958     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19959                          $xtmp1$$XMMRegister);
19960   %}
19961   ins_pipe( pipe_slow );
19962 %}
19963 
19964 instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19965   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19966             Matcher::vector_length(n->in(2)) >= 4);
19967   match(Set dst (MinReductionV dst src));
19968   match(Set dst (MaxReductionV dst src));
19969   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19971   ins_encode %{
19972     int opcode = this->ideal_Opcode();
19973     int vlen = Matcher::vector_length(this, $src);
19974     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19975                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19976   %}
19977   ins_pipe( pipe_slow );
19978 %}
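
// The immF-input rules above only accept a scalar input that is the
// reduction identity (+Inf for min, -Inf for max), which lets the expansion
// fold just the lanes of $src2 and ignore $src1 entirely. A scalar sketch
// of the min case (illustrative; assumes <math.h>):
//
//   float min_reduction_float(const float* src2, int vlen) {
//     float acc = INFINITY;            // the identity the predicate checks
//     for (int i = 0; i < vlen; i++) {
//       acc = fminf(acc, src2[i]);
//     }
//     return acc;
//   }
//
// The *_av variants instead match (MinReductionV dst src) and keep folding
// into the running $dst value.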
19979 
//--------------------Min/Max Double Reduction --------------------
19981 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19982                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19983   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19984             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19985              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19986             Matcher::vector_length(n->in(2)) == 2);
19987   match(Set dst (MinReductionV src1 src2));
19988   match(Set dst (MaxReductionV src1 src2));
19989   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19990   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19991   ins_encode %{
19992     assert(UseAVX > 0, "sanity");
19993 
19994     int opcode = this->ideal_Opcode();
19995     int vlen = Matcher::vector_length(this, $src2);
19996     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19997                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19998   %}
19999   ins_pipe( pipe_slow );
20000 %}
20001 
20002 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20003                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20004   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20005             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20006              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20007             Matcher::vector_length(n->in(2)) >= 4);
20008   match(Set dst (MinReductionV src1 src2));
20009   match(Set dst (MaxReductionV src1 src2));
20010   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20011   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20012   ins_encode %{
20013     assert(UseAVX > 0, "sanity");
20014 
20015     int opcode = this->ideal_Opcode();
20016     int vlen = Matcher::vector_length(this, $src2);
20017     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20018                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20019   %}
20020   ins_pipe( pipe_slow );
20021 %}
20022 
20023 
20024 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
20025                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20026   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20027             Matcher::vector_length(n->in(2)) == 2);
20028   match(Set dst (MinReductionV dst src));
20029   match(Set dst (MaxReductionV dst src));
20030   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20031   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20032   ins_encode %{
20033     assert(UseAVX > 0, "sanity");
20034 
20035     int opcode = this->ideal_Opcode();
20036     int vlen = Matcher::vector_length(this, $src);
20037     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20038                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20039   %}
20040   ins_pipe( pipe_slow );
20041 %}
20042 
20043 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
20044                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20045   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20046             Matcher::vector_length(n->in(2)) >= 4);
20047   match(Set dst (MinReductionV dst src));
20048   match(Set dst (MaxReductionV dst src));
20049   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20050   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20051   ins_encode %{
20052     assert(UseAVX > 0, "sanity");
20053 
20054     int opcode = this->ideal_Opcode();
20055     int vlen = Matcher::vector_length(this, $src);
20056     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20057                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20058   %}
20059   ins_pipe( pipe_slow );
20060 %}
20061 
20062 instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{
20063   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20064             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20065              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20066             Matcher::vector_length(n->in(2)) == 2);
20067   match(Set dst (MinReductionV src1 src2));
20068   match(Set dst (MaxReductionV src1 src2));
20069   effect(TEMP dst, TEMP xtmp1);
20070   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20071   ins_encode %{
20072     int opcode = this->ideal_Opcode();
20073     int vlen = Matcher::vector_length(this, $src2);
20074     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20075                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
20076   %}
20077   ins_pipe( pipe_slow );
20078 %}
20079 
20080 instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20081   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20082             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20083              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20084             Matcher::vector_length(n->in(2)) >= 4);
20085   match(Set dst (MinReductionV src1 src2));
20086   match(Set dst (MaxReductionV src1 src2));
20087   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20088   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20089   ins_encode %{
20090     int opcode = this->ideal_Opcode();
20091     int vlen = Matcher::vector_length(this, $src2);
20092     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20093                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20094   %}
20095   ins_pipe( pipe_slow );
20096 %}
20097 
20098 
20099 instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{
20100   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20101             Matcher::vector_length(n->in(2)) == 2);
20102   match(Set dst (MinReductionV dst src));
20103   match(Set dst (MaxReductionV dst src));
20104   effect(TEMP dst, TEMP xtmp1);
20105   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20106   ins_encode %{
20107     int opcode = this->ideal_Opcode();
20108     int vlen = Matcher::vector_length(this, $src);
20109     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20110                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20111   %}
20112   ins_pipe( pipe_slow );
20113 %}
20114 
20115 instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20116   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20117             Matcher::vector_length(n->in(2)) >= 4);
20118   match(Set dst (MinReductionV dst src));
20119   match(Set dst (MaxReductionV dst src));
20120   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20121   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20122   ins_encode %{
20123     int opcode = this->ideal_Opcode();
20124     int vlen = Matcher::vector_length(this, $src);
20125     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20126                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20127   %}
20128   ins_pipe( pipe_slow );
20129 %}
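
// The AVX10.2 rules need fewer temporaries and do not kill the flags
// because the ISA provides direct IEEE 754-2019 minimum/maximum operations,
// whereas the legacy rules must synthesize Java's Math.min/max semantics
// from compares and blends. Those semantics, as a scalar sketch (assumes
// <math.h> for signbit):
//
//   double java_min(double a, double b) {
//     if (a != a) return a;                        // NaN propagates
//     if (a == 0.0 && b == 0.0) {                  // -0.0 is smaller than +0.0
//       return (signbit(a) || signbit(b)) ? -0.0 : 0.0;
//     }
//     return a < b ? a : b;
//   }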
20130 
20131 // ====================VECTOR ARITHMETIC=======================================
20132 
20133 // --------------------------------- ADD --------------------------------------
20134 
20135 // Bytes vector add
20136 instruct vaddB(vec dst, vec src) %{
20137   predicate(UseAVX == 0);
20138   match(Set dst (AddVB dst src));
20139   format %{ "paddb   $dst,$src\t! add packedB" %}
20140   ins_encode %{
20141     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20142   %}
20143   ins_pipe( pipe_slow );
20144 %}
20145 
20146 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20147   predicate(UseAVX > 0);
20148   match(Set dst (AddVB src1 src2));
20149   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
20150   ins_encode %{
20151     int vlen_enc = vector_length_encoding(this);
20152     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20153   %}
20154   ins_pipe( pipe_slow );
20155 %}
20156 
20157 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20158   predicate((UseAVX > 0) &&
20159             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20160   match(Set dst (AddVB src (LoadVector mem)));
20161   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
20162   ins_encode %{
20163     int vlen_enc = vector_length_encoding(this);
20164     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20165   %}
20166   ins_pipe( pipe_slow );
20167 %}
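
// The three rules above set the pattern repeated for every vector
// arithmetic op below: an SSE form where $dst is also a source (destructive
// two-operand encoding), an AVX form with a non-destructive three-operand
// VEX encoding, and a memory form that folds the vector load into the
// instruction. The memory form is guarded to vectors wider than 8 bytes so
// that the full-width memory operand cannot read past the end of a sub-xmm
// vector. In the macroassembler idiom used throughout this file
// (illustrative register choices):
//
//   __ paddb(xmm0, xmm1);                   // SSE: xmm0 += xmm1
//   __ vpaddb(xmm0, xmm1, xmm2, vlen_enc);  // AVX: xmm0 = xmm1 + xmm2
//   __ vpaddb(xmm0, xmm1, addr, vlen_enc);  // AVX: load folded into the add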
20168 
20169 // Shorts/Chars vector add
20170 instruct vaddS(vec dst, vec src) %{
20171   predicate(UseAVX == 0);
20172   match(Set dst (AddVS dst src));
20173   format %{ "paddw   $dst,$src\t! add packedS" %}
20174   ins_encode %{
20175     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20176   %}
20177   ins_pipe( pipe_slow );
20178 %}
20179 
20180 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20181   predicate(UseAVX > 0);
20182   match(Set dst (AddVS src1 src2));
20183   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
20184   ins_encode %{
20185     int vlen_enc = vector_length_encoding(this);
20186     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20187   %}
20188   ins_pipe( pipe_slow );
20189 %}
20190 
20191 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20192   predicate((UseAVX > 0) &&
20193             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20194   match(Set dst (AddVS src (LoadVector mem)));
20195   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
20196   ins_encode %{
20197     int vlen_enc = vector_length_encoding(this);
20198     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20199   %}
20200   ins_pipe( pipe_slow );
20201 %}
20202 
20203 // Integers vector add
20204 instruct vaddI(vec dst, vec src) %{
20205   predicate(UseAVX == 0);
20206   match(Set dst (AddVI dst src));
20207   format %{ "paddd   $dst,$src\t! add packedI" %}
20208   ins_encode %{
20209     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20210   %}
20211   ins_pipe( pipe_slow );
20212 %}
20213 
20214 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20215   predicate(UseAVX > 0);
20216   match(Set dst (AddVI src1 src2));
20217   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
20218   ins_encode %{
20219     int vlen_enc = vector_length_encoding(this);
20220     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20221   %}
20222   ins_pipe( pipe_slow );
20223 %}
20224 
20225 
20226 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20227   predicate((UseAVX > 0) &&
20228             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20229   match(Set dst (AddVI src (LoadVector mem)));
20230   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
20231   ins_encode %{
20232     int vlen_enc = vector_length_encoding(this);
20233     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20234   %}
20235   ins_pipe( pipe_slow );
20236 %}
20237 
20238 // Longs vector add
20239 instruct vaddL(vec dst, vec src) %{
20240   predicate(UseAVX == 0);
20241   match(Set dst (AddVL dst src));
20242   format %{ "paddq   $dst,$src\t! add packedL" %}
20243   ins_encode %{
20244     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20245   %}
20246   ins_pipe( pipe_slow );
20247 %}
20248 
20249 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20250   predicate(UseAVX > 0);
20251   match(Set dst (AddVL src1 src2));
20252   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
20253   ins_encode %{
20254     int vlen_enc = vector_length_encoding(this);
20255     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20256   %}
20257   ins_pipe( pipe_slow );
20258 %}
20259 
20260 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20261   predicate((UseAVX > 0) &&
20262             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20263   match(Set dst (AddVL src (LoadVector mem)));
20264   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
20265   ins_encode %{
20266     int vlen_enc = vector_length_encoding(this);
20267     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20268   %}
20269   ins_pipe( pipe_slow );
20270 %}
20271 
20272 // Floats vector add
20273 instruct vaddF(vec dst, vec src) %{
20274   predicate(UseAVX == 0);
20275   match(Set dst (AddVF dst src));
20276   format %{ "addps   $dst,$src\t! add packedF" %}
20277   ins_encode %{
20278     __ addps($dst$$XMMRegister, $src$$XMMRegister);
20279   %}
20280   ins_pipe( pipe_slow );
20281 %}
20282 
20283 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20284   predicate(UseAVX > 0);
20285   match(Set dst (AddVF src1 src2));
20286   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
20287   ins_encode %{
20288     int vlen_enc = vector_length_encoding(this);
20289     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20290   %}
20291   ins_pipe( pipe_slow );
20292 %}
20293 
20294 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20295   predicate((UseAVX > 0) &&
20296             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20297   match(Set dst (AddVF src (LoadVector mem)));
20298   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
20299   ins_encode %{
20300     int vlen_enc = vector_length_encoding(this);
20301     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20302   %}
20303   ins_pipe( pipe_slow );
20304 %}
20305 
20306 // Doubles vector add
20307 instruct vaddD(vec dst, vec src) %{
20308   predicate(UseAVX == 0);
20309   match(Set dst (AddVD dst src));
20310   format %{ "addpd   $dst,$src\t! add packedD" %}
20311   ins_encode %{
20312     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20313   %}
20314   ins_pipe( pipe_slow );
20315 %}
20316 
20317 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20318   predicate(UseAVX > 0);
20319   match(Set dst (AddVD src1 src2));
20320   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
20321   ins_encode %{
20322     int vlen_enc = vector_length_encoding(this);
20323     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20324   %}
20325   ins_pipe( pipe_slow );
20326 %}
20327 
20328 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20329   predicate((UseAVX > 0) &&
20330             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20331   match(Set dst (AddVD src (LoadVector mem)));
20332   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
20333   ins_encode %{
20334     int vlen_enc = vector_length_encoding(this);
20335     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20336   %}
20337   ins_pipe( pipe_slow );
20338 %}
20339 
20340 // --------------------------------- SUB --------------------------------------
20341 
20342 // Bytes vector sub
20343 instruct vsubB(vec dst, vec src) %{
20344   predicate(UseAVX == 0);
20345   match(Set dst (SubVB dst src));
20346   format %{ "psubb   $dst,$src\t! sub packedB" %}
20347   ins_encode %{
20348     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20349   %}
20350   ins_pipe( pipe_slow );
20351 %}
20352 
20353 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20354   predicate(UseAVX > 0);
20355   match(Set dst (SubVB src1 src2));
20356   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20357   ins_encode %{
20358     int vlen_enc = vector_length_encoding(this);
20359     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20360   %}
20361   ins_pipe( pipe_slow );
20362 %}
20363 
20364 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20365   predicate((UseAVX > 0) &&
20366             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20367   match(Set dst (SubVB src (LoadVector mem)));
20368   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20369   ins_encode %{
20370     int vlen_enc = vector_length_encoding(this);
20371     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20372   %}
20373   ins_pipe( pipe_slow );
20374 %}
20375 
20376 // Shorts/Chars vector sub
20377 instruct vsubS(vec dst, vec src) %{
20378   predicate(UseAVX == 0);
20379   match(Set dst (SubVS dst src));
20380   format %{ "psubw   $dst,$src\t! sub packedS" %}
20381   ins_encode %{
20382     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20383   %}
20384   ins_pipe( pipe_slow );
20385 %}
20386 
20387 
20388 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20389   predicate(UseAVX > 0);
20390   match(Set dst (SubVS src1 src2));
20391   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20392   ins_encode %{
20393     int vlen_enc = vector_length_encoding(this);
20394     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20395   %}
20396   ins_pipe( pipe_slow );
20397 %}
20398 
20399 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20400   predicate((UseAVX > 0) &&
20401             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20402   match(Set dst (SubVS src (LoadVector mem)));
20403   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20404   ins_encode %{
20405     int vlen_enc = vector_length_encoding(this);
20406     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20407   %}
20408   ins_pipe( pipe_slow );
20409 %}
20410 
20411 // Integers vector sub
20412 instruct vsubI(vec dst, vec src) %{
20413   predicate(UseAVX == 0);
20414   match(Set dst (SubVI dst src));
20415   format %{ "psubd   $dst,$src\t! sub packedI" %}
20416   ins_encode %{
20417     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20418   %}
20419   ins_pipe( pipe_slow );
20420 %}
20421 
20422 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20423   predicate(UseAVX > 0);
20424   match(Set dst (SubVI src1 src2));
20425   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20426   ins_encode %{
20427     int vlen_enc = vector_length_encoding(this);
20428     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20429   %}
20430   ins_pipe( pipe_slow );
20431 %}
20432 
20433 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20434   predicate((UseAVX > 0) &&
20435             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20436   match(Set dst (SubVI src (LoadVector mem)));
20437   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20438   ins_encode %{
20439     int vlen_enc = vector_length_encoding(this);
20440     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20441   %}
20442   ins_pipe( pipe_slow );
20443 %}
20444 
20445 // Longs vector sub
20446 instruct vsubL(vec dst, vec src) %{
20447   predicate(UseAVX == 0);
20448   match(Set dst (SubVL dst src));
20449   format %{ "psubq   $dst,$src\t! sub packedL" %}
20450   ins_encode %{
20451     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20452   %}
20453   ins_pipe( pipe_slow );
20454 %}
20455 
20456 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20457   predicate(UseAVX > 0);
20458   match(Set dst (SubVL src1 src2));
20459   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20460   ins_encode %{
20461     int vlen_enc = vector_length_encoding(this);
20462     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20463   %}
20464   ins_pipe( pipe_slow );
20465 %}
20466 
20467 
20468 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20469   predicate((UseAVX > 0) &&
20470             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20471   match(Set dst (SubVL src (LoadVector mem)));
20472   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20473   ins_encode %{
20474     int vlen_enc = vector_length_encoding(this);
20475     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20476   %}
20477   ins_pipe( pipe_slow );
20478 %}
20479 
20480 // Floats vector sub
20481 instruct vsubF(vec dst, vec src) %{
20482   predicate(UseAVX == 0);
20483   match(Set dst (SubVF dst src));
20484   format %{ "subps   $dst,$src\t! sub packedF" %}
20485   ins_encode %{
20486     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20487   %}
20488   ins_pipe( pipe_slow );
20489 %}
20490 
20491 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20492   predicate(UseAVX > 0);
20493   match(Set dst (SubVF src1 src2));
20494   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20495   ins_encode %{
20496     int vlen_enc = vector_length_encoding(this);
20497     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20498   %}
20499   ins_pipe( pipe_slow );
20500 %}
20501 
20502 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20503   predicate((UseAVX > 0) &&
20504             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20505   match(Set dst (SubVF src (LoadVector mem)));
20506   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20507   ins_encode %{
20508     int vlen_enc = vector_length_encoding(this);
20509     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20510   %}
20511   ins_pipe( pipe_slow );
20512 %}
20513 
20514 // Doubles vector sub
20515 instruct vsubD(vec dst, vec src) %{
20516   predicate(UseAVX == 0);
20517   match(Set dst (SubVD dst src));
20518   format %{ "subpd   $dst,$src\t! sub packedD" %}
20519   ins_encode %{
20520     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20521   %}
20522   ins_pipe( pipe_slow );
20523 %}
20524 
20525 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20526   predicate(UseAVX > 0);
20527   match(Set dst (SubVD src1 src2));
20528   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20529   ins_encode %{
20530     int vlen_enc = vector_length_encoding(this);
20531     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20532   %}
20533   ins_pipe( pipe_slow );
20534 %}
20535 
20536 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20537   predicate((UseAVX > 0) &&
20538             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20539   match(Set dst (SubVD src (LoadVector mem)));
20540   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20541   ins_encode %{
20542     int vlen_enc = vector_length_encoding(this);
20543     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20544   %}
20545   ins_pipe( pipe_slow );
20546 %}
20547 
20548 // --------------------------------- MUL --------------------------------------
20549 
20550 // Byte vector mul
20551 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20552   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20553   match(Set dst (MulVB src1 src2));
20554   effect(TEMP dst, TEMP xtmp);
20555   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20556   ins_encode %{
20557     assert(UseSSE > 3, "required");
    __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);  // sign-extend bytes to words
    __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);    // 16-bit products
    __ psllw($dst$$XMMRegister, 8);                      // zero the high byte
    __ psrlw($dst$$XMMRegister, 8);                      //   of each word
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);   // pack words back to bytes
20564   %}
20565   ins_pipe( pipe_slow );
20566 %}
20567 
20568 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20569   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20570   match(Set dst (MulVB src1 src2));
20571   effect(TEMP dst, TEMP xtmp);
20572   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20573   ins_encode %{
20574     assert(UseSSE > 3, "required");
20575     // Odd-index elements
20576     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20577     __ psrlw($dst$$XMMRegister, 8);
20578     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20579     __ psrlw($xtmp$$XMMRegister, 8);
20580     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20581     __ psllw($dst$$XMMRegister, 8);
20582     // Even-index elements
20583     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20584     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20585     __ psllw($xtmp$$XMMRegister, 8);
20586     __ psrlw($xtmp$$XMMRegister, 8);
20587     // Combine
20588     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20589   %}
20590   ins_pipe( pipe_slow );
20591 %}
20592 
20593 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20594   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20595   match(Set dst (MulVB src1 src2));
20596   effect(TEMP xtmp1, TEMP xtmp2);
20597   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20598   ins_encode %{
20599     int vlen_enc = vector_length_encoding(this);
20600     // Odd-index elements
20601     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20602     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20603     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20604     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20605     // Even-index elements
20606     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20607     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20608     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20609     // Combine
20610     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20611   %}
20612   ins_pipe( pipe_slow );
20613 %}
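
// x86 has no byte-lane multiply, so the rules above synthesize MulVB from
// 16-bit multiplies: the odd-index (high) and even-index (low) bytes of
// each word are produced separately and OR-ed together. Per 16-bit word of
// the operands, a scalar sketch (illustrative only):
//
//   uint16_t mul_packed_bytes(uint16_t a, uint16_t b) {
//     uint16_t odd  = (uint16_t)((a >> 8) * (b >> 8)) << 8;  // high lanes
//     uint16_t even = (uint16_t)(a * b) & 0xFF;              // low lanes
//     return odd | even;
//   }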
20614 
20615 // Shorts/Chars vector mul
20616 instruct vmulS(vec dst, vec src) %{
20617   predicate(UseAVX == 0);
20618   match(Set dst (MulVS dst src));
20619   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20620   ins_encode %{
20621     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20622   %}
20623   ins_pipe( pipe_slow );
20624 %}
20625 
20626 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20627   predicate(UseAVX > 0);
20628   match(Set dst (MulVS src1 src2));
20629   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20630   ins_encode %{
20631     int vlen_enc = vector_length_encoding(this);
20632     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20633   %}
20634   ins_pipe( pipe_slow );
20635 %}
20636 
20637 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20638   predicate((UseAVX > 0) &&
20639             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20640   match(Set dst (MulVS src (LoadVector mem)));
20641   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20642   ins_encode %{
20643     int vlen_enc = vector_length_encoding(this);
20644     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20645   %}
20646   ins_pipe( pipe_slow );
20647 %}
20648 
20649 // Integers vector mul
20650 instruct vmulI(vec dst, vec src) %{
20651   predicate(UseAVX == 0);
20652   match(Set dst (MulVI dst src));
20653   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20654   ins_encode %{
20655     assert(UseSSE > 3, "required");
20656     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20657   %}
20658   ins_pipe( pipe_slow );
20659 %}
20660 
20661 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20662   predicate(UseAVX > 0);
20663   match(Set dst (MulVI src1 src2));
20664   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20665   ins_encode %{
20666     int vlen_enc = vector_length_encoding(this);
20667     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20668   %}
20669   ins_pipe( pipe_slow );
20670 %}
20671 
20672 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20673   predicate((UseAVX > 0) &&
20674             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20675   match(Set dst (MulVI src (LoadVector mem)));
20676   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20677   ins_encode %{
20678     int vlen_enc = vector_length_encoding(this);
20679     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20680   %}
20681   ins_pipe( pipe_slow );
20682 %}
20683 
20684 // Longs vector mul
20685 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20686   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20687              VM_Version::supports_avx512dq()) ||
20688             VM_Version::supports_avx512vldq());
20689   match(Set dst (MulVL src1 src2));
20690   ins_cost(500);
20691   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20692   ins_encode %{
20693     assert(UseAVX > 2, "required");
20694     int vlen_enc = vector_length_encoding(this);
20695     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20696   %}
20697   ins_pipe( pipe_slow );
20698 %}
20699 
20700 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20701   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20702              VM_Version::supports_avx512dq()) ||
20703             (Matcher::vector_length_in_bytes(n) > 8 &&
20704              VM_Version::supports_avx512vldq()));
20705   match(Set dst (MulVL src (LoadVector mem)));
20706   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20707   ins_cost(500);
20708   ins_encode %{
20709     assert(UseAVX > 2, "required");
20710     int vlen_enc = vector_length_encoding(this);
20711     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20712   %}
20713   ins_pipe( pipe_slow );
20714 %}
20715 
20716 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20717   predicate(UseAVX == 0);
20718   match(Set dst (MulVL src1 src2));
20719   ins_cost(500);
20720   effect(TEMP dst, TEMP xtmp);
20721   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20722   ins_encode %{
20723     assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only their lower 32 bits are needed
20725     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20726     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20727     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20728     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20729     __ psllq($dst$$XMMRegister, 32);
20730     // Get the lo-lo products
20731     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20732     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20733     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20734   %}
20735   ins_pipe( pipe_slow );
20736 %}
20737 
20738 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20739   predicate(UseAVX > 0 &&
20740             ((Matcher::vector_length_in_bytes(n) == 64 &&
20741               !VM_Version::supports_avx512dq()) ||
20742              (Matcher::vector_length_in_bytes(n) < 64 &&
20743               !VM_Version::supports_avx512vldq())));
20744   match(Set dst (MulVL src1 src2));
20745   effect(TEMP xtmp1, TEMP xtmp2);
20746   ins_cost(500);
20747   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20748   ins_encode %{
20749     int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only their lower 32 bits are needed
20751     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20752     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20753     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20754     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20755     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20756     // Get the lo-lo products
20757     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20758     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20759   %}
20760   ins_pipe( pipe_slow );
20761 %}
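
// Without a 64-bit-lane multiply (no AVX512DQ/VLDQ), vmulL and vmulL_reg
// assemble the low 64 bits of each product from 32-bit pieces:
//
//   a * b mod 2^64 == a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 32)
//
// which is what the shuffle (swap halves), pmulld (cross products), paddd,
// psllq and pmuludq (full lo*lo product) sequence computes. A scalar sketch
// (illustrative only):
//
//   uint64_t mul64(uint64_t a, uint64_t b) {
//     uint32_t a_lo = (uint32_t)a, a_hi = (uint32_t)(a >> 32);
//     uint32_t b_lo = (uint32_t)b, b_hi = (uint32_t)(b >> 32);
//     uint64_t cross = (uint64_t)(a_lo * b_hi + a_hi * b_lo) << 32;
//     return (uint64_t)a_lo * b_lo + cross;
//   }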
20762 
20763 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20764   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20765   match(Set dst (MulVL src1 src2));
20766   ins_cost(100);
20767   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20768   ins_encode %{
20769     int vlen_enc = vector_length_encoding(this);
20770     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20771   %}
20772   ins_pipe( pipe_slow );
20773 %}
20774 
20775 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20776   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20777   match(Set dst (MulVL src1 src2));
20778   ins_cost(100);
20779   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20780   ins_encode %{
20781     int vlen_enc = vector_length_encoding(this);
20782     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20783   %}
20784   ins_pipe( pipe_slow );
20785 %}
20786 
20787 // Floats vector mul
20788 instruct vmulF(vec dst, vec src) %{
20789   predicate(UseAVX == 0);
20790   match(Set dst (MulVF dst src));
20791   format %{ "mulps   $dst,$src\t! mul packedF" %}
20792   ins_encode %{
20793     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20794   %}
20795   ins_pipe( pipe_slow );
20796 %}
20797 
20798 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20799   predicate(UseAVX > 0);
20800   match(Set dst (MulVF src1 src2));
20801   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
20802   ins_encode %{
20803     int vlen_enc = vector_length_encoding(this);
20804     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20805   %}
20806   ins_pipe( pipe_slow );
20807 %}
20808 
20809 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20810   predicate((UseAVX > 0) &&
20811             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20812   match(Set dst (MulVF src (LoadVector mem)));
20813   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
20814   ins_encode %{
20815     int vlen_enc = vector_length_encoding(this);
20816     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20817   %}
20818   ins_pipe( pipe_slow );
20819 %}
20820 
20821 // Doubles vector mul
20822 instruct vmulD(vec dst, vec src) %{
20823   predicate(UseAVX == 0);
20824   match(Set dst (MulVD dst src));
20825   format %{ "mulpd   $dst,$src\t! mul packedD" %}
20826   ins_encode %{
20827     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20828   %}
20829   ins_pipe( pipe_slow );
20830 %}
20831 
20832 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20833   predicate(UseAVX > 0);
20834   match(Set dst (MulVD src1 src2));
20835   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
20836   ins_encode %{
20837     int vlen_enc = vector_length_encoding(this);
20838     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20839   %}
20840   ins_pipe( pipe_slow );
20841 %}
20842 
20843 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20844   predicate((UseAVX > 0) &&
20845             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20846   match(Set dst (MulVD src (LoadVector mem)));
20847   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
20848   ins_encode %{
20849     int vlen_enc = vector_length_encoding(this);
20850     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20851   %}
20852   ins_pipe( pipe_slow );
20853 %}
20854 
20855 // --------------------------------- DIV --------------------------------------
20856 
20857 // Floats vector div
20858 instruct vdivF(vec dst, vec src) %{
20859   predicate(UseAVX == 0);
20860   match(Set dst (DivVF dst src));
20861   format %{ "divps   $dst,$src\t! div packedF" %}
20862   ins_encode %{
20863     __ divps($dst$$XMMRegister, $src$$XMMRegister);
20864   %}
20865   ins_pipe( pipe_slow );
20866 %}
20867 
20868 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20869   predicate(UseAVX > 0);
20870   match(Set dst (DivVF src1 src2));
20871   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
20872   ins_encode %{
20873     int vlen_enc = vector_length_encoding(this);
20874     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20875   %}
20876   ins_pipe( pipe_slow );
20877 %}
20878 
20879 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20880   predicate((UseAVX > 0) &&
20881             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20882   match(Set dst (DivVF src (LoadVector mem)));
20883   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
20884   ins_encode %{
20885     int vlen_enc = vector_length_encoding(this);
20886     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20887   %}
20888   ins_pipe( pipe_slow );
20889 %}
20890 
20891 // Doubles vector div
20892 instruct vdivD(vec dst, vec src) %{
20893   predicate(UseAVX == 0);
20894   match(Set dst (DivVD dst src));
20895   format %{ "divpd   $dst,$src\t! div packedD" %}
20896   ins_encode %{
20897     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20898   %}
20899   ins_pipe( pipe_slow );
20900 %}
20901 
20902 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20903   predicate(UseAVX > 0);
20904   match(Set dst (DivVD src1 src2));
20905   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
20906   ins_encode %{
20907     int vlen_enc = vector_length_encoding(this);
20908     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20909   %}
20910   ins_pipe( pipe_slow );
20911 %}
20912 
20913 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20914   predicate((UseAVX > 0) &&
20915             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20916   match(Set dst (DivVD src (LoadVector mem)));
20917   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
20918   ins_encode %{
20919     int vlen_enc = vector_length_encoding(this);
20920     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20921   %}
20922   ins_pipe( pipe_slow );
20923 %}
20924 
20925 // ------------------------------ MinMax ---------------------------------------
20926 
20927 // Byte, Short, Int vector Min/Max
20928 instruct minmax_reg_sse(vec dst, vec src) %{
20929   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20930             UseAVX == 0);
20931   match(Set dst (MinV dst src));
20932   match(Set dst (MaxV dst src));
20933   format %{ "vector_minmax  $dst,$src\t!  " %}
20934   ins_encode %{
20935     assert(UseSSE >= 4, "required");
20936 
20937     int opcode = this->ideal_Opcode();
20938     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20939     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20940   %}
20941   ins_pipe( pipe_slow );
20942 %}
20943 
20944 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20945   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20946             UseAVX > 0);
20947   match(Set dst (MinV src1 src2));
20948   match(Set dst (MaxV src1 src2));
20949   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
20950   ins_encode %{
20951     int opcode = this->ideal_Opcode();
20952     int vlen_enc = vector_length_encoding(this);
20953     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20954 
20955     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20956   %}
20957   ins_pipe( pipe_slow );
20958 %}
20959 
20960 // Long vector Min/Max
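// Packed 64-bit min/max (vpminsq/vpmaxsq) only arrives with AVX512 (plus VL
// for the sub-512-bit forms), so the pre-EVEX rules below synthesize it with
// a compare-and-blend sequence in the macro assembler. The SSE rule pins a
// temp to xmm0 because blendvpd uses xmm0 as its implicit mask register.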
20961 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20962   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20963             UseAVX == 0);
20964   match(Set dst (MinV dst src));
20965   match(Set dst (MaxV src dst));
20966   effect(TEMP dst, TEMP tmp);
20967   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
20968   ins_encode %{
20969     assert(UseSSE >= 4, "required");
20970 
20971     int opcode = this->ideal_Opcode();
20972     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20973     assert(elem_bt == T_LONG, "sanity");
20974 
20975     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20976   %}
20977   ins_pipe( pipe_slow );
20978 %}
20979 
20980 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20981   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20982             UseAVX > 0 && !VM_Version::supports_avx512vl());
20983   match(Set dst (MinV src1 src2));
20984   match(Set dst (MaxV src1 src2));
20985   effect(TEMP dst);
20986   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20987   ins_encode %{
20988     int vlen_enc = vector_length_encoding(this);
20989     int opcode = this->ideal_Opcode();
20990     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20991     assert(elem_bt == T_LONG, "sanity");
20992 
20993     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20994   %}
20995   ins_pipe( pipe_slow );
20996 %}
20997 
20998 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20999   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
21000             Matcher::vector_element_basic_type(n) == T_LONG);
21001   match(Set dst (MinV src1 src2));
21002   match(Set dst (MaxV src1 src2));
21003   format %{ "vector_minmaxL  $dst,$src1,src2\t! " %}
21004   ins_encode %{
21005     assert(UseAVX > 2, "required");
21006 
21007     int vlen_enc = vector_length_encoding(this);
21008     int opcode = this->ideal_Opcode();
21009     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21010     assert(elem_bt == T_LONG, "sanity");
21011 
21012     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21013   %}
21014   ins_pipe( pipe_slow );
21015 %}
21016 
21017 // Float/Double vector Min/Max
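// Java's Math.min/max semantics differ from raw minps/maxps: NaN must
// propagate, and -0.0 compares strictly below +0.0, whereas minps/maxps
// simply return the second operand on NaN and ignore the sign of zero. The
// legacy rules below therefore blend in fixups through temp registers; the
// AVX10.2 vminmax forms are specified with these IEEE 754-2019 semantics
// directly, so the first rule needs no temps.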
21018 instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{
21019   predicate(VM_Version::supports_avx10_2() &&
21020             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21021   match(Set dst (MinV a b));
21022   match(Set dst (MaxV a b));
21023   format %{ "vector_minmaxFP  $dst, $a, $b" %}
21024   ins_encode %{
21025     int vlen_enc = vector_length_encoding(this);
21026     int opcode = this->ideal_Opcode();
21027     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21028     __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21029   %}
21030   ins_pipe( pipe_slow );
21031 %}
21032 
21033 // Float/Double vector Min/Max
21034 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
21035   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
21036             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
21037             UseAVX > 0);
21038   match(Set dst (MinV a b));
21039   match(Set dst (MaxV a b));
21040   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
21041   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
21042   ins_encode %{
21043     assert(UseAVX > 0, "required");
21044 
21045     int opcode = this->ideal_Opcode();
21046     int vlen_enc = vector_length_encoding(this);
21047     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21048 
21049     __ vminmax_fp(opcode, elem_bt,
21050                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
21052   %}
21053   ins_pipe( pipe_slow );
21054 %}
21055 
21056 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
21057   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
21058             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21059   match(Set dst (MinV a b));
21060   match(Set dst (MaxV a b));
21061   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
21062   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
21063   ins_encode %{
21064     assert(UseAVX > 2, "required");
21065 
21066     int opcode = this->ideal_Opcode();
21067     int vlen_enc = vector_length_encoding(this);
21068     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21069 
21070     __ evminmax_fp(opcode, elem_bt,
21071                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
21073   %}
21074   ins_pipe( pipe_slow );
21075 %}
21076 
21077 // ------------------------------ Unsigned vector Min/Max ----------------------
21078 
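// Byte/short/int lanes have direct unsigned min/max instructions
// (pminub/pminuw/pminud and friends), but unsigned 64-bit min/max (vpminuq)
// requires AVX512VL. The _uminmaxq rule below fills the gap with a
// compare-and-blend sequence through the two temps, presumably via the
// standard sign-bit flip that turns an unsigned ordering into a signed one.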
21079 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21080   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21081   match(Set dst (UMinV a b));
21082   match(Set dst (UMaxV a b));
21083   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21084   ins_encode %{
21085     int opcode = this->ideal_Opcode();
21086     int vlen_enc = vector_length_encoding(this);
21087     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21088     assert(is_integral_type(elem_bt), "");
21089     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21090   %}
21091   ins_pipe( pipe_slow );
21092 %}
21093 
21094 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21095   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21096   match(Set dst (UMinV a (LoadVector b)));
21097   match(Set dst (UMaxV a (LoadVector b)));
21098   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21099   ins_encode %{
21100     int opcode = this->ideal_Opcode();
21101     int vlen_enc = vector_length_encoding(this);
21102     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21103     assert(is_integral_type(elem_bt), "");
21104     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21105   %}
21106   ins_pipe( pipe_slow );
21107 %}
21108 
21109 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21110   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21111   match(Set dst (UMinV a b));
21112   match(Set dst (UMaxV a b));
21113   effect(TEMP xtmp1, TEMP xtmp2);
21114   format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
21115   ins_encode %{
21116     int opcode = this->ideal_Opcode();
21117     int vlen_enc = vector_length_encoding(this);
21118     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21119   %}
21120   ins_pipe( pipe_slow );
21121 %}
21122 
21123 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21124   match(Set dst (UMinV (Binary dst src2) mask));
21125   match(Set dst (UMaxV (Binary dst src2) mask));
21126   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21127   ins_encode %{
21128     int vlen_enc = vector_length_encoding(this);
21129     BasicType bt = Matcher::vector_element_basic_type(this);
21130     int opc = this->ideal_Opcode();
21131     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21132                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21133   %}
21134   ins_pipe( pipe_slow );
21135 %}
21136 
21137 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21138   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21139   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21140   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21141   ins_encode %{
21142     int vlen_enc = vector_length_encoding(this);
21143     BasicType bt = Matcher::vector_element_basic_type(this);
21144     int opc = this->ideal_Opcode();
21145     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21146                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21147   %}
21148   ins_pipe( pipe_slow );
21149 %}
21150 
21151 // --------------------------------- Signum/CopySign ---------------------------
21152 
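// Math.signum: zeroes and NaNs pass through unchanged; any other input maps
// to 1.0 or -1.0 according to its sign, which is why every rule carries both
// a zero and a one constant operand.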
21153 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21154   match(Set dst (SignumF dst (Binary zero one)));
21155   effect(KILL cr);
21156   format %{ "signumF $dst, $dst" %}
21157   ins_encode %{
21158     int opcode = this->ideal_Opcode();
21159     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21160   %}
21161   ins_pipe( pipe_slow );
21162 %}
21163 
21164 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21165   match(Set dst (SignumD dst (Binary zero one)));
21166   effect(KILL cr);
21167   format %{ "signumD $dst, $dst" %}
21168   ins_encode %{
21169     int opcode = this->ideal_Opcode();
21170     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21171   %}
21172   ins_pipe( pipe_slow );
21173 %}
21174 
21175 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21176   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21177   match(Set dst (SignumVF src (Binary zero one)));
21178   match(Set dst (SignumVD src (Binary zero one)));
21179   effect(TEMP dst, TEMP xtmp1);
21180   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21181   ins_encode %{
21182     int opcode = this->ideal_Opcode();
21183     int vec_enc = vector_length_encoding(this);
21184     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21185                          $xtmp1$$XMMRegister, vec_enc);
21186   %}
21187   ins_pipe( pipe_slow );
21188 %}
21189 
21190 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21191   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21192   match(Set dst (SignumVF src (Binary zero one)));
21193   match(Set dst (SignumVD src (Binary zero one)));
21194   effect(TEMP dst, TEMP ktmp1);
21195   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21196   ins_encode %{
21197     int opcode = this->ideal_Opcode();
21198     int vec_enc = vector_length_encoding(this);
21199     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21200                           $ktmp1$$KRegister, vec_enc);
21201   %}
21202   ins_pipe( pipe_slow );
21203 %}
21204 
21205 // ---------------------------------------
21206 // For copySign use 0xE4 as writemask for vpternlog
21207 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21208 // C (xmm2) is set to 0x7FFFFFFF
21209 // Wherever xmm2 is 0, we want to pick from B (sign)
21210 // Wherever xmm2 is 1, we want to pick from A (src)
21211 //
21212 // A B C Result
21213 // 0 0 0 0
21214 // 0 0 1 0
21215 // 0 1 0 1
21216 // 0 1 1 0
21217 // 1 0 0 0
21218 // 1 0 1 1
21219 // 1 1 0 1
21220 // 1 1 1 1
21221 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
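//
// vpternlog evaluates, for each bit position, bit ((A << 2) | (B << 1) | C)
// of the immediate: e.g. A=1,B=0,C=1 selects bit 5 of 0xE4 = 0b11100100,
// which is 1, matching the pick-from-A row above.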
21223 // ---------------------------------------
21224 
21225 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21226   match(Set dst (CopySignF dst src));
21227   effect(TEMP tmp1, TEMP tmp2);
21228   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21229   ins_encode %{
21230     __ movl($tmp2$$Register, 0x7FFFFFFF);
21231     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21232     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21233   %}
21234   ins_pipe( pipe_slow );
21235 %}
21236 
21237 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21238   match(Set dst (CopySignD dst (Binary src zero)));
21239   ins_cost(100);
21240   effect(TEMP tmp1, TEMP tmp2);
21241   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21242   ins_encode %{
21243     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21244     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21245     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21246   %}
21247   ins_pipe( pipe_slow );
21248 %}
21249 
21250 //----------------------------- CompressBits/ExpandBits ------------------------
21251 
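// These map directly onto BMI2 pext/pdep: pext gathers the src bits selected
// by mask into the low end of dst, while pdep scatters the low bits of src
// out to the mask positions. A small worked example (8 bits for brevity):
//   pext(src=0b11010110, mask=0b01100101) == 0b1010
// (mask selects src bits 0,2,5,6 = 0,1,0,1, packed from low to high).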
21252 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21253   predicate(n->bottom_type()->isa_int());
21254   match(Set dst (CompressBits src mask));
21255   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21256   ins_encode %{
21257     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21258   %}
21259   ins_pipe( pipe_slow );
21260 %}
21261 
21262 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21263   predicate(n->bottom_type()->isa_int());
21264   match(Set dst (ExpandBits src mask));
21265   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21266   ins_encode %{
21267     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21268   %}
21269   ins_pipe( pipe_slow );
21270 %}
21271 
21272 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21273   predicate(n->bottom_type()->isa_int());
21274   match(Set dst (CompressBits src (LoadI mask)));
21275   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21276   ins_encode %{
21277     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21278   %}
21279   ins_pipe( pipe_slow );
21280 %}
21281 
21282 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21283   predicate(n->bottom_type()->isa_int());
21284   match(Set dst (ExpandBits src (LoadI mask)));
21285   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21286   ins_encode %{
21287     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21288   %}
21289   ins_pipe( pipe_slow );
21290 %}
21291 
21292 // --------------------------------- Sqrt --------------------------------------
21293 
21294 instruct vsqrtF_reg(vec dst, vec src) %{
21295   match(Set dst (SqrtVF src));
21296   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
21297   ins_encode %{
21298     assert(UseAVX > 0, "required");
21299     int vlen_enc = vector_length_encoding(this);
21300     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21301   %}
21302   ins_pipe( pipe_slow );
21303 %}
21304 
21305 instruct vsqrtF_mem(vec dst, memory mem) %{
21306   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21307   match(Set dst (SqrtVF (LoadVector mem)));
21308   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
21309   ins_encode %{
21310     assert(UseAVX > 0, "required");
21311     int vlen_enc = vector_length_encoding(this);
21312     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21313   %}
21314   ins_pipe( pipe_slow );
21315 %}
21316 
// Doubles vector sqrt
21318 instruct vsqrtD_reg(vec dst, vec src) %{
21319   match(Set dst (SqrtVD src));
21320   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
21321   ins_encode %{
21322     assert(UseAVX > 0, "required");
21323     int vlen_enc = vector_length_encoding(this);
21324     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21325   %}
21326   ins_pipe( pipe_slow );
21327 %}
21328 
21329 instruct vsqrtD_mem(vec dst, memory mem) %{
21330   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21331   match(Set dst (SqrtVD (LoadVector mem)));
21332   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
21333   ins_encode %{
21334     assert(UseAVX > 0, "required");
21335     int vlen_enc = vector_length_encoding(this);
21336     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21337   %}
21338   ins_pipe( pipe_slow );
21339 %}
21340 
21341 // ------------------------------ Shift ---------------------------------------
21342 
21343 // Left and right shift count vectors are the same on x86
21344 // (only lowest bits of xmm reg are used for count).
21345 instruct vshiftcnt(vec dst, rRegI cnt) %{
21346   match(Set dst (LShiftCntV cnt));
21347   match(Set dst (RShiftCntV cnt));
21348   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21349   ins_encode %{
21350     __ movdl($dst$$XMMRegister, $cnt$$Register);
21351   %}
21352   ins_pipe( pipe_slow );
21353 %}
21354 
21355 // Byte vector shift
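// x86 has no packed byte shift, so the rules below widen bytes to words
// (vextendbw), shift at word width, mask away each word's high byte, and
// re-pack with packuswb.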
21356 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21357   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21358   match(Set dst ( LShiftVB src shift));
21359   match(Set dst ( RShiftVB src shift));
21360   match(Set dst (URShiftVB src shift));
21361   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21362   format %{"vector_byte_shift $dst,$src,$shift" %}
21363   ins_encode %{
21364     assert(UseSSE > 3, "required");
21365     int opcode = this->ideal_Opcode();
21366     bool sign = (opcode != Op_URShiftVB);
21367     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21368     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21369     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21370     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21371     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21372   %}
21373   ins_pipe( pipe_slow );
21374 %}
21375 
21376 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21377   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21378             UseAVX <= 1);
21379   match(Set dst ( LShiftVB src shift));
21380   match(Set dst ( RShiftVB src shift));
21381   match(Set dst (URShiftVB src shift));
21382   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21383   format %{"vector_byte_shift $dst,$src,$shift" %}
21384   ins_encode %{
21385     assert(UseSSE > 3, "required");
21386     int opcode = this->ideal_Opcode();
21387     bool sign = (opcode != Op_URShiftVB);
21388     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21389     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21390     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21391     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21392     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21393     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21394     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21395     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21396     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21397   %}
21398   ins_pipe( pipe_slow );
21399 %}
21400 
21401 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21402   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21403             UseAVX > 1);
21404   match(Set dst ( LShiftVB src shift));
21405   match(Set dst ( RShiftVB src shift));
21406   match(Set dst (URShiftVB src shift));
21407   effect(TEMP dst, TEMP tmp);
21408   format %{"vector_byte_shift $dst,$src,$shift" %}
21409   ins_encode %{
21410     int opcode = this->ideal_Opcode();
21411     bool sign = (opcode != Op_URShiftVB);
21412     int vlen_enc = Assembler::AVX_256bit;
21413     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21414     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21415     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21416     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21417     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21418   %}
21419   ins_pipe( pipe_slow );
21420 %}
21421 
21422 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21423   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21424   match(Set dst ( LShiftVB src shift));
21425   match(Set dst ( RShiftVB src shift));
21426   match(Set dst (URShiftVB src shift));
21427   effect(TEMP dst, TEMP tmp);
21428   format %{"vector_byte_shift $dst,$src,$shift" %}
21429   ins_encode %{
21430     assert(UseAVX > 1, "required");
21431     int opcode = this->ideal_Opcode();
21432     bool sign = (opcode != Op_URShiftVB);
21433     int vlen_enc = Assembler::AVX_256bit;
21434     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21435     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21436     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21437     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21438     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21439     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21440     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21441     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21442     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21443   %}
21444   ins_pipe( pipe_slow );
21445 %}
21446 
21447 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21448   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21449   match(Set dst ( LShiftVB src shift));
21450   match(Set dst  (RShiftVB src shift));
21451   match(Set dst (URShiftVB src shift));
21452   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21453   format %{"vector_byte_shift $dst,$src,$shift" %}
21454   ins_encode %{
21455     assert(UseAVX > 2, "required");
21456     int opcode = this->ideal_Opcode();
21457     bool sign = (opcode != Op_URShiftVB);
21458     int vlen_enc = Assembler::AVX_512bit;
21459     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21460     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21461     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21462     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21463     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21464     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21465     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21466     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21467     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21468     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21469     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21470     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21471   %}
21472   ins_pipe( pipe_slow );
21473 %}
21474 
// Shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java converts a short to an int with sign extension
// before shifting. Char vectors are fine, since chars are unsigned.
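// Example: for short s = -1, Java evaluates (s >>> 2) on the sign-extended
// int 0xFFFFFFFF, giving 0x3FFFFFFF, which narrows back to (short)-1, while
// a packed 16-bit logical shift would yield 0x3FFF.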
// Shorts/Chars vector shift
21480 instruct vshiftS(vec dst, vec src, vec shift) %{
21481   predicate(!n->as_ShiftV()->is_var_shift());
21482   match(Set dst ( LShiftVS src shift));
21483   match(Set dst ( RShiftVS src shift));
21484   match(Set dst (URShiftVS src shift));
21485   effect(TEMP dst, USE src, USE shift);
21486   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21487   ins_encode %{
21488     int opcode = this->ideal_Opcode();
21489     if (UseAVX > 0) {
21490       int vlen_enc = vector_length_encoding(this);
21491       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21492     } else {
21493       int vlen = Matcher::vector_length(this);
21494       if (vlen == 2) {
21495         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21496         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21497       } else if (vlen == 4) {
21498         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21499         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21500       } else {
21501         assert (vlen == 8, "sanity");
21502         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21503         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21504       }
21505     }
21506   %}
21507   ins_pipe( pipe_slow );
21508 %}
21509 
// Integers vector shift
21511 instruct vshiftI(vec dst, vec src, vec shift) %{
21512   predicate(!n->as_ShiftV()->is_var_shift());
21513   match(Set dst ( LShiftVI src shift));
21514   match(Set dst ( RShiftVI src shift));
21515   match(Set dst (URShiftVI src shift));
21516   effect(TEMP dst, USE src, USE shift);
21517   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21518   ins_encode %{
21519     int opcode = this->ideal_Opcode();
21520     if (UseAVX > 0) {
21521       int vlen_enc = vector_length_encoding(this);
21522       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21523     } else {
21524       int vlen = Matcher::vector_length(this);
21525       if (vlen == 2) {
21526         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21527         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21528       } else {
21529         assert(vlen == 4, "sanity");
21530         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21531         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21532       }
21533     }
21534   %}
21535   ins_pipe( pipe_slow );
21536 %}
21537 
// Integers vector constant shift
21539 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21540   match(Set dst (LShiftVI src (LShiftCntV shift)));
21541   match(Set dst (RShiftVI src (RShiftCntV shift)));
21542   match(Set dst (URShiftVI src (RShiftCntV shift)));
21543   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21544   ins_encode %{
21545     int opcode = this->ideal_Opcode();
21546     if (UseAVX > 0) {
21547       int vector_len = vector_length_encoding(this);
21548       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21549     } else {
21550       int vlen = Matcher::vector_length(this);
21551       if (vlen == 2) {
21552         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21553         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21554       } else {
21555         assert(vlen == 4, "sanity");
21556         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21557         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21558       }
21559     }
21560   %}
21561   ins_pipe( pipe_slow );
21562 %}
21563 
21564 // Longs vector shift
21565 instruct vshiftL(vec dst, vec src, vec shift) %{
21566   predicate(!n->as_ShiftV()->is_var_shift());
21567   match(Set dst ( LShiftVL src shift));
21568   match(Set dst (URShiftVL src shift));
21569   effect(TEMP dst, USE src, USE shift);
21570   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21571   ins_encode %{
21572     int opcode = this->ideal_Opcode();
21573     if (UseAVX > 0) {
21574       int vlen_enc = vector_length_encoding(this);
21575       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21576     } else {
21577       assert(Matcher::vector_length(this) == 2, "");
21578       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21579       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21580     }
21581   %}
21582   ins_pipe( pipe_slow );
21583 %}
21584 
21585 // Longs vector constant shift
21586 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21587   match(Set dst (LShiftVL src (LShiftCntV shift)));
21588   match(Set dst (URShiftVL src (RShiftCntV shift)));
21589   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21590   ins_encode %{
21591     int opcode = this->ideal_Opcode();
21592     if (UseAVX > 0) {
21593       int vector_len = vector_length_encoding(this);
21594       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21595     } else {
21596       assert(Matcher::vector_length(this) == 2, "");
21597       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21598       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21599     }
21600   %}
21601   ins_pipe( pipe_slow );
21602 %}
21603 
21604 // -------------------ArithmeticRightShift -----------------------------------
21605 // Long vector arithmetic right shift
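// psraq only exists under AVX512 (the evpsraq rule below); the pre-EVEX rule
// derives the arithmetic shift from the logical one via the sign-extension
// identity:
//   sra(x, s) == (srl(x, s) ^ m) - m,  where m = srl(0x8000000000000000, s)
// The xor/subtract pair leaves nonnegative values unchanged and fills the
// vacated high bits with ones when the original sign bit was set.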
21606 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21607   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21608   match(Set dst (RShiftVL src shift));
21609   effect(TEMP dst, TEMP tmp);
21610   format %{ "vshiftq $dst,$src,$shift" %}
21611   ins_encode %{
21612     uint vlen = Matcher::vector_length(this);
21613     if (vlen == 2) {
21614       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21615       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21616       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21617       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21618       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21619       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21620     } else {
21621       assert(vlen == 4, "sanity");
21622       assert(UseAVX > 1, "required");
21623       int vlen_enc = Assembler::AVX_256bit;
21624       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21625       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21626       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21627       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21628       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21629     }
21630   %}
21631   ins_pipe( pipe_slow );
21632 %}
21633 
21634 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21635   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21636   match(Set dst (RShiftVL src shift));
21637   format %{ "vshiftq $dst,$src,$shift" %}
21638   ins_encode %{
21639     int vlen_enc = vector_length_encoding(this);
21640     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21641   %}
21642   ins_pipe( pipe_slow );
21643 %}
21644 
21645 // ------------------- Variable Shift -----------------------------
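// "Variable" shifts take a per-lane count vector (AVX2 vpsllvd/vpsrlvd and
// friends) rather than a single count broadcast through the low bits of an
// xmm register. Lane sizes with no native variable shift (bytes everywhere,
// shorts without AVX512BW) are widened, shifted at the wider size, and
// narrowed back, as the rules below do.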
21646 // Byte variable shift
21647 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21648   predicate(Matcher::vector_length(n) <= 8 &&
21649             n->as_ShiftV()->is_var_shift() &&
21650             !VM_Version::supports_avx512bw());
21651   match(Set dst ( LShiftVB src shift));
21652   match(Set dst ( RShiftVB src shift));
21653   match(Set dst (URShiftVB src shift));
21654   effect(TEMP dst, TEMP vtmp);
21655   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21656   ins_encode %{
21657     assert(UseAVX >= 2, "required");
21658 
21659     int opcode = this->ideal_Opcode();
21660     int vlen_enc = Assembler::AVX_128bit;
21661     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21662     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21663   %}
21664   ins_pipe( pipe_slow );
21665 %}
21666 
21667 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21668   predicate(Matcher::vector_length(n) == 16 &&
21669             n->as_ShiftV()->is_var_shift() &&
21670             !VM_Version::supports_avx512bw());
21671   match(Set dst ( LShiftVB src shift));
21672   match(Set dst ( RShiftVB src shift));
21673   match(Set dst (URShiftVB src shift));
21674   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21675   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21676   ins_encode %{
21677     assert(UseAVX >= 2, "required");
21678 
21679     int opcode = this->ideal_Opcode();
21680     int vlen_enc = Assembler::AVX_128bit;
21681     // Shift lower half and get word result in dst
21682     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21683 
21684     // Shift upper half and get word result in vtmp1
21685     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21686     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21687     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21688 
21689     // Merge and down convert the two word results to byte in dst
21690     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21691   %}
21692   ins_pipe( pipe_slow );
21693 %}
21694 
21695 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21696   predicate(Matcher::vector_length(n) == 32 &&
21697             n->as_ShiftV()->is_var_shift() &&
21698             !VM_Version::supports_avx512bw());
21699   match(Set dst ( LShiftVB src shift));
21700   match(Set dst ( RShiftVB src shift));
21701   match(Set dst (URShiftVB src shift));
21702   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21703   format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21704   ins_encode %{
21705     assert(UseAVX >= 2, "required");
21706 
21707     int opcode = this->ideal_Opcode();
21708     int vlen_enc = Assembler::AVX_128bit;
21709     // Process lower 128 bits and get result in dst
21710     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21711     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21712     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21713     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21714     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21715 
21716     // Process higher 128 bits and get result in vtmp3
21717     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21718     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21719     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21720     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21721     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21722     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21723     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21724 
21725     // Merge the two results in dst
21726     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21727   %}
21728   ins_pipe( pipe_slow );
21729 %}
21730 
21731 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21732   predicate(Matcher::vector_length(n) <= 32 &&
21733             n->as_ShiftV()->is_var_shift() &&
21734             VM_Version::supports_avx512bw());
21735   match(Set dst ( LShiftVB src shift));
21736   match(Set dst ( RShiftVB src shift));
21737   match(Set dst (URShiftVB src shift));
21738   effect(TEMP dst, TEMP vtmp);
21739   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21740   ins_encode %{
21741     assert(UseAVX > 2, "required");
21742 
21743     int opcode = this->ideal_Opcode();
21744     int vlen_enc = vector_length_encoding(this);
21745     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21746   %}
21747   ins_pipe( pipe_slow );
21748 %}
21749 
21750 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21751   predicate(Matcher::vector_length(n) == 64 &&
21752             n->as_ShiftV()->is_var_shift() &&
21753             VM_Version::supports_avx512bw());
21754   match(Set dst ( LShiftVB src shift));
21755   match(Set dst ( RShiftVB src shift));
21756   match(Set dst (URShiftVB src shift));
21757   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21758   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21759   ins_encode %{
21760     assert(UseAVX > 2, "required");
21761 
21762     int opcode = this->ideal_Opcode();
21763     int vlen_enc = Assembler::AVX_256bit;
21764     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21765     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21766     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21767     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21768     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21769   %}
21770   ins_pipe( pipe_slow );
21771 %}
21772 
21773 // Short variable shift
21774 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21775   predicate(Matcher::vector_length(n) <= 8 &&
21776             n->as_ShiftV()->is_var_shift() &&
21777             !VM_Version::supports_avx512bw());
21778   match(Set dst ( LShiftVS src shift));
21779   match(Set dst ( RShiftVS src shift));
21780   match(Set dst (URShiftVS src shift));
21781   effect(TEMP dst, TEMP vtmp);
21782   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21783   ins_encode %{
21784     assert(UseAVX >= 2, "required");
21785 
21786     int opcode = this->ideal_Opcode();
21787     bool sign = (opcode != Op_URShiftVS);
21788     int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21791     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21792     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21793     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21794     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21795   %}
21796   ins_pipe( pipe_slow );
21797 %}
21798 
21799 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21800   predicate(Matcher::vector_length(n) == 16 &&
21801             n->as_ShiftV()->is_var_shift() &&
21802             !VM_Version::supports_avx512bw());
21803   match(Set dst ( LShiftVS src shift));
21804   match(Set dst ( RShiftVS src shift));
21805   match(Set dst (URShiftVS src shift));
21806   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21807   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21808   ins_encode %{
21809     assert(UseAVX >= 2, "required");
21810 
21811     int opcode = this->ideal_Opcode();
21812     bool sign = (opcode != Op_URShiftVS);
21813     int vlen_enc = Assembler::AVX_256bit;
21814     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21815     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21816     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21817     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21818     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21819 
21820     // Shift upper half, with result in dst using vtmp1 as TEMP
21821     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21822     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21823     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21824     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21825     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21826     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21827 
21828     // Merge lower and upper half result into dst
21829     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21830     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21831   %}
21832   ins_pipe( pipe_slow );
21833 %}
21834 
21835 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21836   predicate(n->as_ShiftV()->is_var_shift() &&
21837             VM_Version::supports_avx512bw());
21838   match(Set dst ( LShiftVS src shift));
21839   match(Set dst ( RShiftVS src shift));
21840   match(Set dst (URShiftVS src shift));
21841   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21842   ins_encode %{
21843     assert(UseAVX > 2, "required");
21844 
21845     int opcode = this->ideal_Opcode();
21846     int vlen_enc = vector_length_encoding(this);
21847     if (!VM_Version::supports_avx512vl()) {
21848       vlen_enc = Assembler::AVX_512bit;
21849     }
21850     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21851   %}
21852   ins_pipe( pipe_slow );
21853 %}
21854 
// Integer variable shift
21856 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21857   predicate(n->as_ShiftV()->is_var_shift());
21858   match(Set dst ( LShiftVI src shift));
21859   match(Set dst ( RShiftVI src shift));
21860   match(Set dst (URShiftVI src shift));
21861   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21862   ins_encode %{
21863     assert(UseAVX >= 2, "required");
21864 
21865     int opcode = this->ideal_Opcode();
21866     int vlen_enc = vector_length_encoding(this);
21867     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21868   %}
21869   ins_pipe( pipe_slow );
21870 %}
21871 
// Long variable shift
21873 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21874   predicate(n->as_ShiftV()->is_var_shift());
21875   match(Set dst ( LShiftVL src shift));
21876   match(Set dst (URShiftVL src shift));
21877   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21878   ins_encode %{
21879     assert(UseAVX >= 2, "required");
21880 
21881     int opcode = this->ideal_Opcode();
21882     int vlen_enc = vector_length_encoding(this);
21883     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21884   %}
21885   ins_pipe( pipe_slow );
21886 %}
21887 
// Long variable arithmetic right shift
21889 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21890   predicate(Matcher::vector_length(n) <= 4 &&
21891             n->as_ShiftV()->is_var_shift() &&
21892             UseAVX == 2);
21893   match(Set dst (RShiftVL src shift));
21894   effect(TEMP dst, TEMP vtmp);
21895   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21896   ins_encode %{
21897     int opcode = this->ideal_Opcode();
21898     int vlen_enc = vector_length_encoding(this);
21899     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21900                  $vtmp$$XMMRegister);
21901   %}
21902   ins_pipe( pipe_slow );
21903 %}
21904 
21905 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21906   predicate(n->as_ShiftV()->is_var_shift() &&
21907             UseAVX > 2);
21908   match(Set dst (RShiftVL src shift));
21909   format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
21910   ins_encode %{
21911     int opcode = this->ideal_Opcode();
21912     int vlen_enc = vector_length_encoding(this);
21913     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21914   %}
21915   ins_pipe( pipe_slow );
21916 %}
21917 
21918 // --------------------------------- AND --------------------------------------
21919 
21920 instruct vand(vec dst, vec src) %{
21921   predicate(UseAVX == 0);
21922   match(Set dst (AndV dst src));
21923   format %{ "pand    $dst,$src\t! and vectors" %}
21924   ins_encode %{
21925     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21926   %}
21927   ins_pipe( pipe_slow );
21928 %}
21929 
21930 instruct vand_reg(vec dst, vec src1, vec src2) %{
21931   predicate(UseAVX > 0);
21932   match(Set dst (AndV src1 src2));
21933   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
21934   ins_encode %{
21935     int vlen_enc = vector_length_encoding(this);
21936     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21937   %}
21938   ins_pipe( pipe_slow );
21939 %}
21940 
21941 instruct vand_mem(vec dst, vec src, memory mem) %{
21942   predicate((UseAVX > 0) &&
21943             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21944   match(Set dst (AndV src (LoadVector mem)));
21945   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
21946   ins_encode %{
21947     int vlen_enc = vector_length_encoding(this);
21948     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21949   %}
21950   ins_pipe( pipe_slow );
21951 %}
21952 
21953 // --------------------------------- OR ---------------------------------------
21954 
21955 instruct vor(vec dst, vec src) %{
21956   predicate(UseAVX == 0);
21957   match(Set dst (OrV dst src));
21958   format %{ "por     $dst,$src\t! or vectors" %}
21959   ins_encode %{
21960     __ por($dst$$XMMRegister, $src$$XMMRegister);
21961   %}
21962   ins_pipe( pipe_slow );
21963 %}
21964 
21965 instruct vor_reg(vec dst, vec src1, vec src2) %{
21966   predicate(UseAVX > 0);
21967   match(Set dst (OrV src1 src2));
21968   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
21969   ins_encode %{
21970     int vlen_enc = vector_length_encoding(this);
21971     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21972   %}
21973   ins_pipe( pipe_slow );
21974 %}
21975 
21976 instruct vor_mem(vec dst, vec src, memory mem) %{
21977   predicate((UseAVX > 0) &&
21978             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21979   match(Set dst (OrV src (LoadVector mem)));
21980   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
21981   ins_encode %{
21982     int vlen_enc = vector_length_encoding(this);
21983     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21984   %}
21985   ins_pipe( pipe_slow );
21986 %}
21987 
21988 // --------------------------------- XOR --------------------------------------
21989 
21990 instruct vxor(vec dst, vec src) %{
21991   predicate(UseAVX == 0);
21992   match(Set dst (XorV dst src));
21993   format %{ "pxor    $dst,$src\t! xor vectors" %}
21994   ins_encode %{
21995     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21996   %}
21997   ins_pipe( pipe_slow );
21998 %}
21999 
22000 instruct vxor_reg(vec dst, vec src1, vec src2) %{
22001   predicate(UseAVX > 0);
22002   match(Set dst (XorV src1 src2));
22003   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
22004   ins_encode %{
22005     int vlen_enc = vector_length_encoding(this);
22006     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22007   %}
22008   ins_pipe( pipe_slow );
22009 %}
22010 
22011 instruct vxor_mem(vec dst, vec src, memory mem) %{
22012   predicate((UseAVX > 0) &&
22013             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22014   match(Set dst (XorV src (LoadVector mem)));
22015   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
22016   ins_encode %{
22017     int vlen_enc = vector_length_encoding(this);
22018     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22019   %}
22020   ins_pipe( pipe_slow );
22021 %}
22022 
22023 // --------------------------------- VectorCast --------------------------------------
22024 
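// The cast rules are named after the source element type; the destination
// element type is queried from the node (to_elem_bt below), so a single rule
// fans out to byte->{short,int,long,float,double} and so on.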
22025 instruct vcastBtoX(vec dst, vec src) %{
22026   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
22027   match(Set dst (VectorCastB2X src));
22028   format %{ "vector_cast_b2x $dst,$src\t!" %}
22029   ins_encode %{
22030     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22031     int vlen_enc = vector_length_encoding(this);
22032     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22033   %}
22034   ins_pipe( pipe_slow );
22035 %}
22036 
22037 instruct vcastBtoD(legVec dst, legVec src) %{
22038   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
22039   match(Set dst (VectorCastB2X src));
22040   format %{ "vector_cast_b2x $dst,$src\t!" %}
22041   ins_encode %{
22042     int vlen_enc = vector_length_encoding(this);
22043     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22044   %}
22045   ins_pipe( pipe_slow );
22046 %}
22047 
22048 instruct castStoX(vec dst, vec src) %{
22049   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22050             Matcher::vector_length(n->in(1)) <= 8 && // src
22051             Matcher::vector_element_basic_type(n) == T_BYTE);
22052   match(Set dst (VectorCastS2X src));
22053   format %{ "vector_cast_s2x $dst,$src" %}
22054   ins_encode %{
22055     assert(UseAVX > 0, "required");
22056 
22057     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
22058     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
22059   %}
22060   ins_pipe( pipe_slow );
22061 %}
22062 
22063 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
22064   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22065             Matcher::vector_length(n->in(1)) == 16 && // src
22066             Matcher::vector_element_basic_type(n) == T_BYTE);
22067   match(Set dst (VectorCastS2X src));
22068   effect(TEMP dst, TEMP vtmp);
22069   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22070   ins_encode %{
22071     assert(UseAVX > 0, "required");
22072 
22073     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22074     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22075     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22076     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22077   %}
22078   ins_pipe( pipe_slow );
22079 %}
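
// The two AVX2 short-to-byte casts above narrow by masking and packing: the
// vpand clears the high byte of every 16-bit lane, so the unsigned-saturating
// vpackuswb reduces to plain truncation (e.g. 0xFF41 & 0x00FF = 0x0041, which
// packs to the byte 0x41). vpackuswb packs within 128-bit lanes only, which
// is why the 16-element variant first pulls the upper lane into $vtmp and
// packs it against the lower one.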
22080 
22081 instruct vcastStoX_evex(vec dst, vec src) %{
22082   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22083             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22084   match(Set dst (VectorCastS2X src));
22085   format %{ "vector_cast_s2x $dst,$src\t!" %}
22086   ins_encode %{
22087     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22088     int src_vlen_enc = vector_length_encoding(this, $src);
22089     int vlen_enc = vector_length_encoding(this);
22090     switch (to_elem_bt) {
22091       case T_BYTE:
22092         if (!VM_Version::supports_avx512vl()) {
22093           vlen_enc = Assembler::AVX_512bit;
22094         }
22095         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22096         break;
22097       case T_INT:
22098         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22099         break;
22100       case T_FLOAT:
22101         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22102         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22103         break;
22104       case T_LONG:
22105         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22106         break;
22107       case T_DOUBLE: {
22108         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22109         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22110         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22111         break;
22112       }
22113       default:
22114         ShouldNotReachHere();
22115     }
22116   %}
22117   ins_pipe( pipe_slow );
22118 %}
22119 
22120 instruct castItoX(vec dst, vec src) %{
22121   predicate(UseAVX <= 2 &&
22122             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22123             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22124   match(Set dst (VectorCastI2X src));
22125   format %{ "vector_cast_i2x $dst,$src" %}
22126   ins_encode %{
22127     assert(UseAVX > 0, "required");
22128 
22129     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22130     int vlen_enc = vector_length_encoding(this, $src);
22131 
22132     if (to_elem_bt == T_BYTE) {
22133       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22134       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22135       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22136     } else {
22137       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22138       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22139       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22140     }
22141   %}
22142   ins_pipe( pipe_slow );
22143 %}
22144 
22145 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22146   predicate(UseAVX <= 2 &&
22147             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22148             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22149   match(Set dst (VectorCastI2X src));
22150   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22151   effect(TEMP dst, TEMP vtmp);
22152   ins_encode %{
22153     assert(UseAVX > 0, "required");
22154 
22155     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22156     int vlen_enc = vector_length_encoding(this, $src);
22157 
22158     if (to_elem_bt == T_BYTE) {
22159       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22160       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22161       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22162       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22163     } else {
22164       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22165       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22166       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22167       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22168     }
22169   %}
22170   ins_pipe( pipe_slow );
22171 %}
22172 
22173 instruct vcastItoX_evex(vec dst, vec src) %{
22174   predicate(UseAVX > 2 ||
22175             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22176   match(Set dst (VectorCastI2X src));
22177   format %{ "vector_cast_i2x $dst,$src\t!" %}
22178   ins_encode %{
22179     assert(UseAVX > 0, "required");
22180 
22181     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22182     int src_vlen_enc = vector_length_encoding(this, $src);
22183     int dst_vlen_enc = vector_length_encoding(this);
22184     switch (dst_elem_bt) {
22185       case T_BYTE:
22186         if (!VM_Version::supports_avx512vl()) {
22187           src_vlen_enc = Assembler::AVX_512bit;
22188         }
22189         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22190         break;
22191       case T_SHORT:
22192         if (!VM_Version::supports_avx512vl()) {
22193           src_vlen_enc = Assembler::AVX_512bit;
22194         }
22195         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22196         break;
22197       case T_FLOAT:
22198         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22199         break;
22200       case T_LONG:
22201         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22202         break;
22203       case T_DOUBLE:
22204         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22205         break;
22206       default:
22207         ShouldNotReachHere();
22208     }
22209   %}
22210   ins_pipe( pipe_slow );
22211 %}
22212 
22213 instruct vcastLtoBS(vec dst, vec src) %{
22214   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22215             UseAVX <= 2);
22216   match(Set dst (VectorCastL2X src));
22217   format %{ "vector_cast_l2x  $dst,$src" %}
22218   ins_encode %{
22219     assert(UseAVX > 0, "required");
22220 
22221     int vlen = Matcher::vector_length_in_bytes(this, $src);
22222     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
22223     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22224                                                       : ExternalAddress(vector_int_to_short_mask());
22225     if (vlen <= 16) {
22226       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22227       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22228       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22229     } else {
22230       assert(vlen <= 32, "required");
22231       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22232       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22233       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22234       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22235     }
22236     if (to_elem_bt == T_BYTE) {
22237       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22238     }
22239   %}
22240   ins_pipe( pipe_slow );
22241 %}
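
// Shuffle-immediate sketch for the long-to-int step above: imm8 == 8
// (0b00001000) encodes the dword selectors {0, 2, 0, 0}, so vpshufd gathers
// the low 32 bits of both 64-bit lanes into elements 0 and 1. For 256-bit
// sources, vpermilps applies the same selectors within each 128-bit lane and
// vpermpd (qword selectors {0, 2, 0, 0}) then brings the two packed halves
// together across lanes, before the int-to-byte/short mask-and-pack sequence
// above finishes the narrowing.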
22242 
22243 instruct vcastLtoX_evex(vec dst, vec src) %{
22244   predicate(UseAVX > 2 ||
22245             (Matcher::vector_element_basic_type(n) == T_INT ||
22246              Matcher::vector_element_basic_type(n) == T_FLOAT ||
22247              Matcher::vector_element_basic_type(n) == T_DOUBLE));
22248   match(Set dst (VectorCastL2X src));
22249   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22250   ins_encode %{
22251     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22252     int vlen = Matcher::vector_length_in_bytes(this, $src);
22253     int vlen_enc = vector_length_encoding(this, $src);
22254     switch (to_elem_bt) {
22255       case T_BYTE:
22256         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22257           vlen_enc = Assembler::AVX_512bit;
22258         }
22259         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22260         break;
22261       case T_SHORT:
22262         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22263           vlen_enc = Assembler::AVX_512bit;
22264         }
22265         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22266         break;
22267       case T_INT:
22268         if (vlen == 8) {
22269           if ($dst$$XMMRegister != $src$$XMMRegister) {
22270             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22271           }
22272         } else if (vlen == 16) {
22273           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22274         } else if (vlen == 32) {
22275           if (UseAVX > 2) {
22276             if (!VM_Version::supports_avx512vl()) {
22277               vlen_enc = Assembler::AVX_512bit;
22278             }
22279             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22280           } else {
22281             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22282             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22283           }
22284         } else { // vlen == 64
22285           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22286         }
22287         break;
22288       case T_FLOAT:
22289         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22290         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22291         break;
22292       case T_DOUBLE:
22293         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22294         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22295         break;
22297       default: assert(false, "%s", type2name(to_elem_bt));
22298     }
22299   %}
22300   ins_pipe( pipe_slow );
22301 %}
22302 
22303 instruct vcastFtoD_reg(vec dst, vec src) %{
22304   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22305   match(Set dst (VectorCastF2X src));
22306   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22307   ins_encode %{
22308     int vlen_enc = vector_length_encoding(this);
22309     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22310   %}
22311   ins_pipe( pipe_slow );
22312 %}
22313
22315 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22316   predicate(!VM_Version::supports_avx10_2() &&
22317             !VM_Version::supports_avx512vl() &&
22318             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22319             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22320             is_integral_type(Matcher::vector_element_basic_type(n)));
22321   match(Set dst (VectorCastF2X src));
22322   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22323   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22324   ins_encode %{
22325     int vlen_enc = vector_length_encoding(this, $src);
22326     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22327     // JDK-8292878 removed the need for an explicit scratch register to load
22328     // addresses wider than 32 bits for the register-indirect addressing mode,
22329     // since stub constants are part of the code cache and there is currently a
22330     // 2G cap on ReservedCodeCacheSize. Targets are free to raise this limit,
22331     // but a code cache larger than 2G looks unreasonable in practice; on the
22332     // flip side, with the given cap we save a temporary register allocation,
22333     // which in the limiting case can prevent spilling under high register pressure.
22334     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22335                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22336                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22337   %}
22338   ins_pipe( pipe_slow );
22339 %}
22340 
22341 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22342   predicate(!VM_Version::supports_avx10_2() &&
22343             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22344             is_integral_type(Matcher::vector_element_basic_type(n)));
22345   match(Set dst (VectorCastF2X src));
22346   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22347   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22348   ins_encode %{
22349     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22350     if (to_elem_bt == T_LONG) {
22351       int vlen_enc = vector_length_encoding(this);
22352       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22353                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22354                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22355     } else {
22356       int vlen_enc = vector_length_encoding(this, $src);
22357       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22358                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22359                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22360     }
22361   %}
22362   ins_pipe( pipe_slow );
22363 %}
22364 
22365 instruct castFtoX_reg_avx10(vec dst, vec src) %{
22366   predicate(VM_Version::supports_avx10_2() &&
22367             is_integral_type(Matcher::vector_element_basic_type(n)));
22368   match(Set dst (VectorCastF2X src));
22369   format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22370   ins_encode %{
22371     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22372     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22373     __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22374   %}
22375   ins_pipe( pipe_slow );
22376 %}
22377 
22378 instruct castFtoX_mem_avx10(vec dst, memory src) %{
22379   predicate(VM_Version::supports_avx10_2() &&
22380             is_integral_type(Matcher::vector_element_basic_type(n)));
22381   match(Set dst (VectorCastF2X (LoadVector src)));
22382   format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22383   ins_encode %{
22384     int vlen = Matcher::vector_length(this);
22385     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22386     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22387     __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22388   %}
22389   ins_pipe( pipe_slow );
22390 %}
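
// The AVX10.2 variants above need no temporaries, mask registers or flag
// kills: AVX10.2 adds saturating float-to-integer conversions (e.g.
// vcvttps2dqs) whose NaN-to-zero and clamp-on-overflow behavior already
// matches Java's cast semantics, and the memory form folds the vector load
// directly. (vector_castF2X_avx10 is assumed to emit those instructions.)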
22391 
22392 instruct vcastDtoF_reg(vec dst, vec src) %{
22393   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22394   match(Set dst (VectorCastD2X src));
22395   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22396   ins_encode %{
22397     int vlen_enc = vector_length_encoding(this, $src);
22398     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22399   %}
22400   ins_pipe( pipe_slow );
22401 %}
22402 
22403 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22404   predicate(!VM_Version::supports_avx10_2() &&
22405             !VM_Version::supports_avx512vl() &&
22406             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22407             is_integral_type(Matcher::vector_element_basic_type(n)));
22408   match(Set dst (VectorCastD2X src));
22409   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22410   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22411   ins_encode %{
22412     int vlen_enc = vector_length_encoding(this, $src);
22413     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22414     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22415                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22416                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22417   %}
22418   ins_pipe( pipe_slow );
22419 %}
22420 
22421 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22422   predicate(!VM_Version::supports_avx10_2() &&
22423             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22424             is_integral_type(Matcher::vector_element_basic_type(n)));
22425   match(Set dst (VectorCastD2X src));
22426   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22427   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22428   ins_encode %{
22429     int vlen_enc = vector_length_encoding(this, $src);
22430     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22431     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22432                               ExternalAddress(vector_float_signflip());
22433     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22434                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22435   %}
22436   ins_pipe( pipe_slow );
22437 %}
22438 
22439 instruct castDtoX_reg_avx10(vec dst, vec src) %{
22440   predicate(VM_Version::supports_avx10_2() &&
22441             is_integral_type(Matcher::vector_element_basic_type(n)));
22442   match(Set dst (VectorCastD2X src));
22443   format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22444   ins_encode %{
22445     int vlen_enc = vector_length_encoding(this, $src);
22446     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22447     __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22448   %}
22449   ins_pipe( pipe_slow );
22450 %}
22451 
22452 instruct castDtoX_mem_avx10(vec dst, memory src) %{
22453   predicate(VM_Version::supports_avx10_2() &&
22454             is_integral_type(Matcher::vector_element_basic_type(n)));
22455   match(Set dst (VectorCastD2X (LoadVector src)));
22456   format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22457   ins_encode %{
22458     int vlen = Matcher::vector_length(this);
22459     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22460     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22461     __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22462   %}
22463   ins_pipe( pipe_slow );
22464 %}
22465 
22466 instruct vucast(vec dst, vec src) %{
22467   match(Set dst (VectorUCastB2X src));
22468   match(Set dst (VectorUCastS2X src));
22469   match(Set dst (VectorUCastI2X src));
22470   format %{ "vector_ucast $dst,$src\t!" %}
22471   ins_encode %{
22472     assert(UseAVX > 0, "required");
22473 
22474     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22475     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22476     int vlen_enc = vector_length_encoding(this);
22477     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22478   %}
22479   ins_pipe( pipe_slow );
22480 %}
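
// Unsigned (zero-extending) widening casts share a single pattern for all
// source types; vector_unsigned_cast presumably lowers to the vpmovzx*
// family (vpmovzxbw, vpmovzxbd, vpmovzxwd, ...), which needs no CPU-feature
// predicate beyond the UseAVX > 0 assert above.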
22481 
22482 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22483   predicate(!VM_Version::supports_avx512vl() &&
22484             Matcher::vector_length_in_bytes(n) < 64 &&
22485             Matcher::vector_element_basic_type(n) == T_INT);
22486   match(Set dst (RoundVF src));
22487   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22488   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22489   ins_encode %{
22490     int vlen_enc = vector_length_encoding(this);
22491     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22492     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22493                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22494                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22495   %}
22496   ins_pipe( pipe_slow );
22497 %}
22498 
22499 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22500   predicate((VM_Version::supports_avx512vl() ||
22501              Matcher::vector_length_in_bytes(n) == 64) &&
22502              Matcher::vector_element_basic_type(n) == T_INT);
22503   match(Set dst (RoundVF src));
22504   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22505   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22506   ins_encode %{
22507     int vlen_enc = vector_length_encoding(this);
22508     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22509     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22510                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22511                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22512   %}
22513   ins_pipe( pipe_slow );
22514 %}
22515 
22516 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22517   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22518   match(Set dst (RoundVD src));
22519   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22520   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22521   ins_encode %{
22522     int vlen_enc = vector_length_encoding(this);
22523     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22524     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22525                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22526                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22527   %}
22528   ins_pipe( pipe_slow );
22529 %}
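
// About the new_mxcsr constant used by the three rounding patterns above:
// 0x3F80 masks all FP exceptions and selects round-toward-negative-infinity
// (RC = 01), so converting x + 0.5 under this mode yields floor(x + 0.5),
// i.e. Math.round semantics. The 0x3FBF value used with EnableX86ECoreOpts
// additionally pre-sets the six exception status flags, presumably so the
// conversions never need to update them, which is costly on E-cores.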
22530 
22531 // --------------------------------- VectorMaskCmp --------------------------------------
22532 
22533 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22534   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22535             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22536             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22537             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22538   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22539   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22540   ins_encode %{
22541     int vlen_enc = vector_length_encoding(this, $src1);
22542     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22543     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22544       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22545     } else {
22546       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22547     }
22548   %}
22549   ins_pipe( pipe_slow );
22550 %}
22551 
22552 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22553   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22554             n->bottom_type()->isa_vectmask() == nullptr &&
22555             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22556   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22557   effect(TEMP ktmp);
22558   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22559   ins_encode %{
22560     int vlen_enc = Assembler::AVX_512bit;
22561     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22562     KRegister mask = k0; // The comparison itself is not being masked.
22563     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22564       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22565       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22566     } else {
22567       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22568       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22569     }
22570   %}
22571   ins_pipe( pipe_slow );
22572 %}
22573 
22574 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22575   predicate(n->bottom_type()->isa_vectmask() &&
22576             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22577   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22578   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22579   ins_encode %{
22580     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22581     int vlen_enc = vector_length_encoding(this, $src1);
22582     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22583     KRegister mask = k0; // The comparison itself is not being masked.
22584     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22585       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22586     } else {
22587       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22588     }
22589   %}
22590   ins_pipe( pipe_slow );
22591 %}
22592 
22593 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22594   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22595             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22596             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22597             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22598             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22599             (n->in(2)->get_int() == BoolTest::eq ||
22600              n->in(2)->get_int() == BoolTest::lt ||
22601              n->in(2)->get_int() == BoolTest::gt)); // cond
22602   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22603   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22604   ins_encode %{
22605     int vlen_enc = vector_length_encoding(this, $src1);
22606     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22607     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22608     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22609   %}
22610   ins_pipe( pipe_slow );
22611 %}
22612 
22613 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22614   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22615             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22616             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22617             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22618             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22619             (n->in(2)->get_int() == BoolTest::ne ||
22620              n->in(2)->get_int() == BoolTest::le ||
22621              n->in(2)->get_int() == BoolTest::ge)); // cond
22622   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22623   effect(TEMP dst, TEMP xtmp);
22624   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22625   ins_encode %{
22626     int vlen_enc = vector_length_encoding(this, $src1);
22627     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22628     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22629     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22630   %}
22631   ins_pipe( pipe_slow );
22632 %}
22633 
22634 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22635   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22636             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22637             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22638             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22639             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22640   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22641   effect(TEMP dst, TEMP xtmp);
22642   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22643   ins_encode %{
22644     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22645     int vlen_enc = vector_length_encoding(this, $src1);
22646     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22647     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22648 
22649     if (vlen_enc == Assembler::AVX_128bit) {
22650       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22651     } else {
22652       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22653     }
22654     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22655     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22656     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22657   %}
22658   ins_pipe( pipe_slow );
22659 %}
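
// The unsigned compare above uses the classic sign-flip trick: XORing both
// operands with the per-element high bit turns an unsigned comparison into a
// signed one. For bytes, 0xFF (255) vs 0x01 becomes 0x7F (127) vs 0x81 (-127),
// and signed 127 > -127 agrees with unsigned 255 > 1. high_bit_set() supplies
// the pattern as a 64-bit constant, broadcast with vmovddup (128-bit) or
// vbroadcastsd (wider).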
22660 
22661 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22662   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22663              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22664              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22665   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22666   effect(TEMP ktmp);
22667   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22668   ins_encode %{
22669     assert(UseAVX > 2, "required");
22670 
22671     int vlen_enc = vector_length_encoding(this, $src1);
22672     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22673     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22674     KRegister mask = k0; // The comparison itself is not being masked.
22675     bool merge = false;
22676     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22677 
22678     switch (src1_elem_bt) {
22679       case T_INT: {
22680         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22681         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22682         break;
22683       }
22684       case T_LONG: {
22685         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22686         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22687         break;
22688       }
22689       default: assert(false, "%s", type2name(src1_elem_bt));
22690     }
22691   %}
22692   ins_pipe( pipe_slow );
22693 %}
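
// AVX-512 integer compares produce a predicate in a mask register. Since this
// pattern must yield a vector mask, the k-register result is expanded to
// all-ones/all-zero lanes by loading the all-bits-set constant under the mask
// with zeroing-masking (merge == false), so unselected lanes become zero.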
22694
22696 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22697   predicate(n->bottom_type()->isa_vectmask() &&
22698             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22699   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22700   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22701   ins_encode %{
22702     assert(UseAVX > 2, "required");
22703     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22704 
22705     int vlen_enc = vector_length_encoding(this, $src1);
22706     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22707     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22708     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22709 
22710     // Dispatch on the source element type; the compare itself is unmasked (k0).
22711     switch (src1_elem_bt) {
22712       case T_BYTE: {
22713         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22714         break;
22715       }
22716       case T_SHORT: {
22717         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22718         break;
22719       }
22720       case T_INT: {
22721         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22722         break;
22723       }
22724       case T_LONG: {
22725         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22726         break;
22727       }
22728       default: assert(false, "%s", type2name(src1_elem_bt));
22729     }
22730   %}
22731   ins_pipe( pipe_slow );
22732 %}
22733 
22734 // Extract
22735 
22736 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22737   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22738   match(Set dst (ExtractI src idx));
22739   match(Set dst (ExtractS src idx));
22740   match(Set dst (ExtractB src idx));
22741   format %{ "extractI $dst,$src,$idx\t!" %}
22742   ins_encode %{
22743     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22744 
22745     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22746     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22747   %}
22748   ins_pipe( pipe_slow );
22749 %}
22750 
22751 instruct vextractI(rRegI dst, legVec src, immU8 idx, legVec vtmp) %{
22752   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22753             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22754   match(Set dst (ExtractI src idx));
22755   match(Set dst (ExtractS src idx));
22756   match(Set dst (ExtractB src idx));
22757   effect(TEMP vtmp);
22758   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22759   ins_encode %{
22760     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22761 
22762     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22763     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22764     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22765   %}
22766   ins_pipe( pipe_slow );
22767 %}
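
// Extraction from 32/64-byte vectors is a two-step operation, roughly:
// get_lane isolates the 128-bit lane containing the element into $vtmp
// (returning $src itself for lane 0), then get_elem picks the element out of
// that lane with a pextr*-style instruction using the in-lane index.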
22768 
22769 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22770   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22771   match(Set dst (ExtractL src idx));
22772   format %{ "extractL $dst,$src,$idx\t!" %}
22773   ins_encode %{
22774     assert(UseSSE >= 4, "required");
22775     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22776 
22777     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22778   %}
22779   ins_pipe( pipe_slow );
22780 %}
22781 
22782 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22783   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22784             Matcher::vector_length(n->in(1)) == 8);  // src
22785   match(Set dst (ExtractL src idx));
22786   effect(TEMP vtmp);
22787   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22788   ins_encode %{
22789     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22790 
22791     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22792     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22793   %}
22794   ins_pipe( pipe_slow );
22795 %}
22796 
22797 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22798   predicate(Matcher::vector_length(n->in(1)) <= 4);
22799   match(Set dst (ExtractF src idx));
22800   effect(TEMP dst, TEMP vtmp);
22801   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22802   ins_encode %{
22803     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22804 
22805     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22806   %}
22807   ins_pipe( pipe_slow );
22808 %}
22809 
22810 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22811   predicate(Matcher::vector_length(n->in(1)) == 8 ||  // src
22812             Matcher::vector_length(n->in(1)) == 16);  // src
22813   match(Set dst (ExtractF src idx));
22814   effect(TEMP vtmp);
22815   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22816   ins_encode %{
22817     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22818 
22819     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22820     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22821   %}
22822   ins_pipe( pipe_slow );
22823 %}
22824 
22825 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22826   predicate(Matcher::vector_length(n->in(1)) == 2); // src
22827   match(Set dst (ExtractD src idx));
22828   format %{ "extractD $dst,$src,$idx\t!" %}
22829   ins_encode %{
22830     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22831 
22832     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22833   %}
22834   ins_pipe( pipe_slow );
22835 %}
22836 
22837 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22838   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22839             Matcher::vector_length(n->in(1)) == 8);  // src
22840   match(Set dst (ExtractD src idx));
22841   effect(TEMP vtmp);
22842   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22843   ins_encode %{
22844     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22845 
22846     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22847     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22848   %}
22849   ins_pipe( pipe_slow );
22850 %}
22851 
22852 // --------------------------------- Vector Blend --------------------------------------
22853 
22854 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22855   predicate(UseAVX == 0);
22856   match(Set dst (VectorBlend (Binary dst src) mask));
22857   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
22858   effect(TEMP tmp);
22859   ins_encode %{
22860     assert(UseSSE >= 4, "required");
22861 
22862     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22863       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22864     }
22865     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22866   %}
22867   ins_pipe( pipe_slow );
22868 %}
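
// The SSE4.1 variable blends (pblendvb/blendvps/blendvpd) read their mask
// implicitly from xmm0, which is why the temp above is pinned to the rxmm0
// operand class and the mask is copied into it first.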
22869 
22870 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22871   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22872             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22873             Matcher::vector_length_in_bytes(n) <= 32 &&
22874             is_integral_type(Matcher::vector_element_basic_type(n)));
22875   match(Set dst (VectorBlend (Binary src1 src2) mask));
22876   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22877   ins_encode %{
22878     int vlen_enc = vector_length_encoding(this);
22879     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22880   %}
22881   ins_pipe( pipe_slow );
22882 %}
22883 
22884 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22885   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22886             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22887             Matcher::vector_length_in_bytes(n) <= 32 &&
22888             !is_integral_type(Matcher::vector_element_basic_type(n)));
22889   match(Set dst (VectorBlend (Binary src1 src2) mask));
22890   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22891   ins_encode %{
22892     int vlen_enc = vector_length_encoding(this);
22893     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22894   %}
22895   ins_pipe( pipe_slow );
22896 %}
22897 
22898 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22899   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22900             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22901             Matcher::vector_length_in_bytes(n) <= 32);
22902   match(Set dst (VectorBlend (Binary src1 src2) mask));
22903   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22904   effect(TEMP vtmp, TEMP dst);
22905   ins_encode %{
22906     int vlen_enc = vector_length_encoding(this);
22907     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22908     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22909     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
22910   %}
22911   ins_pipe( pipe_slow );
22912 %}
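
// The E-core friendly blend above uses the bitwise identity
//   dst = (mask & src2) | (~mask & src1)
// with vpandn supplying the ~mask & src1 term. This only requires mask lanes
// to be all-ones or all-zero, which VectorBlend guarantees for vector masks.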
22913 
22914 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22915   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22916             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22917   match(Set dst (VectorBlend (Binary src1 src2) mask));
22918   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22919   effect(TEMP ktmp);
22920   ins_encode %{
22921     int vlen_enc = Assembler::AVX_512bit;
22922     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22923     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22924     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22925   %}
22926   ins_pipe( pipe_slow );
22927 %}
22928
22930 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22931   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22932             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22933              VM_Version::supports_avx512bw()));
22934   match(Set dst (VectorBlend (Binary src1 src2) mask));
22935   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22936   ins_encode %{
22937     int vlen_enc = vector_length_encoding(this);
22938     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22939     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22940   %}
22941   ins_pipe( pipe_slow );
22942 %}
22943 
22944 // --------------------------------- ABS --------------------------------------
22945 // a = |a|
22946 instruct vabsB_reg(vec dst, vec src) %{
22947   match(Set dst (AbsVB  src));
22948   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22949   ins_encode %{
22950     uint vlen = Matcher::vector_length(this);
22951     if (vlen <= 16) {
22952       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22953     } else {
22954       int vlen_enc = vector_length_encoding(this);
22955       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22956     }
22957   %}
22958   ins_pipe( pipe_slow );
22959 %}
22960 
22961 instruct vabsS_reg(vec dst, vec src) %{
22962   match(Set dst (AbsVS  src));
22963   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22964   ins_encode %{
22965     uint vlen = Matcher::vector_length(this);
22966     if (vlen <= 8) {
22967       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22968     } else {
22969       int vlen_enc = vector_length_encoding(this);
22970       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22971     }
22972   %}
22973   ins_pipe( pipe_slow );
22974 %}
22975 
22976 instruct vabsI_reg(vec dst, vec src) %{
22977   match(Set dst (AbsVI  src));
22978   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22979   ins_encode %{
22980     uint vlen = Matcher::vector_length(this);
22981     if (vlen <= 4) {
22982       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22983     } else {
22984       int vlen_enc = vector_length_encoding(this);
22985       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22986     }
22987   %}
22988   ins_pipe( pipe_slow );
22989 %}
22990 
22991 instruct vabsL_reg(vec dst, vec src) %{
22992   match(Set dst (AbsVL  src));
22993   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22994   ins_encode %{
22995     assert(UseAVX > 2, "required");
22996     int vlen_enc = vector_length_encoding(this);
22997     if (!VM_Version::supports_avx512vl()) {
22998       vlen_enc = Assembler::AVX_512bit;
22999     }
23000     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23001   %}
23002   ins_pipe( pipe_slow );
23003 %}
23004 
23005 // --------------------------------- ABSNEG --------------------------------------
23006 
23007 instruct vabsnegF(vec dst, vec src) %{
23008   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
23009   match(Set dst (AbsVF src));
23010   match(Set dst (NegVF src));
23011   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
23012   ins_cost(150);
23013   ins_encode %{
23014     int opcode = this->ideal_Opcode();
23015     int vlen = Matcher::vector_length(this);
23016     if (vlen == 2) {
23017       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23018     } else {
23019       assert(vlen == 8 || vlen == 16, "required");
23020       int vlen_enc = vector_length_encoding(this);
23021       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23022     }
23023   %}
23024   ins_pipe( pipe_slow );
23025 %}
23026 
23027 instruct vabsneg4F(vec dst) %{
23028   predicate(Matcher::vector_length(n) == 4);
23029   match(Set dst (AbsVF dst));
23030   match(Set dst (NegVF dst));
23031   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
23032   ins_cost(150);
23033   ins_encode %{
23034     int opcode = this->ideal_Opcode();
23035     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
23036   %}
23037   ins_pipe( pipe_slow );
23038 %}
23039 
23040 instruct vabsnegD(vec dst, vec src) %{
23041   match(Set dst (AbsVD  src));
23042   match(Set dst (NegVD  src));
23043   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
23044   ins_encode %{
23045     int opcode = this->ideal_Opcode();
23046     uint vlen = Matcher::vector_length(this);
23047     if (vlen == 2) {
23048       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23049     } else {
23050       int vlen_enc = vector_length_encoding(this);
23051       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23052     }
23053   %}
23054   ins_pipe( pipe_slow );
23055 %}
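
// The [mask] in the absneg formats refers to a sign-bit constant: abs clears
// the sign bits (AND with the inverted sign mask) and neg flips them (XOR
// with the sign mask); vabsnegf/vabsnegd pick the operation from
// ideal_Opcode(). The vlen == 4 float case is matched in place (dst == src)
// by the single-operand vabsneg4F variant above.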
23056 
23057 //------------------------------------- VectorTest --------------------------------------------
23058 
23059 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
23060   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
23061   match(Set cr (VectorTest src1 src2));
23062   effect(TEMP vtmp);
23063   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
23064   ins_encode %{
23065     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23066     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23067     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23068   %}
23069   ins_pipe( pipe_slow );
23070 %}
23071 
23072 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23073   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23074   match(Set cr (VectorTest src1 src2));
23075   format %{ "vptest_ge16  $src1, $src2\n\t" %}
23076   ins_encode %{
23077     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23078     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23079     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23080   %}
23081   ins_pipe( pipe_slow );
23082 %}
23083 
23084 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23085   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23086              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23087             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23088   match(Set cr (VectorTest src1 src2));
23089   effect(TEMP tmp);
23090   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23091   ins_encode %{
23092     uint masklen = Matcher::vector_length(this, $src1);
23093     __ kmovwl($tmp$$Register, $src1$$KRegister);
23094     __ andl($tmp$$Register, (1 << masklen) - 1);
23095     __ cmpl($tmp$$Register, (1 << masklen) - 1);
23096   %}
23097   ins_pipe( pipe_slow );
23098 %}
23099 
23100 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23101   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23102              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23103             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23104   match(Set cr (VectorTest src1 src2));
23105   effect(TEMP tmp);
23106   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23107   ins_encode %{
23108     uint masklen = Matcher::vector_length(this, $src1);
23109     __ kmovwl($tmp$$Register, $src1$$KRegister);
23110     __ andl($tmp$$Register, (1 << masklen) - 1);
23111   %}
23112   ins_pipe( pipe_slow );
23113 %}
23114 
23115 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23116   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23117             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23118   match(Set cr (VectorTest src1 src2));
23119   format %{ "ktest_ge8  $src1, $src2\n\t" %}
23120   ins_encode %{
23121     uint masklen = Matcher::vector_length(this, $src1);
23122     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23123   %}
23124   ins_pipe( pipe_slow );
23125 %}
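
// kortest here ORs $src1 with itself, setting ZF when the mask is zero and CF
// when it is all ones, so anytrue (BoolTest::ne) keys off ZF and alltrue
// (BoolTest::overflow) keys off CF. The le8 variants go through a GPR
// instead: a 16-bit kortestw would also test mask bits beyond masklen, and
// kortestb requires AVX512DQ, hence the explicit (1 << masklen) - 1 masking.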
23126 
23127 //------------------------------------- LoadMask --------------------------------------------
23128 
23129 instruct loadMask(legVec dst, legVec src) %{
23130   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23131   match(Set dst (VectorLoadMask src));
23132   effect(TEMP dst);
23133   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23134   ins_encode %{
23135     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23136     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23137     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23138   %}
23139   ins_pipe( pipe_slow );
23140 %}
23141 
23142 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23143   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23144   match(Set dst (VectorLoadMask src));
23145   effect(TEMP xtmp);
23146   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23147   ins_encode %{
23148     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23149                         true, Assembler::AVX_512bit);
23150   %}
23151   ins_pipe( pipe_slow );
23152 %}
23153 
23154 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23155   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23156   match(Set dst (VectorLoadMask src));
23157   effect(TEMP xtmp);
23158   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23159   ins_encode %{
23160     int vlen_enc = vector_length_encoding(in(1));
23161     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23162                         false, vlen_enc);
23163   %}
23164   ins_pipe( pipe_slow );
23165 %}
23166 
23167 //------------------------------------- StoreMask --------------------------------------------
23168 
23169 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23170   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23171   match(Set dst (VectorStoreMask src size));
23172   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23173   ins_encode %{
23174     int vlen = Matcher::vector_length(this);
23175     if (vlen <= 16 && UseAVX <= 2) {
23176       assert(UseSSE >= 3, "required");
23177       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23178     } else {
23179       assert(UseAVX > 0, "required");
23180       int src_vlen_enc = vector_length_encoding(this, $src);
23181       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23182     }
23183   %}
23184   ins_pipe( pipe_slow );
23185 %}
23186 
23187 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23188   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23189   match(Set dst (VectorStoreMask src size));
23190   effect(TEMP_DEF dst, TEMP xtmp);
23191   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23192   ins_encode %{
23193     int vlen_enc = Assembler::AVX_128bit;
23194     int vlen = Matcher::vector_length(this);
23195     if (vlen <= 8) {
23196       assert(UseSSE >= 3, "required");
23197       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23198       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23199       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23200     } else {
23201       assert(UseAVX > 0, "required");
23202       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23203       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23204       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23205     }
23206   %}
23207   ins_pipe( pipe_slow );
23208 %}
23209 
23210 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23211   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23212   match(Set dst (VectorStoreMask src size));
23213   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23214   effect(TEMP_DEF dst, TEMP xtmp);
23215   ins_encode %{
23216     int vlen_enc = Assembler::AVX_128bit;
23217     int vlen = Matcher::vector_length(this);
23218     if (vlen <= 4) {
23219       assert(UseSSE >= 3, "required");
23220       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23221       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23222       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23223       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23224     } else {
23225       assert(UseAVX > 0, "required");
23226       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23227       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23228       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23229       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23230       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23231     }
23232   %}
23233   ins_pipe( pipe_slow );
23234 %}
23235 
23236 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23237   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23238   match(Set dst (VectorStoreMask src size));
23239   effect(TEMP_DEF dst, TEMP xtmp);
23240   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23241   ins_encode %{
23242     assert(UseSSE >= 3, "required");
23243     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23244     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23245     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23246     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23247     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23248   %}
23249   ins_pipe( pipe_slow );
23250 %}
23251 
23252 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23253   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23254   match(Set dst (VectorStoreMask src size));
23255   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23256   effect(TEMP_DEF dst, TEMP vtmp);
23257   ins_encode %{
23258     int vlen_enc = Assembler::AVX_128bit;
23259     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23260     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23261     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23262     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23263     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23264     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23265     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23266   %}
23267   ins_pipe( pipe_slow );
23268 %}
23269 
23270 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23271   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23272   match(Set dst (VectorStoreMask src size));
23273   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23274   ins_encode %{
23275     int src_vlen_enc = vector_length_encoding(this, $src);
23276     int dst_vlen_enc = vector_length_encoding(this);
23277     if (!VM_Version::supports_avx512vl()) {
23278       src_vlen_enc = Assembler::AVX_512bit;
23279     }
23280     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23281     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23282   %}
23283   ins_pipe( pipe_slow );
23284 %}
23285 
23286 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23287   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23288   match(Set dst (VectorStoreMask src size));
23289   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23290   ins_encode %{
23291     int src_vlen_enc = vector_length_encoding(this, $src);
23292     int dst_vlen_enc = vector_length_encoding(this);
23293     if (!VM_Version::supports_avx512vl()) {
23294       src_vlen_enc = Assembler::AVX_512bit;
23295     }
23296     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23297     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23298   %}
23299   ins_pipe( pipe_slow );
23300 %}
23301 
23302 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23303   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23304   match(Set dst (VectorStoreMask mask size));
23305   effect(TEMP_DEF dst);
23306   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23307   ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "mask length in bytes must be 64");
23309     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23310                  false, Assembler::AVX_512bit, noreg);
23311     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23312   %}
23313   ins_pipe( pipe_slow );
23314 %}
23315 
23316 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23317   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23318   match(Set dst (VectorStoreMask mask size));
23319   effect(TEMP_DEF dst);
23320   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23321   ins_encode %{
23322     int dst_vlen_enc = vector_length_encoding(this);
23323     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23324     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23325   %}
23326   ins_pipe( pipe_slow );
23327 %}
23328 
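// VectorMaskCast between equally sized mask types is a pure re-typing and
// is free (the empty encodings below); only casts that change the lane size
// re-pack the mask lanes, via vector_mask_cast.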
23329 instruct vmaskcast_evex(kReg dst) %{
23330   match(Set dst (VectorMaskCast dst));
23331   ins_cost(0);
23332   format %{ "vector_mask_cast $dst" %}
23333   ins_encode %{
23334     // empty
23335   %}
23336   ins_pipe(empty);
23337 %}
23338 
23339 instruct vmaskcast(vec dst) %{
23340   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23341   match(Set dst (VectorMaskCast dst));
23342   ins_cost(0);
23343   format %{ "vector_mask_cast $dst" %}
23344   ins_encode %{
23345     // empty
23346   %}
23347   ins_pipe(empty);
23348 %}
23349 
23350 instruct vmaskcast_avx(vec dst, vec src) %{
23351   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23352   match(Set dst (VectorMaskCast src));
23353   format %{ "vector_mask_cast $dst, $src" %}
23354   ins_encode %{
23355     int vlen = Matcher::vector_length(this);
23356     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23357     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23358     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23359   %}
23360   ins_pipe(pipe_slow);
23361 %}
23362 
23363 //-------------------------------- Load Iota Indices ----------------------------------
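// VectorLoadConst loads the iota index sequence 0, 1, 2, ... from a
// preinitialized constant table.  PopulateIndex builds [start, start+1, ...]
// by broadcasting the start value and adding the iota constants; only a
// stride of one is supported, as the asserts below check.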
23364 
23365 instruct loadIotaIndices(vec dst, immI_0 src) %{
23366   match(Set dst (VectorLoadConst src));
23367   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23368   ins_encode %{
23369      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23370      BasicType bt = Matcher::vector_element_basic_type(this);
23371      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23372   %}
23373   ins_pipe( pipe_slow );
23374 %}
23375 
23376 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23377   match(Set dst (PopulateIndex src1 src2));
23378   effect(TEMP dst, TEMP vtmp);
23379   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23380   ins_encode %{
23381      assert($src2$$constant == 1, "required");
23382      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23383      int vlen_enc = vector_length_encoding(this);
23384      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23385      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23386      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23387      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23388   %}
23389   ins_pipe( pipe_slow );
23390 %}
23391 
23392 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23393   match(Set dst (PopulateIndex src1 src2));
23394   effect(TEMP dst, TEMP vtmp);
23395   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23396   ins_encode %{
23397      assert($src2$$constant == 1, "required");
23398      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23399      int vlen_enc = vector_length_encoding(this);
23400      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23401      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23402      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23403      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23404   %}
23405   ins_pipe( pipe_slow );
23406 %}
23407 
23408 //-------------------------------- Rearrange ----------------------------------
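// VectorRearrange permutes the lanes of $src using the indices held in
// $shuffle (with VectorLoadShuffle preparing the index vector where needed).
// Within 128 bits this is a plain pshufb; 256-bit byte shuffles without
// AVX512_VBMI are emulated by shuffling both the original and the
// lane-swapped source and blending on the index high bits, while
// AVX512_VBMI provides a direct cross-lane permute (vpermb).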
23409 
23410 // LoadShuffle/Rearrange for Byte
23411 instruct rearrangeB(vec dst, vec shuffle) %{
23412   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23413             Matcher::vector_length(n) < 32);
23414   match(Set dst (VectorRearrange dst shuffle));
23415   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23416   ins_encode %{
23417     assert(UseSSE >= 4, "required");
23418     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23419   %}
23420   ins_pipe( pipe_slow );
23421 %}
23422 
23423 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23424   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23425             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23426   match(Set dst (VectorRearrange src shuffle));
23427   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23428   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23429   ins_encode %{
23430     assert(UseAVX >= 2, "required");
23431     // Swap src into vtmp1
23432     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23433     // Shuffle swapped src to get entries from other 128 bit lane
23434     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23435     // Shuffle original src to get entries from self 128 bit lane
23436     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23437     // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23438     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23439     // Perform the blend
23440     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23441   %}
23442   ins_pipe( pipe_slow );
23443 %}
23444 
23445 
23446 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23447   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23448             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23449   match(Set dst (VectorRearrange src shuffle));
23450   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23452   ins_encode %{
23453     int vlen_enc = vector_length_encoding(this);
23454     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23455                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23456                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23457   %}
23458   ins_pipe( pipe_slow );
23459 %}
23460 
23461 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23462   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23463             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23464   match(Set dst (VectorRearrange src shuffle));
23465   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23466   ins_encode %{
23467     int vlen_enc = vector_length_encoding(this);
23468     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23469   %}
23470   ins_pipe( pipe_slow );
23471 %}
23472 
23473 // LoadShuffle/Rearrange for Short
23474 
23475 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23476   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23477             !VM_Version::supports_avx512bw());
23478   match(Set dst (VectorLoadShuffle src));
23479   effect(TEMP dst, TEMP vtmp);
23480   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23481   ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask, since
    // only a byte shuffle instruction is available on these platforms.
23484     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23485     if (UseAVX == 0) {
23486       assert(vlen_in_bytes <= 16, "required");
23487       // Multiply each shuffle by two to get byte index
23488       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23489       __ psllw($vtmp$$XMMRegister, 1);
23490 
23491       // Duplicate to create 2 copies of byte index
23492       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23493       __ psllw($dst$$XMMRegister, 8);
23494       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23495 
23496       // Add one to get alternate byte index
23497       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23498       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23499     } else {
23500       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23501       int vlen_enc = vector_length_encoding(this);
23502       // Multiply each shuffle by two to get byte index
23503       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23504 
23505       // Duplicate to create 2 copies of byte index
23506       __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister,  8, vlen_enc);
23507       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23508 
23509       // Add one to get alternate byte index
23510       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23511     }
23512   %}
23513   ins_pipe( pipe_slow );
23514 %}
23515 
23516 instruct rearrangeS(vec dst, vec shuffle) %{
23517   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23518             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23519   match(Set dst (VectorRearrange dst shuffle));
23520   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23521   ins_encode %{
23522     assert(UseSSE >= 4, "required");
23523     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23524   %}
23525   ins_pipe( pipe_slow );
23526 %}
23527 
23528 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23529   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23530             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23531   match(Set dst (VectorRearrange src shuffle));
23532   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23533   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23534   ins_encode %{
23535     assert(UseAVX >= 2, "required");
23536     // Swap src into vtmp1
23537     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23538     // Shuffle swapped src to get entries from other 128 bit lane
23539     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23540     // Shuffle original src to get entries from self 128 bit lane
23541     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23542     // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23543     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23544     // Perform the blend
23545     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23546   %}
23547   ins_pipe( pipe_slow );
23548 %}
23549 
23550 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23551   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23552             VM_Version::supports_avx512bw());
23553   match(Set dst (VectorRearrange src shuffle));
23554   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23555   ins_encode %{
23556     int vlen_enc = vector_length_encoding(this);
23557     if (!VM_Version::supports_avx512vl()) {
23558       vlen_enc = Assembler::AVX_512bit;
23559     }
23560     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23561   %}
23562   ins_pipe( pipe_slow );
23563 %}
23564 
23565 // LoadShuffle/Rearrange for Integer and Float
23566 
23567 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23568   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23569             Matcher::vector_length(n) == 4 && UseAVX == 0);
23570   match(Set dst (VectorLoadShuffle src));
23571   effect(TEMP dst, TEMP vtmp);
23572   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23573   ins_encode %{
23574     assert(UseSSE >= 4, "required");
23575 
    // Create a byte shuffle mask from the int shuffle mask, since
    // only a byte shuffle instruction is available on these platforms.
23578 
23579     // Duplicate and multiply each shuffle by 4
23580     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23581     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23582     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23583     __ psllw($vtmp$$XMMRegister, 2);
23584 
23585     // Duplicate again to create 4 copies of byte index
23586     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23587     __ psllw($dst$$XMMRegister, 8);
23588     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23589 
23590     // Add 3,2,1,0 to get alternate byte index
23591     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23592     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23593   %}
23594   ins_pipe( pipe_slow );
23595 %}
23596 
23597 instruct rearrangeI(vec dst, vec shuffle) %{
23598   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23599             UseAVX == 0);
23600   match(Set dst (VectorRearrange dst shuffle));
23601   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23602   ins_encode %{
23603     assert(UseSSE >= 4, "required");
23604     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23605   %}
23606   ins_pipe( pipe_slow );
23607 %}
23608 
23609 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23610   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23611             UseAVX > 0);
23612   match(Set dst (VectorRearrange src shuffle));
23613   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23614   ins_encode %{
23615     int vlen_enc = vector_length_encoding(this);
23616     BasicType bt = Matcher::vector_element_basic_type(this);
23617     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23618   %}
23619   ins_pipe( pipe_slow );
23620 %}
23621 
23622 // LoadShuffle/Rearrange for Long and Double
23623 
23624 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23625   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23626             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23627   match(Set dst (VectorLoadShuffle src));
23628   effect(TEMP dst, TEMP vtmp);
23629   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23630   ins_encode %{
23631     assert(UseAVX >= 2, "required");
23632 
23633     int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask, since
    // only a double word shuffle instruction is available on these platforms.
23636 
23637     // Multiply each shuffle by two to get double word index
23638     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23639 
23640     // Duplicate each double word shuffle
23641     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23642     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23643 
23644     // Add one to get alternate double word index
23645     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23646   %}
23647   ins_pipe( pipe_slow );
23648 %}
23649 
23650 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23651   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23652             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23653   match(Set dst (VectorRearrange src shuffle));
23654   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23655   ins_encode %{
23656     assert(UseAVX >= 2, "required");
23657 
23658     int vlen_enc = vector_length_encoding(this);
23659     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23660   %}
23661   ins_pipe( pipe_slow );
23662 %}
23663 
23664 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23665   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23666             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23667   match(Set dst (VectorRearrange src shuffle));
23668   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23669   ins_encode %{
23670     assert(UseAVX > 2, "required");
23671 
23672     int vlen_enc = vector_length_encoding(this);
23673     if (vlen_enc == Assembler::AVX_128bit) {
23674       vlen_enc = Assembler::AVX_256bit;
23675     }
23676     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23677   %}
23678   ins_pipe( pipe_slow );
23679 %}
23680 
23681 // --------------------------------- FMA --------------------------------------
23682 // a * b + c
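// These rules match the FmaVF/FmaVD ideal nodes and emit a fused
// multiply-add: one instruction, one rounding step, as Java's fma semantics
// require.  Register and memory forms are provided for the $b operand.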
23683 
23684 instruct vfmaF_reg(vec a, vec b, vec c) %{
23685   match(Set c (FmaVF  c (Binary a b)));
23686   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23687   ins_cost(150);
23688   ins_encode %{
23689     assert(UseFMA, "not enabled");
23690     int vlen_enc = vector_length_encoding(this);
23691     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23692   %}
23693   ins_pipe( pipe_slow );
23694 %}
23695 
23696 instruct vfmaF_mem(vec a, memory b, vec c) %{
23697   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23698   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23699   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23700   ins_cost(150);
23701   ins_encode %{
23702     assert(UseFMA, "not enabled");
23703     int vlen_enc = vector_length_encoding(this);
23704     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23705   %}
23706   ins_pipe( pipe_slow );
23707 %}
23708 
23709 instruct vfmaD_reg(vec a, vec b, vec c) %{
23710   match(Set c (FmaVD  c (Binary a b)));
23711   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23712   ins_cost(150);
23713   ins_encode %{
23714     assert(UseFMA, "not enabled");
23715     int vlen_enc = vector_length_encoding(this);
23716     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23717   %}
23718   ins_pipe( pipe_slow );
23719 %}
23720 
23721 instruct vfmaD_mem(vec a, memory b, vec c) %{
23722   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23723   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23724   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23725   ins_cost(150);
23726   ins_encode %{
23727     assert(UseFMA, "not enabled");
23728     int vlen_enc = vector_length_encoding(this);
23729     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23730   %}
23731   ins_pipe( pipe_slow );
23732 %}
23733 
23734 // --------------------------------- Vector Multiply Add --------------------------------------
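// MulAddVS2VI maps onto pmaddwd/vpmaddwd: multiply vertically adjacent
// pairs of signed 16-bit elements and add each pair horizontally into a
// signed 32-bit result.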
23735 
23736 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23737   predicate(UseAVX == 0);
23738   match(Set dst (MulAddVS2VI dst src1));
23739   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23740   ins_encode %{
23741     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23742   %}
23743   ins_pipe( pipe_slow );
23744 %}
23745 
23746 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23747   predicate(UseAVX > 0);
23748   match(Set dst (MulAddVS2VI src1 src2));
23749   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23750   ins_encode %{
23751     int vlen_enc = vector_length_encoding(this);
23752     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23753   %}
23754   ins_pipe( pipe_slow );
23755 %}
23756 
23757 // --------------------------------- Vector Multiply Add Add ----------------------------------
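// With AVX512_VNNI the multiply-add and the following accumulate fuse into
// a single evpdpwssd, which adds the pairwise word products into $dst; the
// low ins_cost steers the matcher toward this rule over the two-instruction
// sequence above.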
23758 
23759 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23760   predicate(VM_Version::supports_avx512_vnni());
23761   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23762   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23763   ins_encode %{
23764     assert(UseAVX > 2, "required");
23765     int vlen_enc = vector_length_encoding(this);
23766     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23767   %}
23768   ins_pipe( pipe_slow );
23769   ins_cost(10);
23770 %}
23771 
23772 // --------------------------------- PopCount --------------------------------------
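// Per-lane population count.  Element types accepted by the popcount
// predicate use the AVX-512 VPOPCNT instructions through the _evex helper
// (k0 means unmasked; the masked variant merges under $mask); other types
// take the AVX fallback, which broadly counts bits with a pshufb-driven
// nibble lookup table.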
23773 
23774 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23775   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23776   match(Set dst (PopCountVI src));
23777   match(Set dst (PopCountVL src));
23778   format %{ "vector_popcount_integral $dst, $src" %}
23779   ins_encode %{
23781     int vlen_enc = vector_length_encoding(this, $src);
23782     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23783     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23784   %}
23785   ins_pipe( pipe_slow );
23786 %}
23787 
23788 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23789   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23790   match(Set dst (PopCountVI src mask));
23791   match(Set dst (PopCountVL src mask));
23792   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23793   ins_encode %{
23794     int vlen_enc = vector_length_encoding(this, $src);
23795     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23796     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23797     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23798   %}
23799   ins_pipe( pipe_slow );
23800 %}
23801 
23802 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23803   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23804   match(Set dst (PopCountVI src));
23805   match(Set dst (PopCountVL src));
23806   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23807   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23808   ins_encode %{
23810     int vlen_enc = vector_length_encoding(this, $src);
23811     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23812     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23813                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23814   %}
23815   ins_pipe( pipe_slow );
23816 %}
23817 
23818 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
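// CountTrailingZerosV has no direct instruction.  Broadly, the helpers
// isolate the lowest set bit of each element (x & -x) and derive its
// position from the leading-zero count; the subword element types need the
// extra vector temporaries listed in the effects.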
23819 
23820 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23821   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23822                                               Matcher::vector_length_in_bytes(n->in(1))));
23823   match(Set dst (CountTrailingZerosV src));
23824   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23825   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23827   ins_encode %{
23828     int vlen_enc = vector_length_encoding(this, $src);
23829     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23830     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23831                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23832   %}
23833   ins_pipe( pipe_slow );
23834 %}
23835 
23836 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23837   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23838             VM_Version::supports_avx512cd() &&
23839             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23840   match(Set dst (CountTrailingZerosV src));
23841   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23842   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23844   ins_encode %{
23845     int vlen_enc = vector_length_encoding(this, $src);
23846     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23847     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23848                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23849   %}
23850   ins_pipe( pipe_slow );
23851 %}
23852 
23853 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23854   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23855   match(Set dst (CountTrailingZerosV src));
23856   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23857   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23859   ins_encode %{
23860     int vlen_enc = vector_length_encoding(this, $src);
23861     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23862     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23863                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23864                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23865   %}
23866   ins_pipe( pipe_slow );
23867 %}
23868 
23869 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23870   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23871   match(Set dst (CountTrailingZerosV src));
23872   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23873   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23874   ins_encode %{
23875     int vlen_enc = vector_length_encoding(this, $src);
23876     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23877     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23878                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23879   %}
23880   ins_pipe( pipe_slow );
23881 %}
23882 
23883 
23884 // --------------------------------- Bitwise Ternary Logic ----------------------------------
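// vpternlogd evaluates an arbitrary three-input boolean function per bit:
// the 8-bit $func immediate is a truth table indexed by the corresponding
// bits of dst, src2 and src3.  For example, 0x96 computes a ^ b ^ c and
// 0xE8 the majority function.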
23885 
23886 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23887   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23888   effect(TEMP dst);
23889   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23890   ins_encode %{
23891     int vector_len = vector_length_encoding(this);
23892     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23893   %}
23894   ins_pipe( pipe_slow );
23895 %}
23896 
23897 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23898   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23899   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23900   effect(TEMP dst);
23901   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23902   ins_encode %{
23903     int vector_len = vector_length_encoding(this);
23904     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23905   %}
23906   ins_pipe( pipe_slow );
23907 %}
23908 
23909 // --------------------------------- Rotation Operations ----------------------------------
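// RotateLeftV/RotateRightV with an immediate or per-lane variable shift.
// The macro-assembler helpers use the AVX-512 rotate instructions where
// available and otherwise, broadly, synthesize the rotate from two shifts
// and an or.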
23910 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23911   match(Set dst (RotateLeftV src shift));
23912   match(Set dst (RotateRightV src shift));
23913   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23914   ins_encode %{
23915     int opcode      = this->ideal_Opcode();
23916     int vector_len  = vector_length_encoding(this);
23917     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23918     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23919   %}
23920   ins_pipe( pipe_slow );
23921 %}
23922 
23923 instruct vprorate(vec dst, vec src, vec shift) %{
23924   match(Set dst (RotateLeftV src shift));
23925   match(Set dst (RotateRightV src shift));
23926   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23927   ins_encode %{
23928     int opcode      = this->ideal_Opcode();
23929     int vector_len  = vector_length_encoding(this);
23930     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23931     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23932   %}
23933   ins_pipe( pipe_slow );
23934 %}
23935 
23936 // ---------------------------------- Masked Operations ------------------------------------
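// Masked loads and stores come in two flavors, keyed on whether the mask is
// an AVX-512 opmask (vectmask) or an ordinary vector register: the AVX forms
// go through vmovmask (the vmaskmov family), the EVEX forms through k-masked
// evmovdqu.  Only the active lanes are read or written.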
23937 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23938   predicate(!n->in(3)->bottom_type()->isa_vectmask());
23939   match(Set dst (LoadVectorMasked mem mask));
23940   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23941   ins_encode %{
23942     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23943     int vlen_enc = vector_length_encoding(this);
23944     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23945   %}
23946   ins_pipe( pipe_slow );
23947 %}
23948 
23949 
23950 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23951   predicate(n->in(3)->bottom_type()->isa_vectmask());
23952   match(Set dst (LoadVectorMasked mem mask));
23953   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23954   ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23956     int vector_len = vector_length_encoding(this);
23957     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23958   %}
23959   ins_pipe( pipe_slow );
23960 %}
23961 
23962 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23963   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23964   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23965   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23966   ins_encode %{
23967     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23968     int vlen_enc = vector_length_encoding(src_node);
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23970     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23971   %}
23972   ins_pipe( pipe_slow );
23973 %}
23974 
23975 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23976   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23977   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23978   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23979   ins_encode %{
23980     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23982     int vlen_enc = vector_length_encoding(src_node);
23983     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23984   %}
23985   ins_pipe( pipe_slow );
23986 %}
23987 
23988 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23989   match(Set addr (VerifyVectorAlignment addr mask));
23990   effect(KILL cr);
23991   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23992   ins_encode %{
23993     Label Lskip;
23994     // check if masked bits of addr are zero
23995     __ testq($addr$$Register, $mask$$constant);
23996     __ jccb(Assembler::equal, Lskip);
23997     __ stop("verify_vector_alignment found a misaligned vector memory access");
23998     __ bind(Lskip);
23999   %}
24000   ins_pipe(pipe_slow);
24001 %}
24002 
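// VectorCmpMasked compares the active lanes of $src1 and $src2 for equality
// and yields -1 when they all match, otherwise the index of the first
// mismatch: kortest sets CF when (~mask | eq) is all ones, and tzcnt of the
// inverted equality mask locates the first differing lane.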
24003 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
24004   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
24005   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
24006   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
24007   ins_encode %{
24008     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
24009     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
24010 
24011     Label DONE;
24012     int vlen_enc = vector_length_encoding(this, $src1);
24013     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
24014 
24015     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
24016     __ mov64($dst$$Register, -1L);
24017     __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
24018     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
24019     __ jccb(Assembler::carrySet, DONE);
24020     __ kmovql($dst$$Register, $ktmp1$$KRegister);
24021     __ notq($dst$$Register);
24022     __ tzcntq($dst$$Register, $dst$$Register);
24023     __ bind(DONE);
24024   %}
24025   ins_pipe( pipe_slow );
24026 %}
24027 
24028 
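// VectorMaskGen materializes an opmask with the low $len bits set, i.e.
// (1 << len) - 1, as the immediate form below spells out.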
24029 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
24030   match(Set dst (VectorMaskGen len));
24031   effect(TEMP temp, KILL cr);
24032   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
24033   ins_encode %{
24034     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
24035   %}
24036   ins_pipe( pipe_slow );
24037 %}
24038 
24039 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
24040   match(Set dst (VectorMaskGen len));
24041   format %{ "vector_mask_gen $len \t! vector mask generator" %}
24042   effect(TEMP temp);
24043   ins_encode %{
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
24045     __ kmovql($dst$$KRegister, $temp$$Register);
24046   %}
24047   ins_pipe( pipe_slow );
24048 %}
24049 
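// Mask query operations (VectorMaskToLong/TrueCount/FirstTrue/LastTrue) all
// funnel into vector_mask_operation, which dispatches on the ideal opcode.
// Broadly, the mask is first materialized as a scalar bitmask and then
// popcnt/tzcnt/lzcnt-style arithmetic yields the requested value; the _bool
// and _avx variants extract the bitmask from a boolean vector instead of an
// opmask register.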
24050 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
24051   predicate(n->in(1)->bottom_type()->isa_vectmask());
24052   match(Set dst (VectorMaskToLong mask));
24053   effect(TEMP dst, KILL cr);
24054   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
24055   ins_encode %{
24056     int opcode = this->ideal_Opcode();
24057     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24058     int mask_len = Matcher::vector_length(this, $mask);
24059     int mask_size = mask_len * type2aelembytes(mbt);
24060     int vlen_enc = vector_length_encoding(this, $mask);
24061     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24062                              $dst$$Register, mask_len, mask_size, vlen_enc);
24063   %}
24064   ins_pipe( pipe_slow );
24065 %}
24066 
24067 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24068   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24069   match(Set dst (VectorMaskToLong mask));
24070   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24071   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24072   ins_encode %{
24073     int opcode = this->ideal_Opcode();
24074     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24075     int mask_len = Matcher::vector_length(this, $mask);
24076     int vlen_enc = vector_length_encoding(this, $mask);
24077     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24078                              $dst$$Register, mask_len, mbt, vlen_enc);
24079   %}
24080   ins_pipe( pipe_slow );
24081 %}
24082 
24083 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24084   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24085   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24086   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24087   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24088   ins_encode %{
24089     int opcode = this->ideal_Opcode();
24090     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24091     int mask_len = Matcher::vector_length(this, $mask);
24092     int vlen_enc = vector_length_encoding(this, $mask);
24093     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24094                              $dst$$Register, mask_len, mbt, vlen_enc);
24095   %}
24096   ins_pipe( pipe_slow );
24097 %}
24098 
24099 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24100   predicate(n->in(1)->bottom_type()->isa_vectmask());
24101   match(Set dst (VectorMaskTrueCount mask));
24102   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24103   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24104   ins_encode %{
24105     int opcode = this->ideal_Opcode();
24106     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24107     int mask_len = Matcher::vector_length(this, $mask);
24108     int mask_size = mask_len * type2aelembytes(mbt);
24109     int vlen_enc = vector_length_encoding(this, $mask);
24110     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24111                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24112   %}
24113   ins_pipe( pipe_slow );
24114 %}
24115 
24116 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24117   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24118   match(Set dst (VectorMaskTrueCount mask));
24119   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24120   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24121   ins_encode %{
24122     int opcode = this->ideal_Opcode();
24123     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24124     int mask_len = Matcher::vector_length(this, $mask);
24125     int vlen_enc = vector_length_encoding(this, $mask);
24126     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24127                              $tmp$$Register, mask_len, mbt, vlen_enc);
24128   %}
24129   ins_pipe( pipe_slow );
24130 %}
24131 
24132 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24133   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24134   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24135   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24136   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24137   ins_encode %{
24138     int opcode = this->ideal_Opcode();
24139     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24140     int mask_len = Matcher::vector_length(this, $mask);
24141     int vlen_enc = vector_length_encoding(this, $mask);
24142     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24143                              $tmp$$Register, mask_len, mbt, vlen_enc);
24144   %}
24145   ins_pipe( pipe_slow );
24146 %}
24147 
24148 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24149   predicate(n->in(1)->bottom_type()->isa_vectmask());
24150   match(Set dst (VectorMaskFirstTrue mask));
24151   match(Set dst (VectorMaskLastTrue mask));
24152   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24153   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24154   ins_encode %{
24155     int opcode = this->ideal_Opcode();
24156     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24157     int mask_len = Matcher::vector_length(this, $mask);
24158     int mask_size = mask_len * type2aelembytes(mbt);
24159     int vlen_enc = vector_length_encoding(this, $mask);
24160     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24161                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24162   %}
24163   ins_pipe( pipe_slow );
24164 %}
24165 
24166 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24167   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24168   match(Set dst (VectorMaskFirstTrue mask));
24169   match(Set dst (VectorMaskLastTrue mask));
24170   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24171   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24172   ins_encode %{
24173     int opcode = this->ideal_Opcode();
24174     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24175     int mask_len = Matcher::vector_length(this, $mask);
24176     int vlen_enc = vector_length_encoding(this, $mask);
24177     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24178                              $tmp$$Register, mask_len, mbt, vlen_enc);
24179   %}
24180   ins_pipe( pipe_slow );
24181 %}
24182 
24183 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24184   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24185   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24186   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24187   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24188   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24189   ins_encode %{
24190     int opcode = this->ideal_Opcode();
24191     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24192     int mask_len = Matcher::vector_length(this, $mask);
24193     int vlen_enc = vector_length_encoding(this, $mask);
24194     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24195                              $tmp$$Register, mask_len, mbt, vlen_enc);
24196   %}
24197   ins_pipe( pipe_slow );
24198 %}
24199 
24200 // --------------------------------- Compress/Expand Operations ---------------------------
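// CompressV/ExpandV gather the active lanes of $src into consecutive lanes
// of $dst (or scatter them back out).  With AVX512VL, or for 512-bit
// vectors, this maps onto the vpcompress/vpexpand family; the AVX2 fallback
// broadly derives an equivalent lane permutation from the mask, using $perm
// as scratch.  CompressM compresses the opmask register itself.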
24201 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24202   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24203   match(Set dst (CompressV src mask));
24204   match(Set dst (ExpandV src mask));
24205   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24207   ins_encode %{
24208     int opcode = this->ideal_Opcode();
24209     int vlen_enc = vector_length_encoding(this);
24210     BasicType bt  = Matcher::vector_element_basic_type(this);
24211     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24212                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24213   %}
24214   ins_pipe( pipe_slow );
24215 %}
24216 
24217 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24218   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24219   match(Set dst (CompressV src mask));
24220   match(Set dst (ExpandV src mask));
24221   format %{ "vector_compress_expand $dst, $src, $mask" %}
24222   ins_encode %{
24223     int opcode = this->ideal_Opcode();
24224     int vector_len = vector_length_encoding(this);
24225     BasicType bt  = Matcher::vector_element_basic_type(this);
24226     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24227   %}
24228   ins_pipe( pipe_slow );
24229 %}
24230 
24231 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24232   match(Set dst (CompressM mask));
24233   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24234   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24235   ins_encode %{
24236     assert(this->in(1)->bottom_type()->isa_vectmask(), "");
24237     int mask_len = Matcher::vector_length(this);
24238     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24239   %}
24240   ins_pipe( pipe_slow );
24241 %}
24242 
24243 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
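// ReverseV reverses the bit order within each element.  With GFNI, a single
// gf2p8affineqb against the constant matrix 0x8040201008040201 reverses the
// bits of every byte (wider elements additionally have their bytes
// reordered); without GFNI the helper falls back, broadly, to nibble-wise
// table lookups.  ReverseBytesV only reorders bytes within each element.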
24244 
24245 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24246   predicate(!VM_Version::supports_gfni());
24247   match(Set dst (ReverseV src));
24248   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24250   ins_encode %{
24251     int vec_enc = vector_length_encoding(this);
24252     BasicType bt = Matcher::vector_element_basic_type(this);
24253     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24254                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24255   %}
24256   ins_pipe( pipe_slow );
24257 %}
24258 
24259 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24260   predicate(VM_Version::supports_gfni());
24261   match(Set dst (ReverseV src));
24262   effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24264   ins_encode %{
24265     int vec_enc = vector_length_encoding(this);
24266     BasicType bt  = Matcher::vector_element_basic_type(this);
24267     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24268     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24269                                $xtmp$$XMMRegister);
24270   %}
24271   ins_pipe( pipe_slow );
24272 %}
24273 
24274 instruct vreverse_byte_reg(vec dst, vec src) %{
24275   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24276   match(Set dst (ReverseBytesV src));
24277   effect(TEMP dst);
24278   format %{ "vector_reverse_byte $dst, $src" %}
24279   ins_encode %{
24280     int vec_enc = vector_length_encoding(this);
24281     BasicType bt = Matcher::vector_element_basic_type(this);
24282     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24283   %}
24284   ins_pipe( pipe_slow );
24285 %}
24286 
24287 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24288   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24289   match(Set dst (ReverseBytesV src));
24290   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24292   ins_encode %{
24293     int vec_enc = vector_length_encoding(this);
24294     BasicType bt = Matcher::vector_element_basic_type(this);
24295     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24296                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24297   %}
24298   ins_pipe( pipe_slow );
24299 %}
24300 
24301 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
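// CountLeadingZerosV: int and long elements use the AVX512CD vplzcnt{d,q}
// instructions directly (the second rule merges under $mask); the subword
// and pre-AVX512VL paths emulate the count with the temporaries listed in
// their effects.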
24302 
24303 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24304   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24305                                               Matcher::vector_length_in_bytes(n->in(1))));
24306   match(Set dst (CountLeadingZerosV src));
24307   format %{ "vector_count_leading_zeros $dst, $src" %}
24308   ins_encode %{
24309      int vlen_enc = vector_length_encoding(this, $src);
24310      BasicType bt = Matcher::vector_element_basic_type(this, $src);
24311      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24312                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24313   %}
24314   ins_pipe( pipe_slow );
24315 %}
24316 
24317 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24318   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24319                                               Matcher::vector_length_in_bytes(n->in(1))));
24320   match(Set dst (CountLeadingZerosV src mask));
24321   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24322   ins_encode %{
24323     int vlen_enc = vector_length_encoding(this, $src);
24324     BasicType bt = Matcher::vector_element_basic_type(this, $src);
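    // Copy $src into $dst first: the merge-masked count below writes only the
    // lanes selected by $mask, so the remaining lanes keep the source values.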
24325     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24326     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24327                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24328   %}
24329   ins_pipe( pipe_slow );
24330 %}
24331 
24332 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24333   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24334             VM_Version::supports_avx512cd() &&
24335             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24336   match(Set dst (CountLeadingZerosV src));
24337   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24338   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
24339   ins_encode %{
24340     int vlen_enc = vector_length_encoding(this, $src);
24341     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24342     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24343                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24344   %}
24345   ins_pipe( pipe_slow );
24346 %}
24347 
24348 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24349   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24350   match(Set dst (CountLeadingZerosV src));
24351   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24352   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24353   ins_encode %{
24354     int vlen_enc = vector_length_encoding(this, $src);
24355     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24356     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24357                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24358                                        $rtmp$$Register, true, vlen_enc);
24359   %}
24360   ins_pipe( pipe_slow );
24361 %}
24362 
24363 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24364   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24365             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24366   match(Set dst (CountLeadingZerosV src));
24367   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24368   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24369   ins_encode %{
24370     int vlen_enc = vector_length_encoding(this, $src);
24371     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24372     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24373                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24374   %}
24375   ins_pipe( pipe_slow );
24376 %}
24377 
24378 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24379   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24380             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24381   match(Set dst (CountLeadingZerosV src));
24382   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24383   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24384   ins_encode %{
24385     int vlen_enc = vector_length_encoding(this, $src);
24386     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24387     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24388                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24389   %}
24390   ins_pipe( pipe_slow );
24391 %}
24392 
24393 // ---------------------------------- Vector Masked Operations ------------------------------------
24394 
24395 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24396   match(Set dst (AddVB (Binary dst src2) mask));
24397   match(Set dst (AddVS (Binary dst src2) mask));
24398   match(Set dst (AddVI (Binary dst src2) mask));
24399   match(Set dst (AddVL (Binary dst src2) mask));
24400   match(Set dst (AddVF (Binary dst src2) mask));
24401   match(Set dst (AddVD (Binary dst src2) mask));
24402   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24403   ins_encode %{
24404     int vlen_enc = vector_length_encoding(this);
24405     BasicType bt = Matcher::vector_element_basic_type(this);
24406     int opc = this->ideal_Opcode();
24407     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24408                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24409   %}
24410   ins_pipe( pipe_slow );
24411 %}
24412 
24413 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24414   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24415   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24416   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24417   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24418   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24419   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24420   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24421   ins_encode %{
24422     int vlen_enc = vector_length_encoding(this);
24423     BasicType bt = Matcher::vector_element_basic_type(this);
24424     int opc = this->ideal_Opcode();
24425     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24426                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24427   %}
24428   ins_pipe( pipe_slow );
24429 %}
24430 
24431 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24432   match(Set dst (XorV (Binary dst src2) mask));
24433   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24434   ins_encode %{
24435     int vlen_enc = vector_length_encoding(this);
24436     BasicType bt = Matcher::vector_element_basic_type(this);
24437     int opc = this->ideal_Opcode();
24438     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24439                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24440   %}
24441   ins_pipe( pipe_slow );
24442 %}
24443 
24444 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24445   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24446   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24447   ins_encode %{
24448     int vlen_enc = vector_length_encoding(this);
24449     BasicType bt = Matcher::vector_element_basic_type(this);
24450     int opc = this->ideal_Opcode();
24451     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24452                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24453   %}
24454   ins_pipe( pipe_slow );
24455 %}
24456 
24457 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24458   match(Set dst (OrV (Binary dst src2) mask));
24459   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24460   ins_encode %{
24461     int vlen_enc = vector_length_encoding(this);
24462     BasicType bt = Matcher::vector_element_basic_type(this);
24463     int opc = this->ideal_Opcode();
24464     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24465                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24466   %}
24467   ins_pipe( pipe_slow );
24468 %}
24469 
24470 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24471   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24472   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24473   ins_encode %{
24474     int vlen_enc = vector_length_encoding(this);
24475     BasicType bt = Matcher::vector_element_basic_type(this);
24476     int opc = this->ideal_Opcode();
24477     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24478                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24479   %}
24480   ins_pipe( pipe_slow );
24481 %}
24482 
24483 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24484   match(Set dst (AndV (Binary dst src2) mask));
24485   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24486   ins_encode %{
24487     int vlen_enc = vector_length_encoding(this);
24488     BasicType bt = Matcher::vector_element_basic_type(this);
24489     int opc = this->ideal_Opcode();
24490     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24491                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24492   %}
24493   ins_pipe( pipe_slow );
24494 %}
24495 
24496 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24497   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24498   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24499   ins_encode %{
24500     int vlen_enc = vector_length_encoding(this);
24501     BasicType bt = Matcher::vector_element_basic_type(this);
24502     int opc = this->ideal_Opcode();
24503     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24504                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24505   %}
24506   ins_pipe( pipe_slow );
24507 %}
24508 
24509 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24510   match(Set dst (SubVB (Binary dst src2) mask));
24511   match(Set dst (SubVS (Binary dst src2) mask));
24512   match(Set dst (SubVI (Binary dst src2) mask));
24513   match(Set dst (SubVL (Binary dst src2) mask));
24514   match(Set dst (SubVF (Binary dst src2) mask));
24515   match(Set dst (SubVD (Binary dst src2) mask));
24516   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24517   ins_encode %{
24518     int vlen_enc = vector_length_encoding(this);
24519     BasicType bt = Matcher::vector_element_basic_type(this);
24520     int opc = this->ideal_Opcode();
24521     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24522                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24523   %}
24524   ins_pipe( pipe_slow );
24525 %}
24526 
24527 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24528   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24529   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24530   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24531   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24532   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24533   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24534   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24535   ins_encode %{
24536     int vlen_enc = vector_length_encoding(this);
24537     BasicType bt = Matcher::vector_element_basic_type(this);
24538     int opc = this->ideal_Opcode();
24539     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24540                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24541   %}
24542   ins_pipe( pipe_slow );
24543 %}
24544 
24545 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24546   match(Set dst (MulVS (Binary dst src2) mask));
24547   match(Set dst (MulVI (Binary dst src2) mask));
24548   match(Set dst (MulVL (Binary dst src2) mask));
24549   match(Set dst (MulVF (Binary dst src2) mask));
24550   match(Set dst (MulVD (Binary dst src2) mask));
24551   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24552   ins_encode %{
24553     int vlen_enc = vector_length_encoding(this);
24554     BasicType bt = Matcher::vector_element_basic_type(this);
24555     int opc = this->ideal_Opcode();
24556     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24557                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24558   %}
24559   ins_pipe( pipe_slow );
24560 %}
24561 
24562 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24563   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24564   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24565   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24566   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24567   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24568   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24569   ins_encode %{
24570     int vlen_enc = vector_length_encoding(this);
24571     BasicType bt = Matcher::vector_element_basic_type(this);
24572     int opc = this->ideal_Opcode();
24573     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24574                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24575   %}
24576   ins_pipe( pipe_slow );
24577 %}
24578 
24579 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24580   match(Set dst (SqrtVF dst mask));
24581   match(Set dst (SqrtVD dst mask));
24582   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24583   ins_encode %{
24584     int vlen_enc = vector_length_encoding(this);
24585     BasicType bt = Matcher::vector_element_basic_type(this);
24586     int opc = this->ideal_Opcode();
24587     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24588                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24589   %}
24590   ins_pipe( pipe_slow );
24591 %}
24592 
24593 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24594   match(Set dst (DivVF (Binary dst src2) mask));
24595   match(Set dst (DivVD (Binary dst src2) mask));
24596   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24597   ins_encode %{
24598     int vlen_enc = vector_length_encoding(this);
24599     BasicType bt = Matcher::vector_element_basic_type(this);
24600     int opc = this->ideal_Opcode();
24601     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24602                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24603   %}
24604   ins_pipe( pipe_slow );
24605 %}
24606 
24607 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24608   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24609   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24610   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24611   ins_encode %{
24612     int vlen_enc = vector_length_encoding(this);
24613     BasicType bt = Matcher::vector_element_basic_type(this);
24614     int opc = this->ideal_Opcode();
24615     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24616                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24617   %}
24618   ins_pipe( pipe_slow );
24619 %}
24620 
24621 
24622 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24623   match(Set dst (RotateLeftV (Binary dst shift) mask));
24624   match(Set dst (RotateRightV (Binary dst shift) mask));
24625   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24626   ins_encode %{
24627     int vlen_enc = vector_length_encoding(this);
24628     BasicType bt = Matcher::vector_element_basic_type(this);
24629     int opc = this->ideal_Opcode();
24630     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24631                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24632   %}
24633   ins_pipe( pipe_slow );
24634 %}
24635 
24636 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24637   match(Set dst (RotateLeftV (Binary dst src2) mask));
24638   match(Set dst (RotateRightV (Binary dst src2) mask));
24639   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24640   ins_encode %{
24641     int vlen_enc = vector_length_encoding(this);
24642     BasicType bt = Matcher::vector_element_basic_type(this);
24643     int opc = this->ideal_Opcode();
24644     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24645                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24646   %}
24647   ins_pipe( pipe_slow );
24648 %}
24649 
24650 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24651   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24652   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24653   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24654   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24655   ins_encode %{
24656     int vlen_enc = vector_length_encoding(this);
24657     BasicType bt = Matcher::vector_element_basic_type(this);
24658     int opc = this->ideal_Opcode();
24659     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24660                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24661   %}
24662   ins_pipe( pipe_slow );
24663 %}
24664 
24665 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24666   predicate(!n->as_ShiftV()->is_var_shift());
24667   match(Set dst (LShiftVS (Binary dst src2) mask));
24668   match(Set dst (LShiftVI (Binary dst src2) mask));
24669   match(Set dst (LShiftVL (Binary dst src2) mask));
24670   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24671   ins_encode %{
24672     int vlen_enc = vector_length_encoding(this);
24673     BasicType bt = Matcher::vector_element_basic_type(this);
24674     int opc = this->ideal_Opcode();
24675     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24676                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24677   %}
24678   ins_pipe( pipe_slow );
24679 %}
24680 
24681 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24682   predicate(n->as_ShiftV()->is_var_shift());
24683   match(Set dst (LShiftVS (Binary dst src2) mask));
24684   match(Set dst (LShiftVI (Binary dst src2) mask));
24685   match(Set dst (LShiftVL (Binary dst src2) mask));
24686   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24687   ins_encode %{
24688     int vlen_enc = vector_length_encoding(this);
24689     BasicType bt = Matcher::vector_element_basic_type(this);
24690     int opc = this->ideal_Opcode();
24691     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24692                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24693   %}
24694   ins_pipe( pipe_slow );
24695 %}
24696 
24697 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24698   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24699   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24700   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24701   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24702   ins_encode %{
24703     int vlen_enc = vector_length_encoding(this);
24704     BasicType bt = Matcher::vector_element_basic_type(this);
24705     int opc = this->ideal_Opcode();
24706     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24707                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24708   %}
24709   ins_pipe( pipe_slow );
24710 %}
24711 
24712 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24713   predicate(!n->as_ShiftV()->is_var_shift());
24714   match(Set dst (RShiftVS (Binary dst src2) mask));
24715   match(Set dst (RShiftVI (Binary dst src2) mask));
24716   match(Set dst (RShiftVL (Binary dst src2) mask));
24717   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24718   ins_encode %{
24719     int vlen_enc = vector_length_encoding(this);
24720     BasicType bt = Matcher::vector_element_basic_type(this);
24721     int opc = this->ideal_Opcode();
24722     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24723                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24724   %}
24725   ins_pipe( pipe_slow );
24726 %}
24727 
24728 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24729   predicate(n->as_ShiftV()->is_var_shift());
24730   match(Set dst (RShiftVS (Binary dst src2) mask));
24731   match(Set dst (RShiftVI (Binary dst src2) mask));
24732   match(Set dst (RShiftVL (Binary dst src2) mask));
24733   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24734   ins_encode %{
24735     int vlen_enc = vector_length_encoding(this);
24736     BasicType bt = Matcher::vector_element_basic_type(this);
24737     int opc = this->ideal_Opcode();
24738     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24739                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24740   %}
24741   ins_pipe( pipe_slow );
24742 %}
24743 
24744 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24745   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24746   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24747   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24748   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24749   ins_encode %{
24750     int vlen_enc = vector_length_encoding(this);
24751     BasicType bt = Matcher::vector_element_basic_type(this);
24752     int opc = this->ideal_Opcode();
24753     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24754                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24755   %}
24756   ins_pipe( pipe_slow );
24757 %}
24758 
24759 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24760   predicate(!n->as_ShiftV()->is_var_shift());
24761   match(Set dst (URShiftVS (Binary dst src2) mask));
24762   match(Set dst (URShiftVI (Binary dst src2) mask));
24763   match(Set dst (URShiftVL (Binary dst src2) mask));
24764   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24765   ins_encode %{
24766     int vlen_enc = vector_length_encoding(this);
24767     BasicType bt = Matcher::vector_element_basic_type(this);
24768     int opc = this->ideal_Opcode();
24769     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24770                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24771   %}
24772   ins_pipe( pipe_slow );
24773 %}
24774 
24775 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24776   predicate(n->as_ShiftV()->is_var_shift());
24777   match(Set dst (URShiftVS (Binary dst src2) mask));
24778   match(Set dst (URShiftVI (Binary dst src2) mask));
24779   match(Set dst (URShiftVL (Binary dst src2) mask));
24780   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24781   ins_encode %{
24782     int vlen_enc = vector_length_encoding(this);
24783     BasicType bt = Matcher::vector_element_basic_type(this);
24784     int opc = this->ideal_Opcode();
24785     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24786                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24787   %}
24788   ins_pipe( pipe_slow );
24789 %}
24790 
24791 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24792   match(Set dst (MaxV (Binary dst src2) mask));
24793   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24794   ins_encode %{
24795     int vlen_enc = vector_length_encoding(this);
24796     BasicType bt = Matcher::vector_element_basic_type(this);
24797     int opc = this->ideal_Opcode();
24798     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24799                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24800   %}
24801   ins_pipe( pipe_slow );
24802 %}
24803 
24804 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24805   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24806   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24807   ins_encode %{
24808     int vlen_enc = vector_length_encoding(this);
24809     BasicType bt = Matcher::vector_element_basic_type(this);
24810     int opc = this->ideal_Opcode();
24811     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24812                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24813   %}
24814   ins_pipe( pipe_slow );
24815 %}
24816 
24817 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24818   match(Set dst (MinV (Binary dst src2) mask));
24819   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24820   ins_encode %{
24821     int vlen_enc = vector_length_encoding(this);
24822     BasicType bt = Matcher::vector_element_basic_type(this);
24823     int opc = this->ideal_Opcode();
24824     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24825                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24826   %}
24827   ins_pipe( pipe_slow );
24828 %}
24829 
24830 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24831   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24832   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24833   ins_encode %{
24834     int vlen_enc = vector_length_encoding(this);
24835     BasicType bt = Matcher::vector_element_basic_type(this);
24836     int opc = this->ideal_Opcode();
24837     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24838                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24839   %}
24840   ins_pipe( pipe_slow );
24841 %}
24842 
24843 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24844   match(Set dst (VectorRearrange (Binary dst src2) mask));
24845   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24846   ins_encode %{
24847     int vlen_enc = vector_length_encoding(this);
24848     BasicType bt = Matcher::vector_element_basic_type(this);
24849     int opc = this->ideal_Opcode();
24850     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24851                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24852   %}
24853   ins_pipe( pipe_slow );
24854 %}
24855 
24856 instruct vabs_masked(vec dst, kReg mask) %{
24857   match(Set dst (AbsVB dst mask));
24858   match(Set dst (AbsVS dst mask));
24859   match(Set dst (AbsVI dst mask));
24860   match(Set dst (AbsVL dst mask));
24861   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24862   ins_encode %{
24863     int vlen_enc = vector_length_encoding(this);
24864     BasicType bt = Matcher::vector_element_basic_type(this);
24865     int opc = this->ideal_Opcode();
24866     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24867                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24868   %}
24869   ins_pipe( pipe_slow );
24870 %}
24871 
24872 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24873   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24874   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24875   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24876   ins_encode %{
    assert(UseFMA, "needs FMA instruction support");
24878     int vlen_enc = vector_length_encoding(this);
24879     BasicType bt = Matcher::vector_element_basic_type(this);
24880     int opc = this->ideal_Opcode();
24881     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24882                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24883   %}
24884   ins_pipe( pipe_slow );
24885 %}
24886 
24887 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24888   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24889   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24890   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24891   ins_encode %{
    assert(UseFMA, "needs FMA instruction support");
24893     int vlen_enc = vector_length_encoding(this);
24894     BasicType bt = Matcher::vector_element_basic_type(this);
24895     int opc = this->ideal_Opcode();
24896     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24897                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24898   %}
24899   ins_pipe( pipe_slow );
24900 %}
24901 
24902 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24903   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24904   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24905   ins_encode %{
24906     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24907     int vlen_enc = vector_length_encoding(this, $src1);
24908     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24909 
    // Select the comparison predicate and emit the masked compare based on
    // the element basic type of the first source.
24911     switch (src1_elem_bt) {
24912       case T_BYTE: {
24913         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24914         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24915         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24916         break;
24917       }
24918       case T_SHORT: {
24919         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24920         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24921         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24922         break;
24923       }
24924       case T_INT: {
24925         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24926         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24927         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24928         break;
24929       }
24930       case T_LONG: {
24931         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24932         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24933         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24934         break;
24935       }
24936       case T_FLOAT: {
24937         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24938         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24939         break;
24940       }
24941       case T_DOUBLE: {
24942         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24943         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24944         break;
24945       }
24946       default: assert(false, "%s", type2name(src1_elem_bt)); break;
24947     }
24948   %}
24949   ins_pipe( pipe_slow );
24950 %}
24951 
24952 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24953   predicate(Matcher::vector_length(n) <= 32);
24954   match(Set dst (MaskAll src));
24955   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24956   ins_encode %{
24957     int mask_len = Matcher::vector_length(this);
24958     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24959   %}
24960   ins_pipe( pipe_slow );
24961 %}
24962 
24963 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24964   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24965   match(Set dst (XorVMask src (MaskAll cnt)));
24966   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24967   format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
24968   ins_encode %{
24969     uint masklen = Matcher::vector_length(this);
24970     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24971   %}
24972   ins_pipe( pipe_slow );
24973 %}
24974 
24975 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24976   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24977             (Matcher::vector_length(n) == 16) ||
24978             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24979   match(Set dst (XorVMask src (MaskAll cnt)));
24980   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24981   ins_encode %{
24982     uint masklen = Matcher::vector_length(this);
24983     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24984   %}
24985   ins_pipe( pipe_slow );
24986 %}
24987 
24988 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24989   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24990   match(Set dst (VectorLongToMask src));
24991   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24992   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24993   ins_encode %{
24994     int mask_len = Matcher::vector_length(this);
24995     int vec_enc  = vector_length_encoding(mask_len);
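    // For mask lengths of at most 8 the XMM temporary is not needed, so xnoreg
    // is passed in its place.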
24996     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24997                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24998   %}
24999   ins_pipe( pipe_slow );
25000 %}
25001 
25002 
25003 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
25004   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
25005   match(Set dst (VectorLongToMask src));
25006   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
25007   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
25008   ins_encode %{
25009     int mask_len = Matcher::vector_length(this);
25010     assert(mask_len <= 32, "invalid mask length");
25011     int vec_enc  = vector_length_encoding(mask_len);
25012     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25013                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
25014   %}
25015   ins_pipe( pipe_slow );
25016 %}
25017 
25018 instruct long_to_mask_evex(kReg dst, rRegL src) %{
25019   predicate(n->bottom_type()->isa_vectmask());
25020   match(Set dst (VectorLongToMask src));
25021   format %{ "long_to_mask_evex $dst, $src\t!" %}
25022   ins_encode %{
25023     __ kmov($dst$$KRegister, $src$$Register);
25024   %}
25025   ins_pipe( pipe_slow );
25026 %}
25027 
25028 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
25029   match(Set dst (AndVMask src1 src2));
25030   match(Set dst (OrVMask src1 src2));
25031   match(Set dst (XorVMask src1 src2));
25032   effect(TEMP kscratch);
25033   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
25034   ins_encode %{
25035     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
25036     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
25037     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
25038     uint masklen = Matcher::vector_length(this);
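    // Without AVX512DQ only the 16-bit mask instructions (kandw, korw, kxorw)
    // are available, so sub-16 mask lengths are widened to 16.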
25039     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
25040     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
25041   %}
25042   ins_pipe( pipe_slow );
25043 %}
25044 
25045 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
25046   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25047   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25048   ins_encode %{
25049     int vlen_enc = vector_length_encoding(this);
25050     BasicType bt = Matcher::vector_element_basic_type(this);
25051     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25052                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
25053   %}
25054   ins_pipe( pipe_slow );
25055 %}
25056 
instruct vternlog_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst, $src2, $src3, $func, $mask\t! vternlog masked operation" %}
25060   ins_encode %{
25061     int vlen_enc = vector_length_encoding(this);
25062     BasicType bt = Matcher::vector_element_basic_type(this);
25063     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25064                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25065   %}
25066   ins_pipe( pipe_slow );
25067 %}
25068 
25069 instruct castMM(kReg dst)
25070 %{
25071   match(Set dst (CastVV dst));
25072 
25073   size(0);
25074   format %{ "# castVV of $dst" %}
25075   ins_encode(/* empty encoding */);
25076   ins_cost(0);
25077   ins_pipe(empty);
25078 %}
25079 
25080 instruct castVV(vec dst)
25081 %{
25082   match(Set dst (CastVV dst));
25083 
25084   size(0);
25085   format %{ "# castVV of $dst" %}
25086   ins_encode(/* empty encoding */);
25087   ins_cost(0);
25088   ins_pipe(empty);
25089 %}
25090 
25091 instruct castVVLeg(legVec dst)
25092 %{
25093   match(Set dst (CastVV dst));
25094 
25095   size(0);
25096   format %{ "# castVV of $dst" %}
25097   ins_encode(/* empty encoding */);
25098   ins_cost(0);
25099   ins_pipe(empty);
25100 %}
25101 
25102 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25103 %{
25104   match(Set dst (IsInfiniteF src));
25105   effect(TEMP ktmp, KILL cr);
25106   format %{ "float_class_check $dst, $src" %}
25107   ins_encode %{
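    // imm8 0x18 selects the +Infinity (bit 3) and -Infinity (bit 4) classes.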
25108     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25109     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25110   %}
25111   ins_pipe(pipe_slow);
25112 %}
25113 
25114 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25115 %{
25116   match(Set dst (IsInfiniteD src));
25117   effect(TEMP ktmp, KILL cr);
25118   format %{ "double_class_check $dst, $src" %}
25119   ins_encode %{
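    // imm8 0x18 selects the +Infinity (bit 3) and -Infinity (bit 4) classes.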
25120     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25121     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25122   %}
25123   ins_pipe(pipe_slow);
25124 %}
25125 
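// ---------------------------------- Vector Saturating Operations ---------------------------------

// Subword (byte/short) saturating add/sub map directly onto the saturating
// vpadds*/vpaddus*/vpsubs*/vpsubus* instructions; int/long lanes have no such
// instructions and are handled by the multi-instruction sequences below.
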
25126 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25127 %{
25128   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25129             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25130   match(Set dst (SaturatingAddV src1 src2));
25131   match(Set dst (SaturatingSubV src1 src2));
25132   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25133   ins_encode %{
25134     int vlen_enc = vector_length_encoding(this);
25135     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25136     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25137                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25138   %}
25139   ins_pipe(pipe_slow);
25140 %}
25141 
25142 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25143 %{
25144   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25145             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25146   match(Set dst (SaturatingAddV src1 src2));
25147   match(Set dst (SaturatingSubV src1 src2));
25148   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25149   ins_encode %{
25150     int vlen_enc = vector_length_encoding(this);
25151     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25152     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25153                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25154   %}
25155   ins_pipe(pipe_slow);
25156 %}
25157 
25158 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25159 %{
25160   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25161             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25162             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25163   match(Set dst (SaturatingAddV src1 src2));
25164   match(Set dst (SaturatingSubV src1 src2));
25165   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25166   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25167   ins_encode %{
25168     int vlen_enc = vector_length_encoding(this);
25169     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25170     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25171                                         $src1$$XMMRegister, $src2$$XMMRegister,
25172                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25173                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25174   %}
25175   ins_pipe(pipe_slow);
25176 %}
25177 
25178 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25179 %{
25180   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25181             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25182             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25183   match(Set dst (SaturatingAddV src1 src2));
25184   match(Set dst (SaturatingSubV src1 src2));
25185   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25186   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25187   ins_encode %{
25188     int vlen_enc = vector_length_encoding(this);
25189     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25190     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25191                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25192                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25193   %}
25194   ins_pipe(pipe_slow);
25195 %}
25196 
25197 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25198 %{
25199   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25200             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25201             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25202   match(Set dst (SaturatingAddV src1 src2));
25203   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25204   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25205   ins_encode %{
25206     int vlen_enc = vector_length_encoding(this);
25207     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25208     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25209                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25210   %}
25211   ins_pipe(pipe_slow);
25212 %}
25213 
25214 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25215 %{
25216   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25217             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25218             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25219   match(Set dst (SaturatingAddV src1 src2));
25220   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25221   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25222   ins_encode %{
25223     int vlen_enc = vector_length_encoding(this);
25224     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25225     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25226                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25227   %}
25228   ins_pipe(pipe_slow);
25229 %}
25230 
25231 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25232 %{
25233   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25234             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25235             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25236   match(Set dst (SaturatingSubV src1 src2));
25237   effect(TEMP ktmp);
25238   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25239   ins_encode %{
25240     int vlen_enc = vector_length_encoding(this);
25241     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25242     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25243                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25244   %}
25245   ins_pipe(pipe_slow);
25246 %}
25247 
25248 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25249 %{
25250   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25251             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25252             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25253   match(Set dst (SaturatingSubV src1 src2));
25254   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25255   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25256   ins_encode %{
25257     int vlen_enc = vector_length_encoding(this);
25258     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25259     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25260                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25261   %}
25262   ins_pipe(pipe_slow);
25263 %}
25264 
25265 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25266 %{
25267   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25268             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25269   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25270   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25271   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25272   ins_encode %{
25273     int vlen_enc = vector_length_encoding(this);
25274     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25275     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25276                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25277   %}
25278   ins_pipe(pipe_slow);
25279 %}
25280 
25281 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25282 %{
25283   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25284             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25285   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25286   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25287   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25288   ins_encode %{
25289     int vlen_enc = vector_length_encoding(this);
25290     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25291     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25292                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25293   %}
25294   ins_pipe(pipe_slow);
25295 %}
25296 
25297 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25298   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25299             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25300   match(Set dst (SaturatingAddV (Binary dst src) mask));
25301   match(Set dst (SaturatingSubV (Binary dst src) mask));
25302   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25303   ins_encode %{
25304     int vlen_enc = vector_length_encoding(this);
25305     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25306     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25307                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25308   %}
25309   ins_pipe( pipe_slow );
25310 %}
25311 
25312 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25313   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25314             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25315   match(Set dst (SaturatingAddV (Binary dst src) mask));
25316   match(Set dst (SaturatingSubV (Binary dst src) mask));
25317   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25318   ins_encode %{
25319     int vlen_enc = vector_length_encoding(this);
25320     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25321     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25322                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25323   %}
25324   ins_pipe( pipe_slow );
25325 %}
25326 
25327 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25328   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25329             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25330   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25331   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25332   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25333   ins_encode %{
25334     int vlen_enc = vector_length_encoding(this);
25335     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25336     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25337                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25338   %}
25339   ins_pipe( pipe_slow );
25340 %}
25341 
25342 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25343   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25344             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25345   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25346   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25347   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25348   ins_encode %{
25349     int vlen_enc = vector_length_encoding(this);
25350     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25351     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25352                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25353   %}
25354   ins_pipe( pipe_slow );
25355 %}
25356 
25357 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25358 %{
25359   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25360   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25361   ins_encode %{
25362     int vlen_enc = vector_length_encoding(this);
25363     BasicType bt = Matcher::vector_element_basic_type(this);
25364     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25365   %}
25366   ins_pipe(pipe_slow);
25367 %}
25368 
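// ---------------------------------- Float16 Scalar and Vector Operations -------------------------
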
25369 instruct reinterpretS2HF(regF dst, rRegI src)
25370 %{
25371   match(Set dst (ReinterpretS2HF src));
25372   format %{ "vmovw $dst, $src" %}
25373   ins_encode %{
25374     __ vmovw($dst$$XMMRegister, $src$$Register);
25375   %}
25376   ins_pipe(pipe_slow);
25377 %}
25378 
25379 instruct reinterpretHF2S(rRegI dst, regF src)
25380 %{
25381   match(Set dst (ReinterpretHF2S src));
25382   format %{ "vmovw $dst, $src" %}
25383   ins_encode %{
25384     __ vmovw($dst$$Register, $src$$XMMRegister);
25385   %}
25386   ins_pipe(pipe_slow);
25387 %}
25388 
25389 instruct convF2HFAndS2HF(regF dst, regF src)
25390 %{
25391   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25392   format %{ "convF2HFAndS2HF $dst, $src" %}
25393   ins_encode %{
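    // imm8 0x04 sets bit 2 (use the immediate rounding control) with RC = 00b:
    // round to nearest even, independent of MXCSR.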
25394     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25395   %}
25396   ins_pipe(pipe_slow);
25397 %}
25398 
25399 instruct convHF2SAndHF2F(regF dst, regF src)
25400 %{
25401   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25402   format %{ "convHF2SAndHF2F $dst, $src" %}
25403   ins_encode %{
25404     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25405   %}
25406   ins_pipe(pipe_slow);
25407 %}
25408 
25409 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25410 %{
25411   match(Set dst (SqrtHF src));
25412   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25413   ins_encode %{
25414     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25415   %}
25416   ins_pipe(pipe_slow);
25417 %}
25418 
25419 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25420 %{
25421   match(Set dst (AddHF src1 src2));
25422   match(Set dst (DivHF src1 src2));
25423   match(Set dst (MulHF src1 src2));
25424   match(Set dst (SubHF src1 src2));
25425   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25426   ins_encode %{
25427     int opcode = this->ideal_Opcode();
25428     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25429   %}
25430   ins_pipe(pipe_slow);
25431 %}
25432 
25433 instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2)
25434 %{
25435   predicate(VM_Version::supports_avx10_2());
25436   match(Set dst (MaxHF src1 src2));
25437   match(Set dst (MinHF src1 src2));
25438   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25439   ins_encode %{
25440     int function = this->ideal_Opcode() == Op_MinHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25441     __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25442   %}
25443   ins_pipe( pipe_slow );
25444 %}
25445 
25446 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25447 %{
25448   predicate(!VM_Version::supports_avx10_2());
25449   match(Set dst (MaxHF src1 src2));
25450   match(Set dst (MinHF src1 src2));
25451   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25452   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25453   ins_encode %{
25454     int opcode = this->ideal_Opcode();
25455     __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25456                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25457   %}
25458   ins_pipe( pipe_slow );
25459 %}
25460 
25461 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25462 %{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2" %}
25466   ins_encode %{
25467     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25468   %}
25469   ins_pipe( pipe_slow );
25470 %}
25471 
25472 
25473 instruct vector_sqrt_HF_reg(vec dst, vec src)
25474 %{
25475   match(Set dst (SqrtVHF src));
25476   format %{ "vector_sqrt_fp16 $dst, $src" %}
25477   ins_encode %{
25478     int vlen_enc = vector_length_encoding(this);
25479     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25480   %}
25481   ins_pipe(pipe_slow);
25482 %}
25483 
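// The memory variants below match a short-vector load that is reinterpreted as
// a half-float vector, allowing the load to be folded into the instruction's
// memory operand.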
25484 instruct vector_sqrt_HF_mem(vec dst, memory src)
25485 %{
25486   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25487   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25488   ins_encode %{
25489     int vlen_enc = vector_length_encoding(this);
25490     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25491   %}
25492   ins_pipe(pipe_slow);
25493 %}
25494 
25495 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25496 %{
25497   match(Set dst (AddVHF src1 src2));
25498   match(Set dst (DivVHF src1 src2));
25499   match(Set dst (MulVHF src1 src2));
25500   match(Set dst (SubVHF src1 src2));
25501   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25502   ins_encode %{
25503     int vlen_enc = vector_length_encoding(this);
25504     int opcode = this->ideal_Opcode();
25505     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25506   %}
25507   ins_pipe(pipe_slow);
25508 %}
25509 
25510 
25511 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25512 %{
25513   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25514   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25515   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25516   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25517   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25518   ins_encode %{
25519     int vlen_enc = vector_length_encoding(this);
25520     int opcode = this->ideal_Opcode();
25521     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25522   %}
25523   ins_pipe(pipe_slow);
25524 %}
25525 
25526 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25527 %{
25528   match(Set dst (FmaVHF src2 (Binary dst src1)));
25529   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25530   ins_encode %{
25531     int vlen_enc = vector_length_encoding(this);
25532     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25533   %}
25534   ins_pipe( pipe_slow );
25535 %}
25536 
25537 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25538 %{
25539   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25540   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25541   ins_encode %{
25542     int vlen_enc = vector_length_encoding(this);
25543     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25544   %}
25545   ins_pipe( pipe_slow );
25546 %}
25547 
25548 instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2)
25549 %{
25550   predicate(VM_Version::supports_avx10_2());
25551   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25552   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25553   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25554   ins_encode %{
25555     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25557     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25558   %}
25559   ins_pipe( pipe_slow );
25560 %}
25561 
25562 instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2)
25563 %{
25564   predicate(VM_Version::supports_avx10_2());
25565   match(Set dst (MinVHF src1 src2));
25566   match(Set dst (MaxVHF src1 src2));
25567   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25568   ins_encode %{
25569     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25571     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25572   %}
25573   ins_pipe( pipe_slow );
25574 %}
25575 
25576 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25577 %{
25578   predicate(!VM_Version::supports_avx10_2());
25579   match(Set dst (MinVHF src1 src2));
25580   match(Set dst (MaxVHF src1 src2));
25581   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25582   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25583   ins_encode %{
25584     int vlen_enc = vector_length_encoding(this);
25585     int opcode = this->ideal_Opcode();
25586     __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25587                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25588   %}
25589   ins_pipe( pipe_slow );
25590 %}
25591 
25592 //----------PEEPHOLE RULES-----------------------------------------------------
25593 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
25595 //
25596 // peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate holds
25598 //
25599 // peepmatch ( root_instr_name [preceding_instruction]* );
25600 //
25601 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file and has the signature
// // MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...).
// // The arguments are the basic block, the current node index inside the
// // block, the register allocator, a function that, when invoked, returns the
// // new node defined in peepreplace, and the rule numbers of the nodes
// // appearing in the corresponding peepmatch. The procedure returns true if
// // successful, else false.
25610 //
// peepconstraint ( instruction_number.operand_name relational_op
//                  instruction_number.operand_name [, ...] );
25614 // // instruction numbers are zero-based using left to right order in peepmatch
25615 //
25616 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25617 // // provide an instruction_number.operand_name for each operand that appears
25618 // // in the replacement instruction's match rule
25619 //
25620 // ---------VM FLAGS---------------------------------------------------------
25621 //
25622 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25623 //
25624 // Each peephole rule is given an identifying number starting with zero and
25625 // increasing by one in the order seen by the parser.  An individual peephole
25626 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25627 // on the command-line.
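//
// For example, to keep only the peephole rule numbered 2 enabled and disable
// all others (the numbering depends on the order in which the parser sees the
// rules):
//
//   java -XX:OptoPeepholeAt=2 ...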
25628 //
25629 // ---------CURRENT LIMITATIONS----------------------------------------------
25630 //
// Only transformations inside a basic block (do we need more for peephole?)
25632 //
25633 // ---------EXAMPLE----------------------------------------------------------
25634 //
25635 // // pertinent parts of existing instructions in architecture description
25636 // instruct movI(rRegI dst, rRegI src)
25637 // %{
25638 //   match(Set dst (CopyI src));
25639 // %}
25640 //
25641 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25642 // %{
25643 //   match(Set dst (AddI dst src));
25644 //   effect(KILL cr);
25645 // %}
25646 //
25647 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25648 // %{
25649 //   match(Set dst (AddI dst src));
25650 // %}
25651 //
25652 // 1. Simple replacement
25653 // - Only match adjacent instructions in same basic block
25654 // - Only equality constraints
25655 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25656 // - Only one replacement instruction
25657 //
25658 // // Change (inc mov) to lea
25659 // peephole %{
25660 //   // lea should only be emitted when beneficial
25661 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25662 //   // increment preceded by register-register move
25663 //   peepmatch ( incI_rReg movI );
25664 //   // require that the destination register of the increment
25665 //   // match the destination register of the move
25666 //   peepconstraint ( 0.dst == 1.dst );
25667 //   // construct a replacement instruction that sets
25668 //   // the destination to ( move's source register + one )
25669 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25670 // %}
25671 //
25672 // 2. Procedural replacement
// - More flexible finding of relevant nodes
// - More flexible constraints
// - More flexible transformations
// - May utilise the architecture-dependent API more effectively
// - Currently only one replacement instruction due to adlc parsing limitations
25678 //
25679 // // Change (inc mov) to lea
25680 // peephole %{
25681 //   // lea should only be emitted when beneficial
25682 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of the matched nodes are passed into the function below
25684 //   peepmatch ( incI_rReg movI );
//   // the procedure responsible for performing the transformation
25686 //   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that creates this node
//   // when invoked is passed into the function above
25689 //   peepreplace ( leaI_rReg_immI() );
25690 // %}
25691 
// These instructions are not matched by the matcher but are used by the peephole rules
25693 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25694 %{
25695   predicate(false);
25696   match(Set dst (AddI src1 src2));
25697   format %{ "leal    $dst, [$src1 + $src2]" %}
25698   ins_encode %{
25699     Register dst = $dst$$Register;
25700     Register src1 = $src1$$Register;
25701     Register src2 = $src2$$Register;
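    // rbp and r13 cannot be used as a base register without an extra
    // displacement byte, so prefer the other operand as the base.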
25702     if (src1 != rbp && src1 != r13) {
25703       __ leal(dst, Address(src1, src2, Address::times_1));
25704     } else {
25705       assert(src2 != rbp && src2 != r13, "");
25706       __ leal(dst, Address(src2, src1, Address::times_1));
25707     }
25708   %}
25709   ins_pipe(ialu_reg_reg);
25710 %}
25711 
25712 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25713 %{
25714   predicate(false);
25715   match(Set dst (AddI src1 src2));
25716   format %{ "leal    $dst, [$src1 + $src2]" %}
25717   ins_encode %{
25718     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25719   %}
25720   ins_pipe(ialu_reg_reg);
25721 %}
25722 
25723 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25724 %{
25725   predicate(false);
25726   match(Set dst (LShiftI src shift));
25727   format %{ "leal    $dst, [$src << $shift]" %}
25728   ins_encode %{
25729     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25730     Register src = $src$$Register;
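    // For a shift of 1 (times_2), [src + src] is preferred: an index with no
    // base forces a 32-bit displacement. rbp and r13 cannot be used as a base
    // without an extra displacement byte, so they take the scaled form.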
25731     if (scale == Address::times_2 && src != rbp && src != r13) {
25732       __ leal($dst$$Register, Address(src, src, Address::times_1));
25733     } else {
25734       __ leal($dst$$Register, Address(noreg, src, scale));
25735     }
25736   %}
25737   ins_pipe(ialu_reg_reg);
25738 %}
25739 
25740 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25741 %{
25742   predicate(false);
25743   match(Set dst (AddL src1 src2));
25744   format %{ "leaq    $dst, [$src1 + $src2]" %}
25745   ins_encode %{
25746     Register dst = $dst$$Register;
25747     Register src1 = $src1$$Register;
25748     Register src2 = $src2$$Register;
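    // Same base-register encoding consideration as the int variant above.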
25749     if (src1 != rbp && src1 != r13) {
25750       __ leaq(dst, Address(src1, src2, Address::times_1));
25751     } else {
25752       assert(src2 != rbp && src2 != r13, "");
25753       __ leaq(dst, Address(src2, src1, Address::times_1));
25754     }
25755   %}
25756   ins_pipe(ialu_reg_reg);
25757 %}
25758 
25759 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25760 %{
25761   predicate(false);
25762   match(Set dst (AddL src1 src2));
25763   format %{ "leaq    $dst, [$src1 + $src2]" %}
25764   ins_encode %{
25765     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25766   %}
25767   ins_pipe(ialu_reg_reg);
25768 %}
25769 
25770 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25771 %{
25772   predicate(false);
25773   match(Set dst (LShiftL src shift));
25774   format %{ "leaq    $dst, [$src << $shift]" %}
25775   ins_encode %{
25776     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25777     Register src = $src$$Register;
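    // Same encoding consideration as the int shift variant above.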
25778     if (scale == Address::times_2 && src != rbp && src != r13) {
25779       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25780     } else {
25781       __ leaq($dst$$Register, Address(noreg, src, scale));
25782     }
25783   %}
25784   ins_pipe(ialu_reg_reg);
25785 %}
25786 
// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (VM_Version::supports_fast_2op_lea()), whereas the {inc, dec} rules are
// generally only beneficial on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
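//
// For example (a sketch; the registers are illustrative), the pair
//
//   movl rdx, rax         // movI:      rdx = rax
//   addl rdx, rbx         // addI_rReg: rdx += rbx
//
// is coalesced by lea_coalesce_reg into the single instruction
//
//   leal rdx, [rax + rbx]
//
// provided the condition flags set by the add are not needed downstream.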
25793 
25794 peephole
25795 %{
25796   peeppredicate(VM_Version::supports_fast_2op_lea());
25797   peepmatch (addI_rReg);
25798   peepprocedure (lea_coalesce_reg);
25799   peepreplace (leaI_rReg_rReg_peep());
25800 %}
25801 
25802 peephole
25803 %{
25804   peeppredicate(VM_Version::supports_fast_2op_lea());
25805   peepmatch (addI_rReg_imm);
25806   peepprocedure (lea_coalesce_imm);
25807   peepreplace (leaI_rReg_immI_peep());
25808 %}
25809 
25810 peephole
25811 %{
25812   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25813                 VM_Version::is_intel_cascade_lake());
25814   peepmatch (incI_rReg);
25815   peepprocedure (lea_coalesce_imm);
25816   peepreplace (leaI_rReg_immI_peep());
25817 %}
25818 
25819 peephole
25820 %{
25821   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25822                 VM_Version::is_intel_cascade_lake());
25823   peepmatch (decI_rReg);
25824   peepprocedure (lea_coalesce_imm);
25825   peepreplace (leaI_rReg_immI_peep());
25826 %}
25827 
25828 peephole
25829 %{
25830   peeppredicate(VM_Version::supports_fast_2op_lea());
25831   peepmatch (salI_rReg_immI2);
25832   peepprocedure (lea_coalesce_imm);
25833   peepreplace (leaI_rReg_immI2_peep());
25834 %}
25835 
25836 peephole
25837 %{
25838   peeppredicate(VM_Version::supports_fast_2op_lea());
25839   peepmatch (addL_rReg);
25840   peepprocedure (lea_coalesce_reg);
25841   peepreplace (leaL_rReg_rReg_peep());
25842 %}
25843 
25844 peephole
25845 %{
25846   peeppredicate(VM_Version::supports_fast_2op_lea());
25847   peepmatch (addL_rReg_imm);
25848   peepprocedure (lea_coalesce_imm);
25849   peepreplace (leaL_rReg_immL32_peep());
25850 %}
25851 
25852 peephole
25853 %{
25854   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25855                 VM_Version::is_intel_cascade_lake());
25856   peepmatch (incL_rReg);
25857   peepprocedure (lea_coalesce_imm);
25858   peepreplace (leaL_rReg_immL32_peep());
25859 %}
25860 
25861 peephole
25862 %{
25863   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25864                 VM_Version::is_intel_cascade_lake());
25865   peepmatch (decL_rReg);
25866   peepprocedure (lea_coalesce_imm);
25867   peepreplace (leaL_rReg_immL32_peep());
25868 %}
25869 
25870 peephole
25871 %{
25872   peeppredicate(VM_Version::supports_fast_2op_lea());
25873   peepmatch (salL_rReg_immI2);
25874   peepprocedure (lea_coalesce_imm);
25875   peepreplace (leaL_rReg_immI2_peep());
25876 %}
25877 
25878 peephole
25879 %{
25880   peepmatch (leaPCompressedOopOffset);
25881   peepprocedure (lea_remove_redundant);
25882 %}
25883 
25884 peephole
25885 %{
25886   peepmatch (leaP8Narrow);
25887   peepprocedure (lea_remove_redundant);
25888 %}
25889 
25890 peephole
25891 %{
25892   peepmatch (leaP32Narrow);
25893   peepprocedure (lea_remove_redundant);
25894 %}
25895 
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant if the downstream instructions (like JCC or CMOV) only use
// flags that are already set by the previous instruction.
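//
// For example (illustrative):
//
//   andl  rax, rbx        // sets ZF and SF (and clears OF/CF) from the result
//   testl rax, rax        // recomputes the same flags and can be removed
//   je    done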
25898 
// int variant
25900 peephole
25901 %{
25902   peepmatch (testI_reg);
25903   peepprocedure (test_may_remove);
25904 %}
25905 
// long variant
25907 peephole
25908 %{
25909   peepmatch (testL_reg);
25910   peepprocedure (test_may_remove);
25911 %}
25912 
25913 
25914 //----------SMARTSPILL RULES---------------------------------------------------
25915 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.