//
// Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
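//
// For example, the first definition below,
//   reg_def RAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// reads: RAX is save-on-call under both the allocator's and the C calling
// convention, is spilled and filled as an int (Op_RegI), and is placed in
// opcodes with encoding 0.  The matching RAX_H entry names the upper half
// of the 64-bit register, so long/pointer values occupy the pair.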

// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers)

// Previously RBX, RSI, and RDI were set as save-on-entry for Java code.
// SOE was then turned off in Java code due to frequent use of uncommon-traps.
// Now that the allocator is better, RSI and RDI are turned back on as SOE registers.

reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif
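
// RSI and RDI differ across the #ifdef above because the Windows x64 ABI
// treats them as callee-saved (hence SOE), while the System V ABI used on
// Linux treats them as caller-saved (SOC).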

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.
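//
// chunk0 below applies this ordering to the general registers: registers
// with no fixed instruction or calling-convention role (R10, R11, R8, R9)
// come first, RAX and RBP come late, the APX registers R16-R31 follow the
// legacy sixteen, and RSP, which always holds the stack pointer, comes last.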

alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   R16,         R16_H,
                   R17,         R17_H,
                   R18,         R18_H,
                   R19,         R19_H,
                   R20,         R20_H,
                   R21,         R21_H,
                   R22,         R22_H,
                   R23,         R23_H,
                   R24,         R24_H,
                   R25,         R25_H,
                   R26,         R26_H,
                   R27,         R27_H,
                   R28,         R28_H,
                   R29,         R29_H,
                   R30,         R30_H,
                   R31,         R31_H,
                   RSP,         RSP_H);

// XMM registers.  512-bit registers, viewed as 16 32-bit slots each, labeled (a)-p.
// Slot a in each register holds a Float; slots ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre-EVEX architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// For EVEX-enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register is preserved across function calls;
//              XMM0-XMM7 might hold parameters.
// Windows ABI: XMM6-XMM15 are preserved across function calls;
//              XMM0-XMM3 might hold parameters.
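//
// Each XMM register is therefore described by sixteen reg_defs below, one
// per 32-bit slot: XMM0 is slot (a), XMM0b slot (b), up to XMM0p slot (p).
// A float spill uses one slot, a double two, and 128/256/512-bit vectors
// use the first 4, 8, or 16 slots respectively.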

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
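// RFLAGS has no ideal spill type (0) and no backing VMReg because the
// condition codes are never spilled; instructions that clobber them just
// declare a KILL effect on this register.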

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());
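//
// K0 is not listed: in EVEX encodings a mask-field value of zero means
// "no masking", so k0 cannot serve as an allocatable write-mask register.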


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs
// (RSP and R15, the TLS register, are excluded).
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

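// The classes below are defined with %{ ... %} bodies rather than fixed
// register lists: each returns a RegMask (e.g. _PTR_REG_mask) that is
// computed in C++ at VM startup, so membership can depend on runtime
// flags and CPU features such as the availability of the APX registers.
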
// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
reg_class vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
// Flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
// Class for pre-evex float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
 1079 // Class for evex float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
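// reg_class_dynamic resolves to its first (evex) class when the trailing
// predicate holds at runtime, and to its second (legacy) class otherwise.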
 1115 
// Class for pre-evex double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
 1134 // Class for evex double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
// Class for pre-evex 32-bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
// Class for evex 32-bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre-evex 64-bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for evex 64-bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre-evex 128-bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for evex 128-bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre-evex 256-bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for evex 256-bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
// Class for evex 512-bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
// Class for restricted pre-evex 512-bit vector registers (XMM0-XMM15 only)
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
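// A CastLL's range can be checked with 32-bit immediate compares when each
// finite type bound fits in a signed 32-bit immediate; an unbounded bound
// (min_jlong/max_jlong) needs no check at all.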
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
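// While compressed oops are enabled, R12 is reserved as the heap-base
// register, so it is removed from the allocatable masks built below.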
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
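  // r16..r31 are the APX extended GPRs (EGPRs); the loops below strip them
  // from the masks when UseAPX is disabled.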
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
static bool generate_vzeroupper(Compile* C) {
  // Generate vzeroupper when compiled code uses wide vectors or must clear
  // the upper halves of the AVX registers.
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());
}
 1629 
static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper is a 3-byte instruction
}
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
  int offset = 5; // 5 bytes from start of call to where return address points
                  // (a direct call is E8 + rel32)
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
  int offset = 15; // 15 bytes from start of call to where return address points:
                   // a 10-byte movq of the inline-cache value into rax + a 5-byte call
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;
 1678 }
 1679 
 1680 // This could be in MacroAssembler but it's fairly C2 specific
 1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1682   Label exit;
 1683   __ jccb(Assembler::noParity, exit);
 1684   __ pushf();
 1685   //
 1686   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1687   // zero OF,AF,SF for NaN values.
 1688   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1689   // values returns 'less than' result (CF is set).
 1690   // Leave the rest of flags unchanged.
 1691   //
 1692   //    7 6 5 4 3 2 1 0
 1693   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1694   //    0 0 1 0 1 0 1 1   (0x2B)
 1695   //
 1696   __ andq(Address(rsp, 0), 0xffffff2b);
 1697   __ popf();
 1698   __ bind(exit);
 1699 }
 1700 
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  // Sets dst to -1, 0, or +1 after a floating point compare. The unordered
  // case (at least one input is NaN) sets CF=1, so the 'below' branch also
  // yields -1 for NaN; otherwise dst is 0 when equal and +1 when above.
 1704   Label done;
 1705   __ movl(dst, -1);
 1706   __ jcc(Assembler::below, done);
 1707   __ setcc(Assembler::notEqual, dst);
 1708   __ bind(done);
 1709 }
 1710 
 1711 // Math.min()    # Math.max()
 1712 // --------------------------
 1713 // ucomis[s/d]   #
 1714 // ja   -> b     # a
 1715 // jp   -> NaN   # NaN
 1716 // jb   -> a     # b
 1717 // je            #
 1718 // |-jz -> a | b # a & b
 1719 // |    -> a     #
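// In words: after ucomis[s/d] a,b -- 'above' (a > b) makes min pick b and max
// pick a, 'below' the reverse, 'parity' produces the canonical NaN, and the
// equal case separates +/-0.0 by merging sign bits (see the zero label below).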
 1720 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1721                             XMMRegister a, XMMRegister b,
 1722                             XMMRegister xmmt, Register rt,
 1723                             bool min, bool single) {
 1724 
 1725   Label nan, zero, below, above, done;
 1726 
 1727   if (single)
 1728     __ ucomiss(a, b);
 1729   else
 1730     __ ucomisd(a, b);
 1731 
 1732   if (dst->encoding() != (min ? b : a)->encoding())
 1733     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1734   else
 1735     __ jccb(Assembler::above, done);
 1736 
 1737   __ jccb(Assembler::parity, nan);  // PF=1
 1738   __ jccb(Assembler::below, below); // CF=1
 1739 
 1740   // equal
 1741   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1742   if (single) {
 1743     __ ucomiss(a, xmmt);
 1744     __ jccb(Assembler::equal, zero);
 1745 
 1746     __ movflt(dst, a);
 1747     __ jmp(done);
 1748   }
 1749   else {
 1750     __ ucomisd(a, xmmt);
 1751     __ jccb(Assembler::equal, zero);
 1752 
 1753     __ movdbl(dst, a);
 1754     __ jmp(done);
 1755   }
 1756 
 1757   __ bind(zero);
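  // a == b == +/-0.0: min must return -0.0 and max +0.0, so merge the sign
  // bits -- OR for min (any -0.0 wins), AND for max (+0.0 wins).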
 1758   if (min)
 1759     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1760   else
 1761     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1762 
 1763   __ jmp(done);
 1764 
 1765   __ bind(above);
 1766   if (single)
 1767     __ movflt(dst, min ? b : a);
 1768   else
 1769     __ movdbl(dst, min ? b : a);
 1770 
 1771   __ jmp(done);
 1772 
 1773   __ bind(nan);
 1774   if (single) {
 1775     __ movl(rt, 0x7fc00000); // Float.NaN
 1776     __ movdl(dst, rt);
 1777   }
 1778   else {
 1779     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1780     __ movdq(dst, rt);
 1781   }
 1782   __ jmp(done);
 1783 
 1784   __ bind(below);
 1785   if (single)
 1786     __ movflt(dst, min ? a : b);
 1787   else
 1788     __ movdbl(dst, min ? a : b);
 1789 
 1790   __ bind(done);
 1791 }
 1792 
 1793 //=============================================================================
 1794 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
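// The constant table is reached with absolute addressing on x86_64 (see
// calculate_table_base_offset below), so no base register is materialized
// and the node emits nothing.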
 1795 
 1796 int ConstantTable::calculate_table_base_offset() const {
 1797   return 0;  // absolute addressing, no offset
 1798 }
 1799 
 1800 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray<Node*>* nodes, PhaseRegAlloc* ra_) {
 1802   ShouldNotReachHere();
 1803 }
 1804 
 1805 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1806   // Empty encoding
 1807 }
 1808 
 1809 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1810   return 0;
 1811 }
 1812 
 1813 #ifndef PRODUCT
 1814 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1815   st->print("# MachConstantBaseNode (empty encoding)");
 1816 }
 1817 #endif
 1818 
 1819 
 1820 //=============================================================================
 1821 #ifndef PRODUCT
 1822 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1823   Compile* C = ra_->C;
 1824 
 1825   int framesize = C->output()->frame_size_in_bytes();
 1826   int bangsize = C->output()->bang_size_in_bytes();
 1827   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1828   // Remove wordSize for return addr which is already pushed.
 1829   framesize -= wordSize;
 1830 
 1831   if (C->output()->need_stack_bang(bangsize)) {
 1832     framesize -= wordSize;
 1833     st->print("# stack bang (%d bytes)", bangsize);
 1834     st->print("\n\t");
 1835     st->print("pushq   rbp\t# Save rbp");
 1836     if (PreserveFramePointer) {
 1837         st->print("\n\t");
 1838         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1839     }
 1840     if (framesize) {
 1841       st->print("\n\t");
 1842       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1843     }
 1844   } else {
 1845     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1846     st->print("\n\t");
 1847     framesize -= wordSize;
 1848     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1849     if (PreserveFramePointer) {
 1850       st->print("\n\t");
 1851       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1852       if (framesize > 0) {
 1853         st->print("\n\t");
 1854         st->print("addq    rbp, #%d", framesize);
 1855       }
 1856     }
 1857   }
 1858 
 1859   if (VerifyStackAtCalls) {
 1860     st->print("\n\t");
 1861     framesize -= wordSize;
 1862     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1863 #ifdef ASSERT
 1864     st->print("\n\t");
 1865     st->print("# stack alignment check");
 1866 #endif
 1867   }
 1868   if (C->stub_function() != nullptr) {
 1869     st->print("\n\t");
 1870     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1871     st->print("\n\t");
 1872     st->print("je      fast_entry\t");
 1873     st->print("\n\t");
 1874     st->print("call    #nmethod_entry_barrier_stub\t");
 1875     st->print("\n\tfast_entry:");
 1876   }
 1877   st->cr();
 1878 }
 1879 #endif
 1880 
 1881 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1882   Compile* C = ra_->C;
 1883 
 1884   int framesize = C->output()->frame_size_in_bytes();
 1885   int bangsize = C->output()->bang_size_in_bytes();
 1886 
 1887   if (C->clinit_barrier_on_entry()) {
 1888     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1889     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1890 
 1891     Label L_skip_barrier;
 1892     Register klass = rscratch1;
 1893 
 1894     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1895     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1896 
 1897     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1898 
 1899     __ bind(L_skip_barrier);
 1900   }
 1901 
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
 1903 
 1904   C->output()->set_frame_complete(__ offset());
 1905 
 1906   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because constant-table users
    // may be emitted before the MachConstantBaseNode itself.
 1909     ConstantTable& constant_table = C->output()->constant_table();
 1910     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1911   }
 1912 }
 1913 
 1914 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1915 {
 1916   return MachNode::size(ra_); // too many variables; just compute it
 1917                               // the hard way
 1918 }
 1919 
 1920 int MachPrologNode::reloc() const
 1921 {
 1922   return 0; // a large enough number
 1923 }
 1924 
 1925 //=============================================================================
 1926 #ifndef PRODUCT
 1927 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1928 {
 1929   Compile* C = ra_->C;
 1930   if (generate_vzeroupper(C)) {
 1931     st->print("vzeroupper");
 1932     st->cr(); st->print("\t");
 1933   }
 1934 
 1935   int framesize = C->output()->frame_size_in_bytes();
 1936   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1937   // Remove word for return adr already pushed
 1938   // and RBP
 1939   framesize -= 2*wordSize;
 1940 
 1941   if (framesize) {
 1942     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1943     st->print("\t");
 1944   }
 1945 
 1946   st->print_cr("popq    rbp");
 1947   if (do_polling() && C->is_method_compilation()) {
 1948     st->print("\t");
 1949     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1950                  "ja      #safepoint_stub\t"
 1951                  "# Safepoint: poll for GC");
 1952   }
 1953 }
 1954 #endif
 1955 
 1956 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1957 {
 1958   Compile* C = ra_->C;
 1959 
 1960   if (generate_vzeroupper(C)) {
 1961     // Clear upper bits of YMM registers when current compiled code uses
 1962     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1963     __ vzeroupper();
 1964   }
 1965 
 1966   int framesize = C->output()->frame_size_in_bytes();
 1967   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1968   // Remove word for return adr already pushed
 1969   // and RBP
 1970   framesize -= 2*wordSize;
 1971 
 1972   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1973 
 1974   if (framesize) {
 1975     __ addq(rsp, framesize);
 1976   }
 1977 
 1978   __ popq(rbp);
 1979 
 1980   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1981     __ reserved_stack_check();
 1982   }
 1983 
 1984   if (do_polling() && C->is_method_compilation()) {
 1985     Label dummy_label;
 1986     Label* code_stub = &dummy_label;
 1987     if (!C->output()->in_scratch_emit_size()) {
 1988       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1989       C->output()->add_stub(stub);
 1990       code_stub = &stub->entry();
 1991     }
 1992     __ relocate(relocInfo::poll_return_type);
 1993     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1994   }
 1995 }
 1996 
 1997 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1998 {
 1999   return MachNode::size(ra_); // too many variables; just compute it
 2000                               // the hard way
 2001 }
 2002 
 2003 int MachEpilogNode::reloc() const
 2004 {
 2005   return 2; // a large enough number
 2006 }
 2007 
 2008 const Pipeline* MachEpilogNode::pipeline() const
 2009 {
 2010   return MachNode::pipeline_class();
 2011 }
 2012 
 2013 //=============================================================================
 2014 
// Coarse register classes used to choose a spill-copy strategy below.
enum RC {
  rc_bad,
  rc_int,
  rc_kreg,
  rc_float,
  rc_stack
};
 2022 
 2023 static enum RC rc_class(OptoReg::Name reg)
 2024 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
 2026 
 2027   if (OptoReg::is_stack(reg)) return rc_stack;
 2028 
 2029   VMReg r = OptoReg::as_VMReg(reg);
 2030 
 2031   if (r->is_Register()) return rc_int;
 2032 
 2033   if (r->is_KRegister()) return rc_kreg;
 2034 
 2035   assert(r->is_XMMRegister(), "must be");
 2036   return rc_float;
 2037 }
 2038 
 2039 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2040 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2041                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2042 
 2043 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2044                      int stack_offset, int reg, uint ireg, outputStream* st);
 2045 
 2046 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2047                                       int dst_offset, uint ireg, outputStream* st) {
 2048   if (masm) {
 2049     switch (ireg) {
 2050     case Op_VecS:
 2051       __ movq(Address(rsp, -8), rax);
 2052       __ movl(rax, Address(rsp, src_offset));
 2053       __ movl(Address(rsp, dst_offset), rax);
 2054       __ movq(rax, Address(rsp, -8));
 2055       break;
 2056     case Op_VecD:
 2057       __ pushq(Address(rsp, src_offset));
 2058       __ popq (Address(rsp, dst_offset));
 2059       break;
 2060     case Op_VecX:
 2061       __ pushq(Address(rsp, src_offset));
 2062       __ popq (Address(rsp, dst_offset));
 2063       __ pushq(Address(rsp, src_offset+8));
 2064       __ popq (Address(rsp, dst_offset+8));
 2065       break;
 2066     case Op_VecY:
 2067       __ vmovdqu(Address(rsp, -32), xmm0);
 2068       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2069       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2070       __ vmovdqu(xmm0, Address(rsp, -32));
 2071       break;
    case Op_VecZ:
      // The trailing 2 is the vector length encoding Assembler::AVX_512bit.
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
 2078     default:
 2079       ShouldNotReachHere();
 2080     }
 2081 #ifndef PRODUCT
 2082   } else {
 2083     switch (ireg) {
 2084     case Op_VecS:
 2085       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2086                 "movl    rax, [rsp + #%d]\n\t"
 2087                 "movl    [rsp + #%d], rax\n\t"
 2088                 "movq    rax, [rsp - #8]",
 2089                 src_offset, dst_offset);
 2090       break;
 2091     case Op_VecD:
 2092       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2093                 "popq    [rsp + #%d]",
 2094                 src_offset, dst_offset);
 2095       break;
 2096      case Op_VecX:
 2097       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2098                 "popq    [rsp + #%d]\n\t"
 2099                 "pushq   [rsp + #%d]\n\t"
 2100                 "popq    [rsp + #%d]",
 2101                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2102       break;
 2103     case Op_VecY:
 2104       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2105                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2106                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2107                 "vmovdqu xmm0, [rsp - #32]",
 2108                 src_offset, dst_offset);
 2109       break;
 2110     case Op_VecZ:
 2111       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2112                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2113                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2114                 "vmovdqu xmm0, [rsp - #64]",
 2115                 src_offset, dst_offset);
 2116       break;
 2117     default:
 2118       ShouldNotReachHere();
 2119     }
 2120 #endif
 2121   }
 2122 }
 2123 
 2124 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2125                                        PhaseRegAlloc* ra_,
 2126                                        bool do_size,
 2127                                        outputStream* st) const {
 2128   assert(masm != nullptr || st  != nullptr, "sanity");
 2129   // Get registers to move
 2130   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2131   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2132   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2133   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2134 
 2135   enum RC src_second_rc = rc_class(src_second);
 2136   enum RC src_first_rc = rc_class(src_first);
 2137   enum RC dst_second_rc = rc_class(dst_second);
 2138   enum RC dst_first_rc = rc_class(dst_first);
 2139 
  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register");
 2142 
 2143   if (src_first == dst_first && src_second == dst_second) {
 2144     // Self copy, no move
 2145     return 0;
 2146   }
 2147   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2148     uint ireg = ideal_reg();
 2149     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
 2167     return 0;
 2168   }
 2169   if (src_first_rc == rc_stack) {
 2170     // mem ->
 2171     if (dst_first_rc == rc_stack) {
 2172       // mem -> mem
 2173       assert(src_second != dst_first, "overlap");
 2174       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2175           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2176         // 64-bit
 2177         int src_offset = ra_->reg2offset(src_first);
 2178         int dst_offset = ra_->reg2offset(dst_first);
 2179         if (masm) {
 2180           __ pushq(Address(rsp, src_offset));
 2181           __ popq (Address(rsp, dst_offset));
 2182 #ifndef PRODUCT
 2183         } else {
 2184           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2185                     "popq    [rsp + #%d]",
 2186                      src_offset, dst_offset);
 2187 #endif
 2188         }
 2189       } else {
 2190         // 32-bit
 2191         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2192         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2193         // No pushl/popl, so:
 2194         int src_offset = ra_->reg2offset(src_first);
 2195         int dst_offset = ra_->reg2offset(dst_first);
 2196         if (masm) {
 2197           __ movq(Address(rsp, -8), rax);
 2198           __ movl(rax, Address(rsp, src_offset));
 2199           __ movl(Address(rsp, dst_offset), rax);
 2200           __ movq(rax, Address(rsp, -8));
 2201 #ifndef PRODUCT
 2202         } else {
 2203           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2204                     "movl    rax, [rsp + #%d]\n\t"
 2205                     "movl    [rsp + #%d], rax\n\t"
 2206                     "movq    rax, [rsp - #8]",
 2207                      src_offset, dst_offset);
 2208 #endif
 2209         }
 2210       }
 2211       return 0;
 2212     } else if (dst_first_rc == rc_int) {
 2213       // mem -> gpr
 2214       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2215           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2216         // 64-bit
 2217         int offset = ra_->reg2offset(src_first);
 2218         if (masm) {
 2219           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2220 #ifndef PRODUCT
 2221         } else {
 2222           st->print("movq    %s, [rsp + #%d]\t# spill",
 2223                      Matcher::regName[dst_first],
 2224                      offset);
 2225 #endif
 2226         }
 2227       } else {
 2228         // 32-bit
 2229         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2230         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2231         int offset = ra_->reg2offset(src_first);
 2232         if (masm) {
 2233           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2234 #ifndef PRODUCT
 2235         } else {
 2236           st->print("movl    %s, [rsp + #%d]\t# spill",
 2237                      Matcher::regName[dst_first],
 2238                      offset);
 2239 #endif
 2240         }
 2241       }
 2242       return 0;
 2243     } else if (dst_first_rc == rc_float) {
      // mem -> xmm
 2245       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2246           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2247         // 64-bit
 2248         int offset = ra_->reg2offset(src_first);
 2249         if (masm) {
 2250           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2251 #ifndef PRODUCT
 2252         } else {
 2253           st->print("%s  %s, [rsp + #%d]\t# spill",
 2254                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2255                      Matcher::regName[dst_first],
 2256                      offset);
 2257 #endif
 2258         }
 2259       } else {
 2260         // 32-bit
 2261         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2262         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2263         int offset = ra_->reg2offset(src_first);
 2264         if (masm) {
 2265           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2266 #ifndef PRODUCT
 2267         } else {
 2268           st->print("movss   %s, [rsp + #%d]\t# spill",
 2269                      Matcher::regName[dst_first],
 2270                      offset);
 2271 #endif
 2272         }
 2273       }
 2274       return 0;
 2275     } else if (dst_first_rc == rc_kreg) {
 2276       // mem -> kreg
 2277       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2278           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2279         // 64-bit
 2280         int offset = ra_->reg2offset(src_first);
 2281         if (masm) {
 2282           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2283 #ifndef PRODUCT
 2284         } else {
 2285           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2286                      Matcher::regName[dst_first],
 2287                      offset);
 2288 #endif
 2289         }
 2290       }
 2291       return 0;
 2292     }
 2293   } else if (src_first_rc == rc_int) {
 2294     // gpr ->
 2295     if (dst_first_rc == rc_stack) {
 2296       // gpr -> mem
 2297       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2298           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2299         // 64-bit
 2300         int offset = ra_->reg2offset(dst_first);
 2301         if (masm) {
 2302           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2303 #ifndef PRODUCT
 2304         } else {
 2305           st->print("movq    [rsp + #%d], %s\t# spill",
 2306                      offset,
 2307                      Matcher::regName[src_first]);
 2308 #endif
 2309         }
 2310       } else {
 2311         // 32-bit
 2312         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2313         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2314         int offset = ra_->reg2offset(dst_first);
 2315         if (masm) {
 2316           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2317 #ifndef PRODUCT
 2318         } else {
 2319           st->print("movl    [rsp + #%d], %s\t# spill",
 2320                      offset,
 2321                      Matcher::regName[src_first]);
 2322 #endif
 2323         }
 2324       }
 2325       return 0;
 2326     } else if (dst_first_rc == rc_int) {
 2327       // gpr -> gpr
 2328       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2329           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2330         // 64-bit
 2331         if (masm) {
 2332           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2333                   as_Register(Matcher::_regEncode[src_first]));
 2334 #ifndef PRODUCT
 2335         } else {
 2336           st->print("movq    %s, %s\t# spill",
 2337                      Matcher::regName[dst_first],
 2338                      Matcher::regName[src_first]);
 2339 #endif
 2340         }
 2341         return 0;
 2342       } else {
 2343         // 32-bit
 2344         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2345         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2346         if (masm) {
 2347           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2348                   as_Register(Matcher::_regEncode[src_first]));
 2349 #ifndef PRODUCT
 2350         } else {
 2351           st->print("movl    %s, %s\t# spill",
 2352                      Matcher::regName[dst_first],
 2353                      Matcher::regName[src_first]);
 2354 #endif
 2355         }
 2356         return 0;
 2357       }
 2358     } else if (dst_first_rc == rc_float) {
 2359       // gpr -> xmm
 2360       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2361           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2362         // 64-bit
 2363         if (masm) {
 2364           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2365 #ifndef PRODUCT
 2366         } else {
 2367           st->print("movdq   %s, %s\t# spill",
 2368                      Matcher::regName[dst_first],
 2369                      Matcher::regName[src_first]);
 2370 #endif
 2371         }
 2372       } else {
 2373         // 32-bit
 2374         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2375         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2376         if (masm) {
 2377           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2378 #ifndef PRODUCT
 2379         } else {
 2380           st->print("movdl   %s, %s\t# spill",
 2381                      Matcher::regName[dst_first],
 2382                      Matcher::regName[src_first]);
 2383 #endif
 2384         }
 2385       }
 2386       return 0;
 2387     } else if (dst_first_rc == rc_kreg) {
 2388       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2389           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2390         // 64-bit
 2391         if (masm) {
 2392           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
 2399         }
 2400       }
 2401       Unimplemented();
 2402       return 0;
 2403     }
 2404   } else if (src_first_rc == rc_float) {
 2405     // xmm ->
 2406     if (dst_first_rc == rc_stack) {
 2407       // xmm -> mem
 2408       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2409           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2410         // 64-bit
 2411         int offset = ra_->reg2offset(dst_first);
 2412         if (masm) {
 2413           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2414 #ifndef PRODUCT
 2415         } else {
 2416           st->print("movsd   [rsp + #%d], %s\t# spill",
 2417                      offset,
 2418                      Matcher::regName[src_first]);
 2419 #endif
 2420         }
 2421       } else {
 2422         // 32-bit
 2423         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2424         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2425         int offset = ra_->reg2offset(dst_first);
 2426         if (masm) {
 2427           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2428 #ifndef PRODUCT
 2429         } else {
 2430           st->print("movss   [rsp + #%d], %s\t# spill",
 2431                      offset,
 2432                      Matcher::regName[src_first]);
 2433 #endif
 2434         }
 2435       }
 2436       return 0;
 2437     } else if (dst_first_rc == rc_int) {
 2438       // xmm -> gpr
 2439       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2440           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2441         // 64-bit
 2442         if (masm) {
 2443           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2444 #ifndef PRODUCT
 2445         } else {
 2446           st->print("movdq   %s, %s\t# spill",
 2447                      Matcher::regName[dst_first],
 2448                      Matcher::regName[src_first]);
 2449 #endif
 2450         }
 2451       } else {
 2452         // 32-bit
 2453         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2454         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2455         if (masm) {
 2456           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2457 #ifndef PRODUCT
 2458         } else {
 2459           st->print("movdl   %s, %s\t# spill",
 2460                      Matcher::regName[dst_first],
 2461                      Matcher::regName[src_first]);
 2462 #endif
 2463         }
 2464       }
 2465       return 0;
 2466     } else if (dst_first_rc == rc_float) {
 2467       // xmm -> xmm
 2468       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2469           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2470         // 64-bit
 2471         if (masm) {
 2472           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2473 #ifndef PRODUCT
 2474         } else {
 2475           st->print("%s  %s, %s\t# spill",
 2476                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2477                      Matcher::regName[dst_first],
 2478                      Matcher::regName[src_first]);
 2479 #endif
 2480         }
 2481       } else {
 2482         // 32-bit
 2483         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2484         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2485         if (masm) {
 2486           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2487 #ifndef PRODUCT
 2488         } else {
 2489           st->print("%s  %s, %s\t# spill",
 2490                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2491                      Matcher::regName[dst_first],
 2492                      Matcher::regName[src_first]);
 2493 #endif
 2494         }
 2495       }
 2496       return 0;
 2497     } else if (dst_first_rc == rc_kreg) {
 2498       assert(false, "Illegal spilling");
 2499       return 0;
 2500     }
 2501   } else if (src_first_rc == rc_kreg) {
 2502     if (dst_first_rc == rc_stack) {
      // kreg -> mem
 2504       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2505           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2506         // 64-bit
 2507         int offset = ra_->reg2offset(dst_first);
 2508         if (masm) {
 2509           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2510 #ifndef PRODUCT
 2511         } else {
          st->print("kmovq   [rsp + #%d], %s\t# spill",
 2513                      offset,
 2514                      Matcher::regName[src_first]);
 2515 #endif
 2516         }
 2517       }
 2518       return 0;
 2519     } else if (dst_first_rc == rc_int) {
 2520       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2521           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2522         // 64-bit
 2523         if (masm) {
 2524           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2525 #ifndef PRODUCT
 2526         } else {
          st->print("kmovq   %s, %s\t# spill",
 2528                      Matcher::regName[dst_first],
 2529                      Matcher::regName[src_first]);
 2530 #endif
 2531         }
 2532       }
 2533       Unimplemented();
 2534       return 0;
 2535     } else if (dst_first_rc == rc_kreg) {
 2536       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2537           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2538         // 64-bit
 2539         if (masm) {
 2540           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2541 #ifndef PRODUCT
 2542         } else {
          st->print("kmovq   %s, %s\t# spill",
 2544                      Matcher::regName[dst_first],
 2545                      Matcher::regName[src_first]);
 2546 #endif
 2547         }
 2548       }
 2549       return 0;
 2550     } else if (dst_first_rc == rc_float) {
 2551       assert(false, "Illegal spill");
 2552       return 0;
 2553     }
 2554   }
 2555 
  assert(false, "unexpected register class combination in MachSpillCopyNode");
 2557   Unimplemented();
 2558   return 0;
 2559 }
 2560 
 2561 #ifndef PRODUCT
 2562 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2563   implementation(nullptr, ra_, false, st);
 2564 }
 2565 #endif
 2566 
 2567 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2568   implementation(masm, ra_, false, nullptr);
 2569 }
 2570 
 2571 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2572   return MachNode::size(ra_);
 2573 }
 2574 
 2575 //=============================================================================
 2576 #ifndef PRODUCT
 2577 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2578 {
 2579   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2580   int reg = ra_->get_reg_first(this);
 2581   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2582             Matcher::regName[reg], offset);
 2583 }
 2584 #endif
 2585 
 2586 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2587 {
 2588   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2589   int reg = ra_->get_encode(this);
 2590 
 2591   __ lea(as_Register(reg), Address(rsp, offset));
 2592 }
 2593 
 2594 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2595 {
 2596   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2597   if (ra_->get_encode(this) > 15) {
 2598     return (offset < 0x80) ? 6 : 9; // REX2
 2599   } else {
 2600     return (offset < 0x80) ? 5 : 8; // REX
 2601   }
 2602 }
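
// Size sketch (for reference): "lea r64, [rsp + disp]" encodes as a REX
// prefix (1 byte) + opcode (1) + ModRM (1) + SIB (1, required for an rsp
// base) + disp8 (1) = 5 bytes, or 8 bytes with a 4-byte disp32. Registers
// with encodings above 15 need the 2-byte REX2 prefix instead, giving 6
// and 9 bytes respectively.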
 2603 
 2604 //=============================================================================
 2605 #ifndef PRODUCT
 2606 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2607 {
 2608   if (UseCompressedClassPointers) {
    st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t# Inline cache check");
  } else {
    st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
    st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t# Inline cache check");
 2614   }
 2615   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2616 }
 2617 #endif
 2618 
 2619 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2620 {
 2621   __ ic_check(InteriorEntryAlignment);
 2622 }
 2623 
 2624 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2625 {
 2626   return MachNode::size(ra_); // too many variables; just compute it
 2627                               // the hard way
 2628 }
 2629 
 2630 
 2631 //=============================================================================
 2632 
 2633 bool Matcher::supports_vector_calling_convention(void) {
 2634   return EnableVectorSupport;
 2635 }
 2636 
 2637 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2638   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2639 }
 2640 
 2641 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2642   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2643 }
 2644 
 2645 #ifdef ASSERT
 2646 static bool is_ndd_demotable(const MachNode* mdef) {
 2647   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2648 }
 2649 #endif
 2650 
 2651 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2652                                             int oper_index) {
 2653   if (mdef == nullptr) {
 2654     return false;
 2655   }
 2656 
 2657   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2658       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2659     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2660     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2661     return false;
 2662   }
 2663 
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the definition towards any single address
  // component will not result in NDD demotion by the assembler.
 2667   if (mdef->operand_num_edges(oper_index) != 1) {
 2668     return false;
 2669   }
 2670 
  // A demotion candidate must be register-mask compatible with the definition.
 2672   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2673   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2674     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2675     return false;
 2676   }
 2677 
  switch (oper_index) {
  // The first operand of a MachNode matched by an Intel APX NDD selection
  // pattern can share its assigned register with the definition operand if
  // their live ranges do not overlap. In that scenario the assembler can
  // demote the instruction to a legacy map0/map1 encoding by replacing its
  // 4-byte extended EVEX prefix with the shorter REX/REX2 prefix. Demotion
  // candidates are decorated with a special flag by the instruction selector.
 2685   case 1:
 2686     return is_ndd_demotable_opr1(mdef);
 2687 
  // The definition operand of a commutative operation can also be biased
  // towards the second operand.
 2690   case 2:
 2691     return is_ndd_demotable_opr2(mdef);
 2692 
  // The current scheme selects at most two biasing candidates.
 2694   default:
 2695     assert(false, "unhandled operand index: %s", mdef->Name());
 2696     break;
 2697   }
 2698 
 2699   return false;
 2700 }
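
// Illustrative demotion (mnemonics are schematic, not emitted verbatim by
// this file): an APX NDD instruction such as
//   eaddl r10, r8, r9    // 4-byte extended EVEX prefix, distinct dst
// can be shortened by the assembler to the legacy two-operand form
//   addl  r8, r9         // REX-encoded, dst aliases the first source
// once register biasing assigns the definition the same register as a
// demotable source operand.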
 2701 
 2702 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2703   assert(EnableVectorSupport, "sanity");
 2704   int lo = XMM0_num;
 2705   int hi = XMM0b_num;
 2706   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2707   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2708   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2709   return OptoRegPair(hi, lo);
 2710 }
 2711 
 2712 // Is this branch offset short enough that a short branch can be used?
 2713 //
 2714 // NOTE: If the platform does not provide any short branch variants, then
 2715 //       this method should return false for offset 0.
 2716 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 2720   offset -= br_size;
 2721 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
 2724   if (rule == jmpConUCF2_rule)
 2725     return (-126 <= offset && offset <= 125);
 2726   return (-128 <= offset && offset <= 127);
 2727 }
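
// Worked example: for a 2-byte short jcc (br_size == 2) whose target lies
// 100 bytes past the branch address, offset == 100 and the displacement is
// 100 - 2 == 98, which fits the signed 8-bit range [-128, 127], so the
// short form reaches.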
 2728 
 2729 #ifdef ASSERT
 2730 // Return whether or not this register is ever used as an argument.
 2731 bool Matcher::can_be_java_arg(int reg)
 2732 {
 2733   return
 2734     reg ==  RDI_num || reg == RDI_H_num ||
 2735     reg ==  RSI_num || reg == RSI_H_num ||
 2736     reg ==  RDX_num || reg == RDX_H_num ||
 2737     reg ==  RCX_num || reg == RCX_H_num ||
 2738     reg ==   R8_num || reg ==  R8_H_num ||
 2739     reg ==   R9_num || reg ==  R9_H_num ||
 2740     reg ==  R12_num || reg == R12_H_num ||
 2741     reg == XMM0_num || reg == XMM0b_num ||
 2742     reg == XMM1_num || reg == XMM1b_num ||
 2743     reg == XMM2_num || reg == XMM2b_num ||
 2744     reg == XMM3_num || reg == XMM3b_num ||
 2745     reg == XMM4_num || reg == XMM4b_num ||
 2746     reg == XMM5_num || reg == XMM5b_num ||
 2747     reg == XMM6_num || reg == XMM6b_num ||
 2748     reg == XMM7_num || reg == XMM7b_num;
 2749 }
 2750 #endif
 2751 
 2752 uint Matcher::int_pressure_limit()
 2753 {
 2754   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2755 }
 2756 
 2757 uint Matcher::float_pressure_limit()
 2758 {
  // After experimenting with different values, the following default threshold
  // was found to work best for LCM's register pressure scheduling on x64.
 2761   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2762   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2763   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2764 }
 2765 
 2766 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that multiplies by a magic constant when the
  // divisor is constant is faster than the hardware DIV instruction
  // (it uses MulHiL).
 2770   return false;
 2771 }
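
// Illustrative sketch (assuming the standard magic-number scheme): x / c
// is strength-reduced to roughly
//   q = MulHiL(x, M) >> s
// for a precomputed magic constant M and shift s, plus sign fix-ups, which
// beats the latency of a 64-bit hardware DIV.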
 2772 
 2773 // Register for DIVI projection of divmodI
 2774 const RegMask& Matcher::divI_proj_mask() {
 2775   return INT_RAX_REG_mask();
 2776 }
 2777 
 2778 // Register for MODI projection of divmodI
 2779 const RegMask& Matcher::modI_proj_mask() {
 2780   return INT_RDX_REG_mask();
 2781 }
 2782 
 2783 // Register for DIVL projection of divmodL
 2784 const RegMask& Matcher::divL_proj_mask() {
 2785   return LONG_RAX_REG_mask();
 2786 }
 2787 
 2788 // Register for MODL projection of divmodL
 2789 const RegMask& Matcher::modL_proj_mask() {
 2790   return LONG_RDX_REG_mask();
 2791 }
 2792 
 2793 %}
 2794 
 2795 source_hpp %{
 2796 // Header information of the source block.
 2797 // Method declarations/definitions which are used outside
 2798 // the ad-scope can conveniently be defined here.
 2799 //
 2800 // To keep related declarations/definitions/uses close together,
 2801 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 2802 
 2803 #include "runtime/vm_version.hpp"
 2804 
 2805 class NativeJump;
 2806 
 2807 class CallStubImpl {
 2808 
 2809   //--------------------------------------------------------------
 2810   //---<  Used for optimization in Compile::shorten_branches  >---
 2811   //--------------------------------------------------------------
 2812 
 2813  public:
 2814   // Size of call trampoline stub.
 2815   static uint size_call_trampoline() {
 2816     return 0; // no call trampolines on this platform
 2817   }
 2818 
 2819   // number of relocations needed by a call trampoline stub
 2820   static uint reloc_call_trampoline() {
 2821     return 0; // no call trampolines on this platform
 2822   }
 2823 };
 2824 
 2825 class HandlerImpl {
 2826 
 2827  public:
 2828 
 2829   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2830 
 2831   static uint size_deopt_handler() {
    // one 5-byte call and one 2-byte short jmp.
 2833     return 7;
 2834   }
 2835 };
 2836 
 2837 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
 2839     case  4: // fall-through
 2840     case  8: // fall-through
 2841     case 16: return Assembler::AVX_128bit;
 2842     case 32: return Assembler::AVX_256bit;
 2843     case 64: return Assembler::AVX_512bit;
 2844 
 2845     default: {
 2846       ShouldNotReachHere();
 2847       return Assembler::AVX_NoVec;
 2848     }
 2849   }
 2850 }
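
// Example mapping (from the switch above): 32 bytes -> AVX_256bit, while
// 4- and 8-byte operands fall back to AVX_128bit because no shorter AVX
// vector length exists.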
 2851 
 2852 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2853   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2854 }
 2855 
 2856 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2857   uint def_idx = use->operand_index(opnd);
 2858   Node* def = use->in(def_idx);
 2859   return vector_length_encoding(def);
 2860 }
 2861 
 2862 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2863   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2864          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2865 }
 2866 
 2867 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2868   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2869            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2870 }
 2871 
 2872 class Node::PD {
 2873 public:
 2874   enum NodeFlags : uint64_t {
 2875     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2876     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2877     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2878     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2879     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2880     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2881     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2882     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2883     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2884     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2885     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2886     Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
 2887     Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
 2888     _last_flag                = Flag_ndd_demotable_opr2
 2889   };
 2890 };
 2891 
 2892 %} // end source_hpp
 2893 
 2894 source %{
 2895 
 2896 #include "opto/addnode.hpp"
 2897 #include "c2_intelJccErratum_x86.hpp"
 2898 
 2899 void PhaseOutput::pd_perform_mach_node_analysis() {
 2900   if (VM_Version::has_intel_jcc_erratum()) {
 2901     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2902     _buf_sizes._code += extra_padding;
 2903   }
 2904 }
 2905 
 2906 int MachNode::pd_alignment_required() const {
 2907   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2908     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2909     return IntelJccErratum::largest_jcc_size() + 1;
 2910   } else {
 2911     return 1;
 2912   }
 2913 }
 2914 
 2915 int MachNode::compute_padding(int current_offset) const {
 2916   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2917     Compile* C = Compile::current();
 2918     PhaseOutput* output = C->output();
 2919     Block* block = output->block();
 2920     int index = output->index();
 2921     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2922   } else {
 2923     return 0;
 2924   }
 2925 }
 2926 
 2927 // Emit deopt handler code.
 2928 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2929 
 2930   // Note that the code buffer's insts_mark is always relative to insts.
 2931   // That's why we must use the macroassembler to generate a handler.
 2932   address base = __ start_a_stub(size_deopt_handler());
 2933   if (base == nullptr) {
 2934     ciEnv::current()->record_failure("CodeCache is full");
 2935     return 0;  // CodeBuffer::expand failed
 2936   }
 2937   int offset = __ offset();
 2938 
 2939   Label start;
 2940   __ bind(start);
 2941 
 2942   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2943 
 2944   int entry_offset = __ offset();
 2945 
 2946   __ jmp(start);
 2947 
 2948   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2949   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2950          "out of bounds read in post-call NOP check");
 2951   __ end_a_stub();
 2952   return entry_offset;
 2953 }
 2954 
 2955 static Assembler::Width widthForType(BasicType bt) {
 2956   if (bt == T_BYTE) {
 2957     return Assembler::B;
 2958   } else if (bt == T_SHORT) {
 2959     return Assembler::W;
 2960   } else if (bt == T_INT) {
 2961     return Assembler::D;
 2962   } else {
 2963     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2964     return Assembler::Q;
 2965   }
 2966 }
 2967 
 2968 //=============================================================================
 2969 
 2970   // Float masks come from different places depending on platform.
 2971   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
 2972   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
 2973   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
 2974   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 2975   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
 2976   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
 2977   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 2978   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 2979   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
 2980   static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 2981   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 2982   static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 2983   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
 2984   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
 2985   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
 2986   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 2987   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
 2988   static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
 2989   static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
 2990 
 2991 //=============================================================================
 2992 bool Matcher::match_rule_supported(int opcode) {
 2993   if (!has_match_rule(opcode)) {
 2994     return false; // no match rule present
 2995   }
 2996   switch (opcode) {
 2997     case Op_AbsVL:
 2998     case Op_StoreVectorScatter:
 2999       if (UseAVX < 3) {
 3000         return false;
 3001       }
 3002       break;
 3003     case Op_PopCountI:
 3004     case Op_PopCountL:
 3005       if (!UsePopCountInstruction) {
 3006         return false;
 3007       }
 3008       break;
 3009     case Op_PopCountVI:
 3010       if (UseAVX < 2) {
 3011         return false;
 3012       }
 3013       break;
 3014     case Op_CompressV:
 3015     case Op_ExpandV:
 3016     case Op_PopCountVL:
 3017       if (UseAVX < 2) {
 3018         return false;
 3019       }
 3020       break;
 3021     case Op_MulVI:
 3022       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3023         return false;
 3024       }
 3025       break;
 3026     case Op_MulVL:
 3027       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3028         return false;
 3029       }
 3030       break;
 3031     case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
 3033         return false;
 3034       }
 3035       break;
 3036     case Op_AbsVB:
 3037     case Op_AbsVS:
 3038     case Op_AbsVI:
 3039     case Op_AddReductionVI:
 3040     case Op_AndReductionV:
 3041     case Op_OrReductionV:
 3042     case Op_XorReductionV:
 3043       if (UseSSE < 3) { // requires at least SSSE3
 3044         return false;
 3045       }
 3046       break;
 3047     case Op_MaxHF:
 3048     case Op_MinHF:
 3049       if (!VM_Version::supports_avx512vlbw()) {
 3050         return false;
 3051       }  // fallthrough
 3052     case Op_AddHF:
 3053     case Op_DivHF:
 3054     case Op_FmaHF:
 3055     case Op_MulHF:
 3056     case Op_ReinterpretS2HF:
 3057     case Op_ReinterpretHF2S:
 3058     case Op_SubHF:
 3059     case Op_SqrtHF:
 3060       if (!VM_Version::supports_avx512_fp16()) {
 3061         return false;
 3062       }
 3063       break;
 3064     case Op_VectorLoadShuffle:
 3065     case Op_VectorRearrange:
 3066     case Op_MulReductionVI:
 3067       if (UseSSE < 4) { // requires at least SSE4
 3068         return false;
 3069       }
 3070       break;
 3071     case Op_IsInfiniteF:
 3072     case Op_IsInfiniteD:
 3073       if (!VM_Version::supports_avx512dq()) {
 3074         return false;
 3075       }
 3076       break;
 3077     case Op_SqrtVD:
 3078     case Op_SqrtVF:
 3079     case Op_VectorMaskCmp:
 3080     case Op_VectorCastB2X:
 3081     case Op_VectorCastS2X:
 3082     case Op_VectorCastI2X:
 3083     case Op_VectorCastL2X:
 3084     case Op_VectorCastF2X:
 3085     case Op_VectorCastD2X:
 3086     case Op_VectorUCastB2X:
 3087     case Op_VectorUCastS2X:
 3088     case Op_VectorUCastI2X:
 3089     case Op_VectorMaskCast:
 3090       if (UseAVX < 1) { // enabled for AVX only
 3091         return false;
 3092       }
 3093       break;
 3094     case Op_PopulateIndex:
 3095       if (UseAVX < 2) {
 3096         return false;
 3097       }
 3098       break;
 3099     case Op_RoundVF:
 3100       if (UseAVX < 2) { // enabled for AVX2 only
 3101         return false;
 3102       }
 3103       break;
 3104     case Op_RoundVD:
 3105       if (UseAVX < 3) {
 3106         return false;  // enabled for AVX3 only
 3107       }
 3108       break;
 3109     case Op_CompareAndSwapL:
 3110     case Op_CompareAndSwapP:
 3111       break;
    case Op_StrIndexOf:
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
 3122     case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
 3124         return false;
 3125       }
 3126       break;
 3127     case Op_MulVB:
 3128     case Op_LShiftVB:
 3129     case Op_RShiftVB:
 3130     case Op_URShiftVB:
 3131     case Op_VectorInsert:
 3132     case Op_VectorLoadMask:
 3133     case Op_VectorStoreMask:
 3134     case Op_VectorBlend:
 3135       if (UseSSE < 4) {
 3136         return false;
 3137       }
 3138       break;
 3139     case Op_MaxD:
 3140     case Op_MaxF:
 3141     case Op_MinD:
 3142     case Op_MinF:
 3143       if (UseAVX < 1) { // enabled for AVX only
 3144         return false;
 3145       }
 3146       break;
 3147     case Op_CacheWB:
 3148     case Op_CacheWBPreSync:
 3149     case Op_CacheWBPostSync:
 3150       if (!VM_Version::supports_data_cache_line_flush()) {
 3151         return false;
 3152       }
 3153       break;
 3154     case Op_ExtractB:
 3155     case Op_ExtractL:
 3156     case Op_ExtractI:
 3157     case Op_RoundDoubleMode:
 3158       if (UseSSE < 4) {
 3159         return false;
 3160       }
 3161       break;
 3162     case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
 3164         return false; // 128bit vroundpd is not available
 3165       }
 3166       break;
 3167     case Op_LoadVectorGather:
 3168     case Op_LoadVectorGatherMasked:
 3169       if (UseAVX < 2) {
 3170         return false;
 3171       }
 3172       break;
 3173     case Op_FmaF:
 3174     case Op_FmaD:
 3175     case Op_FmaVD:
 3176     case Op_FmaVF:
 3177       if (!UseFMA) {
 3178         return false;
 3179       }
 3180       break;
 3181     case Op_MacroLogicV:
 3182       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3183         return false;
 3184       }
 3185       break;
 3186 
 3187     case Op_VectorCmpMasked:
 3188     case Op_VectorMaskGen:
 3189       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3190         return false;
 3191       }
 3192       break;
 3193     case Op_VectorMaskFirstTrue:
 3194     case Op_VectorMaskLastTrue:
 3195     case Op_VectorMaskTrueCount:
 3196     case Op_VectorMaskToLong:
 3197       if (UseAVX < 1) {
 3198          return false;
 3199       }
 3200       break;
 3201     case Op_RoundF:
 3202     case Op_RoundD:
 3203       break;
    case Op_CopySignD:
    case Op_CopySignF:
      if (UseAVX < 3 || !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
 3212       break;
 3213     case Op_CompressBits:
 3214     case Op_ExpandBits:
 3215       if (!VM_Version::supports_bmi2()) {
 3216         return false;
 3217       }
 3218       break;
 3219     case Op_CompressM:
 3220       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3221         return false;
 3222       }
 3223       break;
 3224     case Op_ConvF2HF:
 3225     case Op_ConvHF2F:
 3226       if (!VM_Version::supports_float16()) {
 3227         return false;
 3228       }
 3229       break;
 3230     case Op_VectorCastF2HF:
 3231     case Op_VectorCastHF2F:
 3232       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3233         return false;
 3234       }
 3235       break;
 3236   }
 3237   return true;  // Match rules are supported by default.
 3238 }
 3239 
 3240 //------------------------------------------------------------------------
 3241 
 3242 static inline bool is_pop_count_instr_target(BasicType bt) {
 3243   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3244          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3245 }
 3246 
 3247 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3248   return match_rule_supported_vector(opcode, vlen, bt);
 3249 }
 3250 
 3251 // Identify extra cases that we might want to provide match rules for vector nodes and
 3252 // other intrinsics guarded with vector length (vlen) and element type (bt).
 3253 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3254   if (!match_rule_supported(opcode)) {
 3255     return false;
 3256   }
 3257   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3258   //   * SSE2 supports 128bit vectors for all types;
 3259   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3260   //   * AVX2 supports 256bit vectors for all types;
 3261   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3262   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3263   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3264   // And MaxVectorSize is taken into account as well.
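  // Example (illustrative): a 256-bit vector of INT (vlen == 8) is rejected
  // when only AVX1 is available, since 256-bit integer vectors need AVX2.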
 3265   if (!vector_size_supported(bt, vlen)) {
 3266     return false;
 3267   }
 3268   // Special cases which require vector length follow:
 3269   //   * implementation limitations
 3270   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3271   //   * 128bit vroundpd instruction is present only in AVX1
 3272   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3273   switch (opcode) {
 3274     case Op_MaxVHF:
 3275     case Op_MinVHF:
 3276       if (!VM_Version::supports_avx512bw()) {
 3277         return false;
      } // fallthrough
 3279     case Op_AddVHF:
 3280     case Op_DivVHF:
 3281     case Op_FmaVHF:
 3282     case Op_MulVHF:
 3283     case Op_SubVHF:
 3284     case Op_SqrtVHF:
 3285       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3286         return false;
 3287       }
 3288       if (!VM_Version::supports_avx512_fp16()) {
 3289         return false;
 3290       }
 3291       break;
 3292     case Op_AbsVF:
 3293     case Op_NegVF:
      if ((vlen == 16) && !VM_Version::supports_avx512dq()) {
 3295         return false; // 512bit vandps and vxorps are not available
 3296       }
 3297       break;
 3298     case Op_AbsVD:
 3299     case Op_NegVD:
      if ((vlen == 8) && !VM_Version::supports_avx512dq()) {
 3301         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3302       }
 3303       break;
 3304     case Op_RotateRightV:
 3305     case Op_RotateLeftV:
 3306       if (bt != T_INT && bt != T_LONG) {
 3307         return false;
 3308       } // fallthrough
 3309     case Op_MacroLogicV:
 3310       if (!VM_Version::supports_evex() ||
 3311           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3312         return false;
 3313       }
 3314       break;
 3315     case Op_ClearArray:
 3316     case Op_VectorMaskGen:
 3317     case Op_VectorCmpMasked:
 3318       if (!VM_Version::supports_avx512bw()) {
 3319         return false;
 3320       }
 3321       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3322         return false;
 3323       }
 3324       break;
 3325     case Op_LoadVectorMasked:
 3326     case Op_StoreVectorMasked:
 3327       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3328         return false;
 3329       }
 3330       break;
 3331     case Op_UMinV:
 3332     case Op_UMaxV:
 3333       if (UseAVX == 0) {
 3334         return false;
 3335       }
 3336       break;
 3337     case Op_UMinReductionV:
 3338     case Op_UMaxReductionV:
 3339       if (UseAVX == 0) {
 3340         return false;
 3341       }
 3342       if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
 3343         return false;
 3344       }
 3345       if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
 3346         return false;
 3347       }
 3348       break;
 3349     case Op_MaxV:
 3350     case Op_MinV:
 3351       if (UseSSE < 4 && is_integral_type(bt)) {
 3352         return false;
 3353       }
      if (bt == T_FLOAT || bt == T_DOUBLE) {
        // Float/Double intrinsics are enabled for the AVX family currently.
        if (UseAVX == 0) {
          return false;
        }
        // 512-bit Float/Double intrinsics need AVX512DQ
        if (UseAVX > 2 && !VM_Version::supports_avx512dq() && size_in_bits == 512) {
          return false;
        }
      }
 3363       break;
 3364     case Op_CallLeafVector:
 3365       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3366         return false;
 3367       }
 3368       break;
 3369     case Op_AddReductionVI:
 3370       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3371         return false;
 3372       }
 3373       // fallthrough
 3374     case Op_AndReductionV:
 3375     case Op_OrReductionV:
 3376     case Op_XorReductionV:
 3377       if (is_subword_type(bt) && (UseSSE < 4)) {
 3378         return false;
 3379       }
 3380       break;
 3381     case Op_MinReductionV:
 3382     case Op_MaxReductionV:
 3383       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3384         return false;
 3385       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3386         return false;
 3387       }
 3388       // Float/Double intrinsics enabled for AVX family.
 3389       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3390         return false;
 3391       }
 3392       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3393         return false;
 3394       }
 3395       break;
 3396     case Op_VectorBlend:
 3397       if (UseAVX == 0 && size_in_bits < 128) {
 3398         return false;
 3399       }
 3400       break;
 3401     case Op_VectorTest:
 3402       if (UseSSE < 4) {
 3403         return false; // Implementation limitation
 3404       } else if (size_in_bits < 32) {
 3405         return false; // Implementation limitation
 3406       }
 3407       break;
 3408     case Op_VectorLoadShuffle:
 3409     case Op_VectorRearrange:
      if (vlen == 2) {
 3411         return false; // Implementation limitation due to how shuffle is loaded
 3412       } else if (size_in_bits == 256 && UseAVX < 2) {
 3413         return false; // Implementation limitation
 3414       }
 3415       break;
 3416     case Op_VectorLoadMask:
 3417     case Op_VectorMaskCast:
 3418       if (size_in_bits == 256 && UseAVX < 2) {
 3419         return false; // Implementation limitation
 3420       }
 3421       // fallthrough
 3422     case Op_VectorStoreMask:
 3423       if (vlen == 2) {
 3424         return false; // Implementation limitation
 3425       }
 3426       break;
 3427     case Op_PopulateIndex:
 3428       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3429         return false;
 3430       }
 3431       break;
 3432     case Op_VectorCastB2X:
 3433     case Op_VectorCastS2X:
 3434     case Op_VectorCastI2X:
 3435       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3436         return false;
 3437       }
 3438       break;
 3439     case Op_VectorCastL2X:
 3440       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3441         return false;
 3442       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3443         return false;
 3444       }
 3445       break;
 3446     case Op_VectorCastF2X: {
        // As per JLS section 5.1.3, narrowing conversion to sub-word types
        // happens after an intermediate conversion to integer, and the special
        // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
 3450         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3451         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3452           return false;
 3453         }
 3454       }
 3455       // fallthrough
 3456     case Op_VectorCastD2X:
 3457       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3458         return false;
 3459       }
 3460       break;
 3461     case Op_VectorCastF2HF:
 3462     case Op_VectorCastHF2F:
 3463       if (!VM_Version::supports_f16c() &&
 3464          ((!VM_Version::supports_evex() ||
 3465          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3466         return false;
 3467       }
 3468       break;
 3469     case Op_RoundVD:
 3470       if (!VM_Version::supports_avx512dq()) {
 3471         return false;
 3472       }
 3473       break;
 3474     case Op_MulReductionVI:
 3475       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3476         return false;
 3477       }
 3478       break;
 3479     case Op_LoadVectorGatherMasked:
 3480       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3481         return false;
 3482       }
 3483       if (is_subword_type(bt) &&
 3484          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3485           (size_in_bits < 64)                                      ||
 3486           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3487         return false;
 3488       }
 3489       break;
 3490     case Op_StoreVectorScatterMasked:
 3491     case Op_StoreVectorScatter:
 3492       if (is_subword_type(bt)) {
 3493         return false;
 3494       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3495         return false;
 3496       }
 3497       // fallthrough
 3498     case Op_LoadVectorGather:
 3499       if (!is_subword_type(bt) && size_in_bits == 64) {
 3500         return false;
 3501       }
 3502       if (is_subword_type(bt) && size_in_bits < 64) {
 3503         return false;
 3504       }
 3505       break;
 3506     case Op_SaturatingAddV:
 3507     case Op_SaturatingSubV:
 3508       if (UseAVX < 1) {
 3509         return false; // Implementation limitation
 3510       }
 3511       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3512         return false;
 3513       }
 3514       break;
 3515     case Op_SelectFromTwoVector:
      if (size_in_bits < 128) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false;
      }
      if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
        return false;
      }
      break;
 3532     case Op_MaskAll:
 3533       if (!VM_Version::supports_evex()) {
 3534         return false;
 3535       }
 3536       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3537         return false;
 3538       }
 3539       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3540         return false;
 3541       }
 3542       break;
 3543     case Op_VectorMaskCmp:
 3544       if (vlen < 2 || size_in_bits < 32) {
 3545         return false;
 3546       }
 3547       break;
 3548     case Op_CompressM:
 3549       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3550         return false;
 3551       }
 3552       break;
 3553     case Op_CompressV:
 3554     case Op_ExpandV:
 3555       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3556         return false;
 3557       }
      if (size_in_bits < 128) {
        return false;
      } // fallthrough
 3561     case Op_VectorLongToMask:
 3562       if (UseAVX < 1) {
 3563         return false;
 3564       }
 3565       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3566         return false;
 3567       }
 3568       break;
 3569     case Op_SignumVD:
 3570     case Op_SignumVF:
 3571       if (UseAVX < 1) {
 3572         return false;
 3573       }
 3574       break;
 3575     case Op_PopCountVI:
 3576     case Op_PopCountVL: {
 3577         if (!is_pop_count_instr_target(bt) &&
 3578             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3579           return false;
 3580         }
 3581       }
 3582       break;
 3583     case Op_ReverseV:
 3584     case Op_ReverseBytesV:
 3585       if (UseAVX < 2) {
 3586         return false;
 3587       }
 3588       break;
 3589     case Op_CountTrailingZerosV:
 3590     case Op_CountLeadingZerosV:
 3591       if (UseAVX < 2) {
 3592         return false;
 3593       }
 3594       break;
 3595   }
  return true;  // Match rules are supported by default.
 3597 }
 3598 
 3599 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most unary/binary/ternary masked operations
  // share the IR nodes of their non-masked counterparts, with the mask edge
  // being the differentiator. This routine does a strict check for the
  // existence of masked operation patterns, returning false by default for all
  // opcodes apart from the ones whose masked instruction patterns are defined
  // in this file.
 3606   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3607     return false;
 3608   }
 3609 
 3610   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3611   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3612     return false;
 3613   }
  switch (opcode) {
 3615     // Unary masked operations
 3616     case Op_AbsVB:
 3617     case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      } // fallthrough
 3621     case Op_AbsVI:
 3622     case Op_AbsVL:
 3623       return true;
 3624 
 3625     // Ternary masked operations
 3626     case Op_FmaVF:
 3627     case Op_FmaVD:
 3628       return true;
 3629 
 3630     case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
 3632         return false;
 3633       }
 3634       return true;
 3635 
 3636     // Binary masked operations
 3637     case Op_AddVB:
 3638     case Op_AddVS:
 3639     case Op_SubVB:
 3640     case Op_SubVS:
 3641     case Op_MulVS:
 3642     case Op_LShiftVS:
 3643     case Op_RShiftVS:
 3644     case Op_URShiftVS:
 3645       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3646       if (!VM_Version::supports_avx512bw()) {
 3647         return false;  // Implementation limitation
 3648       }
 3649       return true;
 3650 
 3651     case Op_MulVL:
 3652       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3653       if (!VM_Version::supports_avx512dq()) {
 3654         return false;  // Implementation limitation
 3655       }
 3656       return true;
 3657 
 3658     case Op_AndV:
 3659     case Op_OrV:
 3660     case Op_XorV:
 3661     case Op_RotateRightV:
 3662     case Op_RotateLeftV:
 3663       if (bt != T_INT && bt != T_LONG) {
 3664         return false; // Implementation limitation
 3665       }
 3666       return true;
 3667 
 3668     case Op_VectorLoadMask:
 3669       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3670       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3671         return false;
 3672       }
 3673       return true;
 3674 
 3675     case Op_AddVI:
 3676     case Op_AddVL:
 3677     case Op_AddVF:
 3678     case Op_AddVD:
 3679     case Op_SubVI:
 3680     case Op_SubVL:
 3681     case Op_SubVF:
 3682     case Op_SubVD:
 3683     case Op_MulVI:
 3684     case Op_MulVF:
 3685     case Op_MulVD:
 3686     case Op_DivVF:
 3687     case Op_DivVD:
 3688     case Op_SqrtVF:
 3689     case Op_SqrtVD:
 3690     case Op_LShiftVI:
 3691     case Op_LShiftVL:
 3692     case Op_RShiftVI:
 3693     case Op_RShiftVL:
 3694     case Op_URShiftVI:
 3695     case Op_URShiftVL:
 3696     case Op_LoadVectorMasked:
 3697     case Op_StoreVectorMasked:
 3698     case Op_LoadVectorGatherMasked:
 3699     case Op_StoreVectorScatterMasked:
 3700       return true;
 3701 
 3702     case Op_UMinV:
 3703     case Op_UMaxV:
 3704       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3705         return false;
 3706       } // fallthrough
 3707     case Op_MaxV:
 3708     case Op_MinV:
 3709       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3710         return false; // Implementation limitation
 3711       }
 3712       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3713         return false; // Implementation limitation
 3714       }
 3715       return true;
 3716     case Op_SaturatingAddV:
 3717     case Op_SaturatingSubV:
 3718       if (!is_subword_type(bt)) {
 3719         return false;
 3720       }
 3721       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3722         return false; // Implementation limitation
 3723       }
 3724       return true;
 3725 
 3726     case Op_VectorMaskCmp:
 3727       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3728         return false; // Implementation limitation
 3729       }
 3730       return true;
 3731 
 3732     case Op_VectorRearrange:
 3733       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3734         return false; // Implementation limitation
 3735       }
 3736       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3737         return false; // Implementation limitation
 3738       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3739         return false; // Implementation limitation
 3740       }
 3741       return true;
 3742 
 3743     // Binary Logical operations
 3744     case Op_AndVMask:
 3745     case Op_OrVMask:
 3746     case Op_XorVMask:
 3747       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3748         return false; // Implementation limitation
 3749       }
 3750       return true;
 3751 
 3752     case Op_PopCountVI:
 3753     case Op_PopCountVL:
 3754       if (!is_pop_count_instr_target(bt)) {
 3755         return false;
 3756       }
 3757       return true;
 3758 
 3759     case Op_MaskAll:
 3760       return true;
 3761 
 3762     case Op_CountLeadingZerosV:
 3763       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3764         return true;
 3765       }
 3766     default:
 3767       return false;
 3768   }
 3769 }
 3770 
bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
  return false;
}

// Return true if Vector::rearrange needs preparation of the shuffle argument
bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
  switch (elem_bt) {
    case T_BYTE:  return false;
    case T_SHORT: return !VM_Version::supports_avx512bw();
    case T_INT:   return !VM_Version::supports_avx();
    case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
    default:
      ShouldNotReachHere();
      return false;
  }
}

bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
  // Prefer predicate if the mask type is "TypeVectMask".
  return vt->isa_vectmask() != nullptr;
}

MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
  assert(Matcher::is_generic_vector(generic_opnd), "not generic");
  bool legacy = (generic_opnd->opcode() == LEGVEC);
  if (!VM_Version::supports_avx512vlbwdq() && // KNL
      is_temp && !legacy && (ideal_reg == Op_VecZ)) {
    // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
    return new legVecZOper();
  }
  if (legacy) {
    switch (ideal_reg) {
      case Op_VecS: return new legVecSOper();
      case Op_VecD: return new legVecDOper();
      case Op_VecX: return new legVecXOper();
      case Op_VecY: return new legVecYOper();
      case Op_VecZ: return new legVecZOper();
    }
  } else {
    switch (ideal_reg) {
      case Op_VecS: return new vecSOper();
      case Op_VecD: return new vecDOper();
      case Op_VecX: return new vecXOper();
      case Op_VecY: return new vecYOper();
      case Op_VecZ: return new vecZOper();
    }
  }
  ShouldNotReachHere();
  return nullptr;
}

bool Matcher::is_reg2reg_move(MachNode* m) {
  switch (m->rule()) {
    case MoveVec2Leg_rule:
    case MoveLeg2Vec_rule:
    case MoveF2VL_rule:
    case MoveF2LEG_rule:
    case MoveVL2F_rule:
    case MoveLEG2F_rule:
    case MoveD2VL_rule:
    case MoveD2LEG_rule:
    case MoveVL2D_rule:
    case MoveLEG2D_rule:
      return true;
    default:
      return false;
  }
}

bool Matcher::is_generic_vector(MachOper* opnd) {
  switch (opnd->opcode()) {
    case VEC:
    case LEGVEC:
      return true;
    default:
      return false;
  }
}

//------------------------------------------------------------------------

const RegMask* Matcher::predicate_reg_mask(void) {
  return &_VECTMASK_REG_mask;
}

// Max vector size in bytes. 0 if not supported.
int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX-512/EVEX supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
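  // (UseAVX == 2 -> 32 bytes, UseAVX == 3 -> 64 bytes; the SSE2 baseline is 16 bytes.)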
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
    size = (VM_Version::supports_avx512bw()) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
    if (size < 4) return 0;
    break;
  case T_CHAR:
    if (size < 4) return 0;
    break;
  case T_BYTE:
    if (size < 4) return 0;
    break;
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Support for calling svml double64 vectors
  if (bt == T_DOUBLE) {
    size = 1;
  }
  return MIN2(size,max_size);
}
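
// For example, with MaxVectorSize >= 32 on AVX2 hardware, max_vector_size(T_INT)
// is 32 / 4 = 8 elements, while min_vector_size(T_BYTE) is 4 elements and
// min_vector_size(T_DOUBLE) is 1 (to support the svml double64 entry points).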

int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
  // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
  // by default on Cascade Lake
  if (VM_Version::is_default_intel_cascade_lake()) {
    return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
  }
  return Matcher::max_vector_size(bt);
}

int Matcher::scalable_vector_reg_size(const BasicType bt) {
  return -1;
}

// Vector ideal reg corresponding to specified size in bytes
uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Check for shift by small constant as well
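// (Shift amounts 0..3 correspond to the x86 addressing-mode scale factors 1, 2, 4, 8.)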
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses the
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else {
      mstack.push(conv, Matcher::Pre_Visit);
    }
    return true;
  }
  return false;
}

// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, and such that it can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
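// Likewise, (AndL (AddL LoadL* ConL(-1)) LoadL*) corresponds to blsr
// (x & (x - 1)) and (XorL (AddL LoadL* ConL(-1)) LoadL*) to blsmsk
// (x ^ (x - 1)); see is_bmi_pattern() below.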
//
// Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
// This is a temporary solution until we make DAGs expressible in ADL.
template<typename ConType>
class FusedPatternMatcher {
  Node* _op1_node;
  Node* _mop_node;
  int _con_op;

  static int match_next(Node* n, int next_op, int next_op_idx) {
    if (n->in(1) == nullptr || n->in(2) == nullptr) {
      return -1;
    }

    if (next_op_idx == -1) { // n is commutative, try rotations
      if (n->in(1)->Opcode() == next_op) {
        return 1;
      } else if (n->in(2)->Opcode() == next_op) {
        return 2;
      }
    } else {
      assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
      if (n->in(next_op_idx)->Opcode() == next_op) {
        return next_op_idx;
      }
    }
    return -1;
  }

 public:
  FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
    _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }

  bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
             int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
             typename ConType::NativeType con_value) {
    if (_op1_node->Opcode() != op1) {
      return false;
    }
    if (_mop_node->outcnt() > 2) {
      return false;
    }
    op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
    if (op1_op2_idx == -1) {
      return false;
    }
    // Memory operation must be the other edge
    int op1_mop_idx = (op1_op2_idx & 1) + 1;
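    // ((idx & 1) + 1) maps 1 -> 2 and 2 -> 1, i.e. it picks the other input edge.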

    // Check that the mop node is really what we want
    if (_op1_node->in(op1_mop_idx) == _mop_node) {
      Node* op2_node = _op1_node->in(op1_op2_idx);
      if (op2_node->outcnt() > 1) {
        return false;
      }
      assert(op2_node->Opcode() == op2, "Should be");
      op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
      if (op2_con_idx == -1) {
        return false;
      }
      // Memory operation must be the other edge
      int op2_mop_idx = (op2_con_idx & 1) + 1;
      // Check that the memory operation is the same node
      if (op2_node->in(op2_mop_idx) == _mop_node) {
        // Now check the constant
        const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
        if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
          return true;
        }
      }
    }
    return false;
  }
};

static bool is_bmi_pattern(Node* n, Node* m) {
  assert(UseBMI1Instructions, "sanity");
  if (n != nullptr && m != nullptr) {
    if (m->Opcode() == Op_LoadI) {
      FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
      return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
             bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
             bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
    } else if (m->Opcode() == Op_LoadL) {
      FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
      return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
             bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
             bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
    }
  }
  return false;
}

// Should the matcher clone input 'm' of node 'n'?
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
  // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
  if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
    mstack.push(m, Visit);
    return true;
  }
  if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
    mstack.push(m, Visit);           // m = ShiftCntV
    return true;
  }
  if (is_encode_and_store_pattern(n, m)) {
    mstack.push(m, Visit);
    return true;
  }
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
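// On x86, an expression of the form base + (index << scale) + disp32 folds into
// a single addressing mode, so cloning the shift keeps it out of a register.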
bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        !adr->in(AddPNode::Offset)->is_Con() &&
        off->get_long() == (int) (off->get_long()) && // immL32
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
  switch (bt) {
    case BoolTest::eq:
      return Assembler::eq;
    case BoolTest::ne:
      return Assembler::neq;
    case BoolTest::le:
    case BoolTest::ule:
      return Assembler::le;
    case BoolTest::ge:
    case BoolTest::uge:
      return Assembler::nlt;
    case BoolTest::lt:
    case BoolTest::ult:
      return Assembler::lt;
    case BoolTest::gt:
    case BoolTest::ugt:
      return Assembler::nle;
    default: ShouldNotReachHere(); return Assembler::_false;
  }
}

static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
  switch (bt) {
  case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
  // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
  case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
  case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
  case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
  case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
  case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
  default: ShouldNotReachHere(); return Assembler::FALSE_OS;
  }
}

// Helper methods for MachSpillCopyNode::implementation().
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  assert(ireg == Op_VecS || // 32bit vector
         ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
         "no non-adjacent vector moves" );
  if (masm) {
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
      break;
    case Op_VecY:
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
      break;
    case Op_VecZ:
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}

void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st) {
  if (masm) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
        }
        break;
      case Op_VecY:
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
        }
        break;
      case Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
          __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
        break;
      case Op_VecY:
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
          __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
        break;
      case Op_VecZ:
        __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#ifndef PRODUCT
  } else {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
}

template <class T>
static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
  int size = type2aelembytes(bt) * len;
  GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
  for (int i = 0; i < len; i++) {
    int offset = i * type2aelembytes(bt);
    switch (bt) {
      case T_BYTE: val->at(i) = con; break;
      case T_SHORT: {
        jshort c = con;
        memcpy(val->adr_at(offset), &c, sizeof(jshort));
        break;
      }
      case T_INT: {
        jint c = con;
        memcpy(val->adr_at(offset), &c, sizeof(jint));
        break;
      }
      case T_LONG: {
        jlong c = con;
        memcpy(val->adr_at(offset), &c, sizeof(jlong));
        break;
      }
      case T_FLOAT: {
        jfloat c = con;
        memcpy(val->adr_at(offset), &c, sizeof(jfloat));
        break;
      }
      case T_DOUBLE: {
        jdouble c = con;
        memcpy(val->adr_at(offset), &c, sizeof(jdouble));
        break;
      }
      default: assert(false, "%s", type2name(bt));
    }
  }
  return val;
}
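
// For example, vreplicate_imm(T_SHORT, (jshort)0x1234, 4) yields the byte
// sequence 34 12 34 12 34 12 34 12 (element bytes are stored in the host's
// little-endian order).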

static inline jlong high_bit_set(BasicType bt) {
  switch (bt) {
    case T_BYTE:  return 0x8080808080808080;
    case T_SHORT: return 0x8000800080008000;
    case T_INT:   return 0x8000000080000000;
    case T_LONG:  return 0x8000000000000000;
    default:
      ShouldNotReachHere();
      return 0;
  }
}

#ifndef PRODUCT
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
    __ nop(_count);
  }

  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count;
  }

#ifndef PRODUCT
  void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("# breakpoint");
  }
#endif

  void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
    __ int3();
  }

  uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
    return MachNode::size(ra_);
  }

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to
// output byte streams.  Encoding classes are parameterized macros
// used by Machine Instruction Nodes in order to generate the bit
// encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
// which returns its register number when queried.  CONST_INTER causes
// an operand to generate a function which returns the value of the
// constant when queried.  MEMORY_INTER causes an operand to generate
// four functions which return the Base Register, the Index Register,
// the Scale Value, and the Offset Value of the operand when queried.
// COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e., the encoding bits for the instruction)
// associated with each basic boolean condition for a conditional
// instruction.
//
// Instructions specify two basic values for encoding.  In addition, a
// function is available to check if the constant displacement is an
// oop. Instructions use the ins_encode keyword to specify their encoding
// classes (which must be a sequence of enc_class names, and their
// parameters, specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular
// instruction needs for encoding need to be specified.
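// For example, a MEMORY_INTER operand such as indOffset32 exposes its base,
// index, scale, and displacement to the encodings below through the
// $mem$$base, $mem$$index, $mem$$scale, and $mem$$disp accessors.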
encode %{
  enc_class cdql_enc(no_rax_rdx_RegI div)
  %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_int
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
    //    5:   75 07/08                jne    e <normal>
    //    7:   33 d2                   xor    %edx,%edx
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
    //    c:   74 03/04                je     11 <done>
    // 000000000000000e <normal>:
    //    e:   99                      cltd
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    f:   f7 f9                   idiv   $div
    // 0000000000000011 <done>:
    Label normal;
    Label done;

    // cmp    $0x80000000,%eax
    __ cmpl(as_Register(RAX_enc), 0x80000000);

    // jne    e <normal>
    __ jccb(Assembler::notEqual, normal);

    // xor    %edx,%edx
    __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

    // cmp    $0xffffffffffffffff,$div
    __ cmpl($div$$Register, -1);

    // je     11 <done>
    __ jccb(Assembler::equal, done);

    // <normal>
    // cltd
    __ bind(normal);
    __ cdql();

    // idivl
    // <done>
    __ idivl($div$$Register);
    __ bind(done);
  %}

  enc_class cdqq_enc(no_rax_rdx_RegL div)
  %{
    // Full implementation of Java ldiv and lrem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_long
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_long
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
    //    7:   00 00 80
    //    a:   48 39 d0                cmp    %rdx,%rax
    //    d:   75 08                   jne    17 <normal>
    //    f:   33 d2                   xor    %edx,%edx
    //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
    //   15:   74 05                   je     1c <done>
    // 0000000000000017 <normal>:
    //   17:   48 99                   cqto
    //   19:   48 f7 f9                idiv   $div
    // 000000000000001c <done>:
    Label normal;
    Label done;

    // mov    $0x8000000000000000,%rdx
    __ mov64(as_Register(RDX_enc), 0x8000000000000000);

    // cmp    %rdx,%rax
    __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));

    // jne    17 <normal>
    __ jccb(Assembler::notEqual, normal);

    // xor    %edx,%edx
    __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

    // cmp    $0xffffffffffffffff,$div
    __ cmpq($div$$Register, -1);

    // je     1c <done>
    __ jccb(Assembler::equal, done);

    // <normal>
    // cqto
    __ bind(normal);
    __ cdqq();

    // idivq
    // <done>
    __ idivq($div$$Register);
    __ bind(done);
  %}

  enc_class clear_avx %{
    DEBUG_ONLY(int off0 = __ offset());
    if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when the current compiled code uses
      // wide vectors, to avoid the AVX <-> SSE transition penalty during calls.
      __ vzeroupper();
    }
    DEBUG_ONLY(int off1 = __ offset());
    assert(off1 - off0 == clear_avx_size(), "correct size prediction");
  %}

  enc_class Java_To_Runtime(method meth) %{
    __ lea(r10, RuntimeAddress((address)$meth$$method));
    __ call(r10);
    __ post_call_nop();
  %}

  enc_class Java_Static_Call(method meth)
  %{
    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
    // determine who we intended to call.
    if (!_method) {
      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
    } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
      // The NOP here is purely to ensure that eliding a call to
      // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
      __ addr_nop_5();
      __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
    } else {
      int method_index = resolved_method_index(masm);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      address mark = __ pc();
      int call_offset = __ offset();
      __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        __ code()->shared_stub_to_interp_for(_method, call_offset);
      } else {
        // Emit stubs for static call.
        address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
        __ clear_inst_mark();
        if (stub == nullptr) {
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }
    __ post_call_nop();
  %}

  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}

  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}

//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        | locks  |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by    +--------+
//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.
// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
//         alignment.  Region 11, pad1, may be dynamically extended so that
//         SP meets the minimum alignment.

frame
%{
  // This register defines part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(RAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(RSP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word.
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num,  // Op_RegN
      RAX_num,  // Op_RegI
      RAX_num,  // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num   // Op_RegL
    };
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num,    // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0b_num,    // Op_RegD
      RAX_H_num     // Op_RegL
    };
    // Excluded flags and vector registers.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction

// Whether this node is expanded during code emission into a sequence of
// instructions and the first instruction can perform an implicit null check.
ins_attrib ins_is_late_expanded_null_check_candidate(false);

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI()
%{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2()
%{
  predicate(n->get_int() == 2);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immU7()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI8()
%{
  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immU8()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI16()
%{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Null Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow (compressed) Pointer Immediate
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

operand immNKlass() %{
  match(ConNKlass);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Null Pointer Immediate
operand immN0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immP31()
%{
  predicate(n->as_Type()->type()->reloc() == relocInfo::none
            && (n->get_ptr() >> 31) == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}


// Long Immediate
operand immL()
%{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate 8-bit
operand immL8()
%{
  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
  match(ConL);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate 32-bit unsigned
operand immUL32()
%{
  predicate(n->get_long() == (unsigned int) (n->get_long()));
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate 32-bit signed
operand immL32()
%{
  predicate(n->get_long() == (int) (n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

operand immL_Pow2()
%{
  predicate(is_power_of_2((julong)n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

operand immL_NotPow2()
%{
  predicate(is_power_of_2((julong)~n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0()
%{
  predicate(n->get_long() == 0L);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immL1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate: 2^n-1, positive
operand immI_Pow2M1()
%{
  predicate((n->get_int() > 0)
            && is_power_of_2((juint)n->get_int() + 1));
  match(ConI);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Half Float Immediate
operand immH()
%{
  match(ConH);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immD()
%{
  match(ConD);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for sign-extending shift amounts
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immL_255()
%{
  predicate(n->get_long() == 255);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immL_65535()
%{
  predicate(n->get_long() == 65535);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// AOT Runtime Constants Address
operand immAOTRuntimeConstantsAddress()
%{
  // Check if the address is in the range of AOT Runtime Constants
  predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{ %}
  interface(REG_INTER);
%}

// Register Operands
// Integer Register
operand rRegI()
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand rax_RegI()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegI);
  match(rRegI);

  format %{ "RAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand rbx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rbx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RBX" %}
  interface(REG_INTER);
%}

operand rcx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rcx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RCX" %}
  interface(REG_INTER);
%}

operand rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDX" %}
  interface(REG_INTER);
%}

operand rdi_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdi_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDI" %}
  interface(REG_INTER);
%}

operand no_rax_rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
  match(RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}

operand no_rbp_r13_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
  match(RegI);
  match(rRegI);
  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register
operand any_RegP()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);
  match(r15_RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);  // See Q&A below about
  match(r15_RegP);  // r15_RegP and rbp_RegP.

  format %{ %}
  interface(REG_INTER);
%}

operand rRegN() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegN);

  format %{ %}
  interface(REG_INTER);
%}

 5346 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 5347 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will never be chosen
// by the allocator as an output register.
// The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
// result, RBP is never chosen as the output of an instruction either.
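//
// Illustrative sketch (a hypothetical rule, not one defined in this file):
//
//   instruct exampleP(rRegP dst, rRegP src, immL32 off)
//   %{
//     match(Set dst (AddP src off));
//     ...
//   %}
//
// Here $src may legitimately be bound to r15, since the match rule accepts it
// as an input, but $dst never will be: the allocator draws outputs from the
// ptr_reg mask, which excludes r15.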
 5356 
 5357 // This operand is not allowed to use RBP even if
 5358 // RBP is not used to hold the frame pointer.
 5359 operand no_rbp_RegP()
 5360 %{
 5361   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5362   match(RegP);
 5363   match(rbx_RegP);
 5364   match(rsi_RegP);
 5365   match(rdi_RegP);
 5366 
 5367   format %{ %}
 5368   interface(REG_INTER);
 5369 %}
 5370 
 5371 // Special Registers
 5372 // Return a pointer value
 5373 operand rax_RegP()
 5374 %{
 5375   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5376   match(RegP);
 5377   match(rRegP);
 5378 
 5379   format %{ %}
 5380   interface(REG_INTER);
 5381 %}
 5382 
 5383 // Special Registers
 5384 // Return a compressed pointer value
 5385 operand rax_RegN()
 5386 %{
 5387   constraint(ALLOC_IN_RC(int_rax_reg));
 5388   match(RegN);
 5389   match(rRegN);
 5390 
 5391   format %{ %}
 5392   interface(REG_INTER);
 5393 %}
 5394 
 5395 // Used in AtomicAdd
 5396 operand rbx_RegP()
 5397 %{
 5398   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5399   match(RegP);
 5400   match(rRegP);
 5401 
 5402   format %{ %}
 5403   interface(REG_INTER);
 5404 %}
 5405 
 5406 operand rsi_RegP()
 5407 %{
 5408   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5409   match(RegP);
 5410   match(rRegP);
 5411 
 5412   format %{ %}
 5413   interface(REG_INTER);
 5414 %}
 5415 
 5416 operand rbp_RegP()
 5417 %{
 5418   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5419   match(RegP);
 5420   match(rRegP);
 5421 
 5422   format %{ %}
 5423   interface(REG_INTER);
 5424 %}
 5425 
 5426 // Used in rep stosq
 5427 operand rdi_RegP()
 5428 %{
 5429   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5430   match(RegP);
 5431   match(rRegP);
 5432 
 5433   format %{ %}
 5434   interface(REG_INTER);
 5435 %}
 5436 
 5437 operand r15_RegP()
 5438 %{
 5439   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5440   match(RegP);
 5441   match(rRegP);
 5442 
 5443   format %{ %}
 5444   interface(REG_INTER);
 5445 %}
 5446 
 5447 operand rRegL()
 5448 %{
 5449   constraint(ALLOC_IN_RC(long_reg));
 5450   match(RegL);
 5451   match(rax_RegL);
 5452   match(rdx_RegL);
 5453 
 5454   format %{ %}
 5455   interface(REG_INTER);
 5456 %}
 5457 
 5458 // Special Registers
 5459 operand no_rax_rdx_RegL()
 5460 %{
 5461   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5462   match(RegL);
 5463   match(rRegL);
 5464 
 5465   format %{ %}
 5466   interface(REG_INTER);
 5467 %}
 5468 
 5469 operand rax_RegL()
 5470 %{
 5471   constraint(ALLOC_IN_RC(long_rax_reg));
 5472   match(RegL);
 5473   match(rRegL);
 5474 
 5475   format %{ "RAX" %}
 5476   interface(REG_INTER);
 5477 %}
 5478 
 5479 operand rcx_RegL()
 5480 %{
 5481   constraint(ALLOC_IN_RC(long_rcx_reg));
 5482   match(RegL);
 5483   match(rRegL);
 5484 
 5485   format %{ %}
 5486   interface(REG_INTER);
 5487 %}
 5488 
 5489 operand rdx_RegL()
 5490 %{
 5491   constraint(ALLOC_IN_RC(long_rdx_reg));
 5492   match(RegL);
 5493   match(rRegL);
 5494 
 5495   format %{ %}
 5496   interface(REG_INTER);
 5497 %}
 5498 
 5499 operand r11_RegL()
 5500 %{
 5501   constraint(ALLOC_IN_RC(long_r11_reg));
 5502   match(RegL);
 5503   match(rRegL);
 5504 
 5505   format %{ %}
 5506   interface(REG_INTER);
 5507 %}
 5508 
 5509 operand no_rbp_r13_RegL()
 5510 %{
 5511   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5512   match(RegL);
 5513   match(rRegL);
 5514   match(rax_RegL);
 5515   match(rcx_RegL);
 5516   match(rdx_RegL);
 5517 
 5518   format %{ %}
 5519   interface(REG_INTER);
 5520 %}
 5521 
 5522 // Flags register, used as output of compare instructions
 5523 operand rFlagsReg()
 5524 %{
 5525   constraint(ALLOC_IN_RC(int_flags));
 5526   match(RegFlags);
 5527 
 5528   format %{ "RFLAGS" %}
 5529   interface(REG_INTER);
 5530 %}
 5531 
 5532 // Flags register, used as output of FLOATING POINT compare instructions
 5533 operand rFlagsRegU()
 5534 %{
 5535   constraint(ALLOC_IN_RC(int_flags));
 5536   match(RegFlags);
 5537 
 5538   format %{ "RFLAGS_U" %}
 5539   interface(REG_INTER);
 5540 %}
 5541 
 5542 operand rFlagsRegUCF() %{
 5543   constraint(ALLOC_IN_RC(int_flags));
 5544   match(RegFlags);
 5545   predicate(!UseAPX || !VM_Version::supports_avx10_2());
 5546 
 5547   format %{ "RFLAGS_U_CF" %}
 5548   interface(REG_INTER);
 5549 %}
 5550 
 5551 operand rFlagsRegUCFE() %{
 5552   constraint(ALLOC_IN_RC(int_flags));
 5553   match(RegFlags);
 5554   predicate(UseAPX && VM_Version::supports_avx10_2());
 5555 
 5556   format %{ "RFLAGS_U_CFE" %}
 5557   interface(REG_INTER);
 5558 %}
 5559 
 5560 // Float register operands
 5561 operand regF() %{
 5562    constraint(ALLOC_IN_RC(float_reg));
 5563    match(RegF);
 5564 
 5565    format %{ %}
 5566    interface(REG_INTER);
 5567 %}
 5568 
 5569 // Float register operands
 5570 operand legRegF() %{
 5571    constraint(ALLOC_IN_RC(float_reg_legacy));
 5572    match(RegF);
 5573 
 5574    format %{ %}
 5575    interface(REG_INTER);
 5576 %}
 5577 
 5578 // Float register operands
 5579 operand vlRegF() %{
 5580    constraint(ALLOC_IN_RC(float_reg_vl));
 5581    match(RegF);
 5582 
 5583    format %{ %}
 5584    interface(REG_INTER);
 5585 %}
 5586 
 5587 // Double register operands
 5588 operand regD() %{
 5589    constraint(ALLOC_IN_RC(double_reg));
 5590    match(RegD);
 5591 
 5592    format %{ %}
 5593    interface(REG_INTER);
 5594 %}
 5595 
 5596 // Double register operands
 5597 operand legRegD() %{
 5598    constraint(ALLOC_IN_RC(double_reg_legacy));
 5599    match(RegD);
 5600 
 5601    format %{ %}
 5602    interface(REG_INTER);
 5603 %}
 5604 
 5605 // Double register operands
 5606 operand vlRegD() %{
 5607    constraint(ALLOC_IN_RC(double_reg_vl));
 5608    match(RegD);
 5609 
 5610    format %{ %}
 5611    interface(REG_INTER);
 5612 %}
 5613 
 5614 //----------Memory Operands----------------------------------------------------
 5615 // Direct Memory Operand
 5616 // operand direct(immP addr)
 5617 // %{
 5618 //   match(addr);
 5619 
 5620 //   format %{ "[$addr]" %}
 5621 //   interface(MEMORY_INTER) %{
 5622 //     base(0xFFFFFFFF);
 5623 //     index(0x4);
 5624 //     scale(0x0);
 5625 //     disp($addr);
 5626 //   %}
 5627 // %}
 5628 
 5629 // Indirect Memory Operand
 5630 operand indirect(any_RegP reg)
 5631 %{
 5632   constraint(ALLOC_IN_RC(ptr_reg));
 5633   match(reg);
 5634 
 5635   format %{ "[$reg]" %}
 5636   interface(MEMORY_INTER) %{
 5637     base($reg);
 5638     index(0x4);
 5639     scale(0x0);
 5640     disp(0x0);
 5641   %}
 5642 %}
 5643 
 5644 // Indirect Memory Plus Short Offset Operand
 5645 operand indOffset8(any_RegP reg, immL8 off)
 5646 %{
 5647   constraint(ALLOC_IN_RC(ptr_reg));
 5648   match(AddP reg off);
 5649 
 5650   format %{ "[$reg + $off (8-bit)]" %}
 5651   interface(MEMORY_INTER) %{
 5652     base($reg);
 5653     index(0x4);
 5654     scale(0x0);
 5655     disp($off);
 5656   %}
 5657 %}
 5658 
 5659 // Indirect Memory Plus Long Offset Operand
 5660 operand indOffset32(any_RegP reg, immL32 off)
 5661 %{
 5662   constraint(ALLOC_IN_RC(ptr_reg));
 5663   match(AddP reg off);
 5664 
 5665   format %{ "[$reg + $off (32-bit)]" %}
 5666   interface(MEMORY_INTER) %{
 5667     base($reg);
 5668     index(0x4);
 5669     scale(0x0);
 5670     disp($off);
 5671   %}
 5672 %}
 5673 
 5674 // Indirect Memory Plus Index Register Plus Offset Operand
 5675 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5676 %{
 5677   constraint(ALLOC_IN_RC(ptr_reg));
 5678   match(AddP (AddP reg lreg) off);
 5679 
 5680   op_cost(10);
 5681   format %{"[$reg + $off + $lreg]" %}
 5682   interface(MEMORY_INTER) %{
 5683     base($reg);
 5684     index($lreg);
 5685     scale(0x0);
 5686     disp($off);
 5687   %}
 5688 %}
 5689 
// Indirect Memory Plus Index Register Operand
 5691 operand indIndex(any_RegP reg, rRegL lreg)
 5692 %{
 5693   constraint(ALLOC_IN_RC(ptr_reg));
 5694   match(AddP reg lreg);
 5695 
 5696   op_cost(10);
 5697   format %{"[$reg + $lreg]" %}
 5698   interface(MEMORY_INTER) %{
 5699     base($reg);
 5700     index($lreg);
 5701     scale(0x0);
 5702     disp(0x0);
 5703   %}
 5704 %}
 5705 
 5706 // Indirect Memory Times Scale Plus Index Register
 5707 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5708 %{
 5709   constraint(ALLOC_IN_RC(ptr_reg));
 5710   match(AddP reg (LShiftL lreg scale));
 5711 
 5712   op_cost(10);
 5713   format %{"[$reg + $lreg << $scale]" %}
 5714   interface(MEMORY_INTER) %{
 5715     base($reg);
 5716     index($lreg);
 5717     scale($scale);
 5718     disp(0x0);
 5719   %}
 5720 %}
 5721 
 5722 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5723 %{
 5724   constraint(ALLOC_IN_RC(ptr_reg));
 5725   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5726   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5727 
 5728   op_cost(10);
 5729   format %{"[$reg + pos $idx << $scale]" %}
 5730   interface(MEMORY_INTER) %{
 5731     base($reg);
 5732     index($idx);
 5733     scale($scale);
 5734     disp(0x0);
 5735   %}
 5736 %}
 5737 
 5738 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5739 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5740 %{
 5741   constraint(ALLOC_IN_RC(ptr_reg));
 5742   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5743 
 5744   op_cost(10);
 5745   format %{"[$reg + $off + $lreg << $scale]" %}
 5746   interface(MEMORY_INTER) %{
 5747     base($reg);
 5748     index($lreg);
 5749     scale($scale);
 5750     disp($off);
 5751   %}
 5752 %}
 5753 
 5754 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5755 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5756 %{
 5757   constraint(ALLOC_IN_RC(ptr_reg));
 5758   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5759   match(AddP (AddP reg (ConvI2L idx)) off);
 5760 
 5761   op_cost(10);
 5762   format %{"[$reg + $off + $idx]" %}
 5763   interface(MEMORY_INTER) %{
 5764     base($reg);
 5765     index($idx);
 5766     scale(0x0);
 5767     disp($off);
 5768   %}
 5769 %}
 5770 
 5771 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5772 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5773 %{
 5774   constraint(ALLOC_IN_RC(ptr_reg));
 5775   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5776   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5777 
 5778   op_cost(10);
 5779   format %{"[$reg + $off + $idx << $scale]" %}
 5780   interface(MEMORY_INTER) %{
 5781     base($reg);
 5782     index($idx);
 5783     scale($scale);
 5784     disp($off);
 5785   %}
 5786 %}
 5787 
 5788 // Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// register, so we can't free r12 even with CompressedOops::base() == nullptr.
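// For example, with CompressedOops::shift() == 3, a compressed oop value n
// plus a field offset off decodes to R12 + (n << 3) + off, which maps onto
// the [base + index*scale + disp] form encoded below.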
 5791 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5792   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5793   constraint(ALLOC_IN_RC(ptr_reg));
 5794   match(AddP (DecodeN reg) off);
 5795 
 5796   op_cost(10);
 5797   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5798   interface(MEMORY_INTER) %{
 5799     base(0xc); // R12
 5800     index($reg);
 5801     scale(0x3);
 5802     disp($off);
 5803   %}
 5804 %}
 5805 
 5806 // Indirect Memory Operand
 5807 operand indirectNarrow(rRegN reg)
 5808 %{
 5809   predicate(CompressedOops::shift() == 0);
 5810   constraint(ALLOC_IN_RC(ptr_reg));
 5811   match(DecodeN reg);
 5812 
 5813   format %{ "[$reg]" %}
 5814   interface(MEMORY_INTER) %{
 5815     base($reg);
 5816     index(0x4);
 5817     scale(0x0);
 5818     disp(0x0);
 5819   %}
 5820 %}
 5821 
 5822 // Indirect Memory Plus Short Offset Operand
 5823 operand indOffset8Narrow(rRegN reg, immL8 off)
 5824 %{
 5825   predicate(CompressedOops::shift() == 0);
 5826   constraint(ALLOC_IN_RC(ptr_reg));
 5827   match(AddP (DecodeN reg) off);
 5828 
 5829   format %{ "[$reg + $off (8-bit)]" %}
 5830   interface(MEMORY_INTER) %{
 5831     base($reg);
 5832     index(0x4);
 5833     scale(0x0);
 5834     disp($off);
 5835   %}
 5836 %}
 5837 
 5838 // Indirect Memory Plus Long Offset Operand
 5839 operand indOffset32Narrow(rRegN reg, immL32 off)
 5840 %{
 5841   predicate(CompressedOops::shift() == 0);
 5842   constraint(ALLOC_IN_RC(ptr_reg));
 5843   match(AddP (DecodeN reg) off);
 5844 
 5845   format %{ "[$reg + $off (32-bit)]" %}
 5846   interface(MEMORY_INTER) %{
 5847     base($reg);
 5848     index(0x4);
 5849     scale(0x0);
 5850     disp($off);
 5851   %}
 5852 %}
 5853 
 5854 // Indirect Memory Plus Index Register Plus Offset Operand
 5855 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5856 %{
 5857   predicate(CompressedOops::shift() == 0);
 5858   constraint(ALLOC_IN_RC(ptr_reg));
 5859   match(AddP (AddP (DecodeN reg) lreg) off);
 5860 
 5861   op_cost(10);
 5862   format %{"[$reg + $off + $lreg]" %}
 5863   interface(MEMORY_INTER) %{
 5864     base($reg);
 5865     index($lreg);
 5866     scale(0x0);
 5867     disp($off);
 5868   %}
 5869 %}
 5870 
// Indirect Memory Plus Index Register Operand
 5872 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5873 %{
 5874   predicate(CompressedOops::shift() == 0);
 5875   constraint(ALLOC_IN_RC(ptr_reg));
 5876   match(AddP (DecodeN reg) lreg);
 5877 
 5878   op_cost(10);
 5879   format %{"[$reg + $lreg]" %}
 5880   interface(MEMORY_INTER) %{
 5881     base($reg);
 5882     index($lreg);
 5883     scale(0x0);
 5884     disp(0x0);
 5885   %}
 5886 %}
 5887 
 5888 // Indirect Memory Times Scale Plus Index Register
 5889 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5890 %{
 5891   predicate(CompressedOops::shift() == 0);
 5892   constraint(ALLOC_IN_RC(ptr_reg));
 5893   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5894 
 5895   op_cost(10);
 5896   format %{"[$reg + $lreg << $scale]" %}
 5897   interface(MEMORY_INTER) %{
 5898     base($reg);
 5899     index($lreg);
 5900     scale($scale);
 5901     disp(0x0);
 5902   %}
 5903 %}
 5904 
 5905 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5906 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5907 %{
 5908   predicate(CompressedOops::shift() == 0);
 5909   constraint(ALLOC_IN_RC(ptr_reg));
 5910   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5911 
 5912   op_cost(10);
 5913   format %{"[$reg + $off + $lreg << $scale]" %}
 5914   interface(MEMORY_INTER) %{
 5915     base($reg);
 5916     index($lreg);
 5917     scale($scale);
 5918     disp($off);
 5919   %}
 5920 %}
 5921 
// Indirect Memory Plus Positive Index Register Plus Offset Operand
 5923 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5924 %{
 5925   constraint(ALLOC_IN_RC(ptr_reg));
 5926   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5927   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5928 
 5929   op_cost(10);
 5930   format %{"[$reg + $off + $idx]" %}
 5931   interface(MEMORY_INTER) %{
 5932     base($reg);
 5933     index($idx);
 5934     scale(0x0);
 5935     disp($off);
 5936   %}
 5937 %}
 5938 
 5939 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5940 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5941 %{
 5942   constraint(ALLOC_IN_RC(ptr_reg));
 5943   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5944   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 5945 
 5946   op_cost(10);
 5947   format %{"[$reg + $off + $idx << $scale]" %}
 5948   interface(MEMORY_INTER) %{
 5949     base($reg);
 5950     index($idx);
 5951     scale($scale);
 5952     disp($off);
 5953   %}
 5954 %}
 5955 
 5956 //----------Special Memory Operands--------------------------------------------
 5957 // Stack Slot Operand - This operand is used for loading and storing temporary
 5958 //                      values on the stack where a match requires a value to
 5959 //                      flow through memory.
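// For example, a value spilled by the register allocator is addressed as
// [RSP + disp]: base is RSP, there is no index, and the displacement is the
// assigned stack offset, exactly as encoded below.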
 5960 operand stackSlotP(sRegP reg)
 5961 %{
 5962   constraint(ALLOC_IN_RC(stack_slots));
 5963   // No match rule because this operand is only generated in matching
 5964 
 5965   format %{ "[$reg]" %}
 5966   interface(MEMORY_INTER) %{
 5967     base(0x4);   // RSP
 5968     index(0x4);  // No Index
 5969     scale(0x0);  // No Scale
 5970     disp($reg);  // Stack Offset
 5971   %}
 5972 %}
 5973 
 5974 operand stackSlotI(sRegI reg)
 5975 %{
 5976   constraint(ALLOC_IN_RC(stack_slots));
 5977   // No match rule because this operand is only generated in matching
 5978 
 5979   format %{ "[$reg]" %}
 5980   interface(MEMORY_INTER) %{
 5981     base(0x4);   // RSP
 5982     index(0x4);  // No Index
 5983     scale(0x0);  // No Scale
 5984     disp($reg);  // Stack Offset
 5985   %}
 5986 %}
 5987 
 5988 operand stackSlotF(sRegF reg)
 5989 %{
 5990   constraint(ALLOC_IN_RC(stack_slots));
 5991   // No match rule because this operand is only generated in matching
 5992 
 5993   format %{ "[$reg]" %}
 5994   interface(MEMORY_INTER) %{
 5995     base(0x4);   // RSP
 5996     index(0x4);  // No Index
 5997     scale(0x0);  // No Scale
 5998     disp($reg);  // Stack Offset
 5999   %}
 6000 %}
 6001 
 6002 operand stackSlotD(sRegD reg)
 6003 %{
 6004   constraint(ALLOC_IN_RC(stack_slots));
 6005   // No match rule because this operand is only generated in matching
 6006 
 6007   format %{ "[$reg]" %}
 6008   interface(MEMORY_INTER) %{
 6009     base(0x4);   // RSP
 6010     index(0x4);  // No Index
 6011     scale(0x0);  // No Scale
 6012     disp($reg);  // Stack Offset
 6013   %}
 6014 %}
 6015 operand stackSlotL(sRegL reg)
 6016 %{
 6017   constraint(ALLOC_IN_RC(stack_slots));
 6018   // No match rule because this operand is only generated in matching
 6019 
 6020   format %{ "[$reg]" %}
 6021   interface(MEMORY_INTER) %{
 6022     base(0x4);   // RSP
 6023     index(0x4);  // No Index
 6024     scale(0x0);  // No Scale
 6025     disp($reg);  // Stack Offset
 6026   %}
 6027 %}
 6028 
 6029 //----------Conditional Branch Operands----------------------------------------
 6030 // Comparison Op  - This is the operation of the comparison, and is limited to
 6031 //                  the following set of codes:
 6032 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6033 //
 6034 // Other attributes of the comparison, such as unsignedness, are specified
 6035 // by the comparison instruction that sets a condition code flags register.
 6036 // That result is represented by a flags operand whose subtype is appropriate
 6037 // to the unsignedness (etc.) of the comparison.
 6038 //
 6039 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6040 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6041 // by matching a specific subtype of Bool operand below, such as cmpOpU.
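//
// As a sketch of that pattern (the concrete jump rules appear later in this
// file; details here are illustrative):
//
//   instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
//   %{
//     match(If cop cr);
//     ...
//   %}
//
// Such an instruction selects its jcc encoding from $cop's COND_INTER codes
// and consumes the flags produced by the preceding compare.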
 6042 
 6043 // Comparison Code
 6044 operand cmpOp()
 6045 %{
 6046   match(Bool);
 6047 
 6048   format %{ "" %}
 6049   interface(COND_INTER) %{
 6050     equal(0x4, "e");
 6051     not_equal(0x5, "ne");
 6052     less(0xc, "l");
 6053     greater_equal(0xd, "ge");
 6054     less_equal(0xe, "le");
 6055     greater(0xf, "g");
 6056     overflow(0x0, "o");
 6057     no_overflow(0x1, "no");
 6058   %}
 6059 %}
 6060 
 6061 // Comparison Code, unsigned compare.  Used by FP also, with
 6062 // C2 (unordered) turned into GT or LT already.  The other bits
 6063 // C0 and C3 are turned into Carry & Zero flags.
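// For example, ucomiss/ucomisd set ZF and CF directly from the comparison,
// so the unsigned condition codes below (b, ae, be, a) test floating point
// results without further translation.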
 6064 operand cmpOpU()
 6065 %{
 6066   match(Bool);
 6067 
 6068   format %{ "" %}
 6069   interface(COND_INTER) %{
 6070     equal(0x4, "e");
 6071     not_equal(0x5, "ne");
 6072     less(0x2, "b");
 6073     greater_equal(0x3, "ae");
 6074     less_equal(0x6, "be");
 6075     greater(0x7, "a");
 6076     overflow(0x0, "o");
 6077     no_overflow(0x1, "no");
 6078   %}
 6079 %}
 6080 
 6081 
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
 6085 operand cmpOpUCF() %{
 6086   match(Bool);
 6087   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6088             (n->as_Bool()->_test._test == BoolTest::lt ||
 6089              n->as_Bool()->_test._test == BoolTest::ge ||
 6090              n->as_Bool()->_test._test == BoolTest::le ||
 6091              n->as_Bool()->_test._test == BoolTest::gt ||
 6092              n->in(1)->in(1) == n->in(1)->in(2)));
 6093   format %{ "" %}
 6094   interface(COND_INTER) %{
 6095     equal(0xb, "np");
 6096     not_equal(0xa, "p");
 6097     less(0x2, "b");
 6098     greater_equal(0x3, "ae");
 6099     less_equal(0x6, "be");
 6100     greater(0x7, "a");
 6101     overflow(0x0, "o");
 6102     no_overflow(0x1, "no");
 6103   %}
 6104 %}
 6105 
 6106 
 6107 // Floating comparisons that can be fixed up with extra conditional jumps
 6108 operand cmpOpUCF2() %{
 6109   match(Bool);
 6110   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6111             (n->as_Bool()->_test._test == BoolTest::ne ||
 6112              n->as_Bool()->_test._test == BoolTest::eq) &&
 6113             n->in(1)->in(1) != n->in(1)->in(2));
 6114   format %{ "" %}
 6115   interface(COND_INTER) %{
 6116     equal(0x4, "e");
 6117     not_equal(0x5, "ne");
 6118     less(0x2, "b");
 6119     greater_equal(0x3, "ae");
 6120     less_equal(0x6, "be");
 6121     greater(0x7, "a");
 6122     overflow(0x0, "o");
 6123     no_overflow(0x1, "no");
 6124   %}
 6125 %}
 6126 
 6127 
// Floating point comparisons that set condition flags to test more directly.
// Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
// are used for L (<) and LE (<=) conditions. It's important to convert these
// latter conditions to ones that use unsigned tests before passing into an
// instruction, because the preceding comparison might be based on a three-way
// comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
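// For example, an ideal BoolTest::lt reaching this operand is emitted with
// the unsigned "b" test (0x2 in the table below) rather than the signed "l"
// test.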
 6134 operand cmpOpUCFE()
 6135 %{
 6136   match(Bool);
 6137   predicate((UseAPX && VM_Version::supports_avx10_2()) &&
 6138             (n->as_Bool()->_test._test == BoolTest::ne ||
 6139              n->as_Bool()->_test._test == BoolTest::eq ||
 6140              n->as_Bool()->_test._test == BoolTest::lt ||
 6141              n->as_Bool()->_test._test == BoolTest::ge ||
 6142              n->as_Bool()->_test._test == BoolTest::le ||
 6143              n->as_Bool()->_test._test == BoolTest::gt));
 6144 
 6145   format %{ "" %}
 6146   interface(COND_INTER) %{
 6147     equal(0x4, "e");
 6148     not_equal(0x5, "ne");
 6149     less(0x2, "b");
 6150     greater_equal(0x3, "ae");
 6151     less_equal(0x6, "be");
 6152     greater(0x7, "a");
 6153     overflow(0x0, "o");
 6154     no_overflow(0x1, "no");
 6155   %}
 6156 %}
 6157 
// Operands for bound floating point register arguments
 6159 operand rxmm0() %{
 6160   constraint(ALLOC_IN_RC(xmm0_reg));
 6161   match(VecX);
  format %{ %}
 6163   interface(REG_INTER);
 6164 %}
 6165 
 6166 // Vectors
 6167 
 6168 // Dummy generic vector class. Should be used for all vector operands.
 6169 // Replaced with vec[SDXYZ] during post-selection pass.
 6170 operand vec() %{
 6171   constraint(ALLOC_IN_RC(dynamic));
 6172   match(VecX);
 6173   match(VecY);
 6174   match(VecZ);
 6175   match(VecS);
 6176   match(VecD);
 6177 
 6178   format %{ %}
 6179   interface(REG_INTER);
 6180 %}
 6181 
 6182 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6183 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6184 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6185 // runtime code generation via reg_class_dynamic.
 6186 operand legVec() %{
 6187   constraint(ALLOC_IN_RC(dynamic));
 6188   match(VecX);
 6189   match(VecY);
 6190   match(VecZ);
 6191   match(VecS);
 6192   match(VecD);
 6193 
 6194   format %{ %}
 6195   interface(REG_INTER);
 6196 %}
 6197 
 6198 // Replaces vec during post-selection cleanup. See above.
 6199 operand vecS() %{
 6200   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6201   match(VecS);
 6202 
 6203   format %{ %}
 6204   interface(REG_INTER);
 6205 %}
 6206 
 6207 // Replaces legVec during post-selection cleanup. See above.
 6208 operand legVecS() %{
 6209   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6210   match(VecS);
 6211 
 6212   format %{ %}
 6213   interface(REG_INTER);
 6214 %}
 6215 
 6216 // Replaces vec during post-selection cleanup. See above.
 6217 operand vecD() %{
 6218   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6219   match(VecD);
 6220 
 6221   format %{ %}
 6222   interface(REG_INTER);
 6223 %}
 6224 
 6225 // Replaces legVec during post-selection cleanup. See above.
 6226 operand legVecD() %{
 6227   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6228   match(VecD);
 6229 
 6230   format %{ %}
 6231   interface(REG_INTER);
 6232 %}
 6233 
 6234 // Replaces vec during post-selection cleanup. See above.
 6235 operand vecX() %{
 6236   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6237   match(VecX);
 6238 
 6239   format %{ %}
 6240   interface(REG_INTER);
 6241 %}
 6242 
 6243 // Replaces legVec during post-selection cleanup. See above.
 6244 operand legVecX() %{
 6245   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6246   match(VecX);
 6247 
 6248   format %{ %}
 6249   interface(REG_INTER);
 6250 %}
 6251 
 6252 // Replaces vec during post-selection cleanup. See above.
 6253 operand vecY() %{
 6254   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6255   match(VecY);
 6256 
 6257   format %{ %}
 6258   interface(REG_INTER);
 6259 %}
 6260 
 6261 // Replaces legVec during post-selection cleanup. See above.
 6262 operand legVecY() %{
 6263   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6264   match(VecY);
 6265 
 6266   format %{ %}
 6267   interface(REG_INTER);
 6268 %}
 6269 
 6270 // Replaces vec during post-selection cleanup. See above.
 6271 operand vecZ() %{
 6272   constraint(ALLOC_IN_RC(vectorz_reg));
 6273   match(VecZ);
 6274 
 6275   format %{ %}
 6276   interface(REG_INTER);
 6277 %}
 6278 
 6279 // Replaces legVec during post-selection cleanup. See above.
 6280 operand legVecZ() %{
 6281   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6282   match(VecZ);
 6283 
 6284   format %{ %}
 6285   interface(REG_INTER);
 6286 %}
 6287 
 6288 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
 6290 // instruction definitions by not requiring the AD writer to specify separate
 6291 // instructions for every form of operand when the instruction accepts
 6292 // multiple operand types with the same basic encoding and format.  The classic
 6293 // case of this is memory operands.
 6294 
 6295 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6296                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6297                indCompressedOopOffset,
 6298                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6299                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6300                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
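
// For example, the single loadI rule below (match(Set dst (LoadI mem)) with a
// "memory mem" operand) covers [reg], [reg + off], [reg + idx << scale + off],
// and the narrow-oop forms without a separate instruct per addressing shape.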
 6301 
 6302 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 6304 pipeline %{
 6305 
 6306 //----------ATTRIBUTES---------------------------------------------------------
 6307 attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 6311   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6312   instruction_fetch_units = 1;       // of 16 bytes
 6313 %}
 6314 
 6315 //----------RESOURCES----------------------------------------------------------
 6316 // Resources are the functional units available to the machine
 6317 
 6318 // Generic P2/P3 pipeline
 6319 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6320 // 3 instructions decoded per cycle.
 6321 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops, only ALU0 handles mul instructions.
 6323 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6324            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6325            BR, FPU,
 6326            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6327 
 6328 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6329 // Pipeline Description specifies the stages in the machine's pipeline
 6330 
 6331 // Generic P2/P3 pipeline
 6332 pipe_desc(S0, S1, S2, S3, S4, S5);
 6333 
 6334 //----------PIPELINE CLASSES---------------------------------------------------
 6335 // Pipeline Classes describe the stages in which input and output are
 6336 // referenced by the hardware pipeline.
 6337 
 6338 // Naming convention: ialu or fpu
 6339 // Then: _reg
 6340 // Then: _reg if there is a 2nd register
 6341 // Then: _long if it's a pair of instructions implementing a long
 6342 // Then: _fat if it requires the big decoder
 6343 //   Or: _mem if it requires the big decoder and a memory unit.
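//
// For example, ialu_reg_mem below names an integer ALU operation with a
// register destination and a memory source, so it claims the big decoder (D0)
// and a memory unit (MEM) in addition to an ALU.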
 6344 
 6345 // Integer ALU reg operation
 6346 pipe_class ialu_reg(rRegI dst)
 6347 %{
 6348     single_instruction;
 6349     dst    : S4(write);
 6350     dst    : S3(read);
 6351     DECODE : S0;        // any decoder
 6352     ALU    : S3;        // any alu
 6353 %}
 6354 
 6355 // Long ALU reg operation
 6356 pipe_class ialu_reg_long(rRegL dst)
 6357 %{
 6358     instruction_count(2);
 6359     dst    : S4(write);
 6360     dst    : S3(read);
 6361     DECODE : S0(2);     // any 2 decoders
 6362     ALU    : S3(2);     // both alus
 6363 %}
 6364 
 6365 // Integer ALU reg operation using big decoder
 6366 pipe_class ialu_reg_fat(rRegI dst)
 6367 %{
 6368     single_instruction;
 6369     dst    : S4(write);
 6370     dst    : S3(read);
 6371     D0     : S0;        // big decoder only
 6372     ALU    : S3;        // any alu
 6373 %}
 6374 
 6375 // Integer ALU reg-reg operation
 6376 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6377 %{
 6378     single_instruction;
 6379     dst    : S4(write);
 6380     src    : S3(read);
 6381     DECODE : S0;        // any decoder
 6382     ALU    : S3;        // any alu
 6383 %}
 6384 
 6385 // Integer ALU reg-reg operation
 6386 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6387 %{
 6388     single_instruction;
 6389     dst    : S4(write);
 6390     src    : S3(read);
 6391     D0     : S0;        // big decoder only
 6392     ALU    : S3;        // any alu
 6393 %}
 6394 
 6395 // Integer ALU reg-mem operation
 6396 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6397 %{
 6398     single_instruction;
 6399     dst    : S5(write);
 6400     mem    : S3(read);
 6401     D0     : S0;        // big decoder only
 6402     ALU    : S4;        // any alu
 6403     MEM    : S3;        // any mem
 6404 %}
 6405 
 6406 // Integer mem operation (prefetch)
 6407 pipe_class ialu_mem(memory mem)
 6408 %{
 6409     single_instruction;
 6410     mem    : S3(read);
 6411     D0     : S0;        // big decoder only
 6412     MEM    : S3;        // any mem
 6413 %}
 6414 
 6415 // Integer Store to Memory
 6416 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6417 %{
 6418     single_instruction;
 6419     mem    : S3(read);
 6420     src    : S5(read);
 6421     D0     : S0;        // big decoder only
 6422     ALU    : S4;        // any alu
 6423     MEM    : S3;
 6424 %}
 6425 
 6426 // // Long Store to Memory
 6427 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6428 // %{
 6429 //     instruction_count(2);
 6430 //     mem    : S3(read);
 6431 //     src    : S5(read);
 6432 //     D0     : S0(2);          // big decoder only; twice
 6433 //     ALU    : S4(2);     // any 2 alus
 6434 //     MEM    : S3(2);  // Both mems
 6435 // %}
 6436 
 6437 // Integer Store to Memory
 6438 pipe_class ialu_mem_imm(memory mem)
 6439 %{
 6440     single_instruction;
 6441     mem    : S3(read);
 6442     D0     : S0;        // big decoder only
 6443     ALU    : S4;        // any alu
 6444     MEM    : S3;
 6445 %}
 6446 
 6447 // Integer ALU0 reg-reg operation
 6448 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6449 %{
 6450     single_instruction;
 6451     dst    : S4(write);
 6452     src    : S3(read);
 6453     D0     : S0;        // Big decoder only
 6454     ALU0   : S3;        // only alu0
 6455 %}
 6456 
 6457 // Integer ALU0 reg-mem operation
 6458 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6459 %{
 6460     single_instruction;
 6461     dst    : S5(write);
 6462     mem    : S3(read);
 6463     D0     : S0;        // big decoder only
 6464     ALU0   : S4;        // ALU0 only
 6465     MEM    : S3;        // any mem
 6466 %}
 6467 
 6468 // Integer ALU reg-reg operation
 6469 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6470 %{
 6471     single_instruction;
 6472     cr     : S4(write);
 6473     src1   : S3(read);
 6474     src2   : S3(read);
 6475     DECODE : S0;        // any decoder
 6476     ALU    : S3;        // any alu
 6477 %}
 6478 
 6479 // Integer ALU reg-imm operation
 6480 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6481 %{
 6482     single_instruction;
 6483     cr     : S4(write);
 6484     src1   : S3(read);
 6485     DECODE : S0;        // any decoder
 6486     ALU    : S3;        // any alu
 6487 %}
 6488 
 6489 // Integer ALU reg-mem operation
 6490 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6491 %{
 6492     single_instruction;
 6493     cr     : S4(write);
 6494     src1   : S3(read);
 6495     src2   : S3(read);
 6496     D0     : S0;        // big decoder only
 6497     ALU    : S4;        // any alu
 6498     MEM    : S3;
 6499 %}
 6500 
 6501 // Conditional move reg-reg
 6502 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6503 %{
 6504     instruction_count(4);
 6505     y      : S4(read);
 6506     q      : S3(read);
 6507     p      : S3(read);
 6508     DECODE : S0(4);     // any decoder
 6509 %}
 6510 
 6511 // Conditional move reg-reg
 6512 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6513 %{
 6514     single_instruction;
 6515     dst    : S4(write);
 6516     src    : S3(read);
 6517     cr     : S3(read);
 6518     DECODE : S0;        // any decoder
 6519 %}
 6520 
 6521 // Conditional move reg-mem
 6522 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6523 %{
 6524     single_instruction;
 6525     dst    : S4(write);
 6526     src    : S3(read);
 6527     cr     : S3(read);
 6528     DECODE : S0;        // any decoder
 6529     MEM    : S3;
 6530 %}
 6531 
 6532 // Conditional move reg-reg long
 6533 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6534 %{
 6535     single_instruction;
 6536     dst    : S4(write);
 6537     src    : S3(read);
 6538     cr     : S3(read);
 6539     DECODE : S0(2);     // any 2 decoders
 6540 %}
 6541 
 6542 // Float reg-reg operation
 6543 pipe_class fpu_reg(regD dst)
 6544 %{
 6545     instruction_count(2);
 6546     dst    : S3(read);
 6547     DECODE : S0(2);     // any 2 decoders
 6548     FPU    : S3;
 6549 %}
 6550 
 6551 // Float reg-reg operation
 6552 pipe_class fpu_reg_reg(regD dst, regD src)
 6553 %{
 6554     instruction_count(2);
 6555     dst    : S4(write);
 6556     src    : S3(read);
 6557     DECODE : S0(2);     // any 2 decoders
 6558     FPU    : S3;
 6559 %}
 6560 
 6561 // Float reg-reg operation
 6562 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6563 %{
 6564     instruction_count(3);
 6565     dst    : S4(write);
 6566     src1   : S3(read);
 6567     src2   : S3(read);
 6568     DECODE : S0(3);     // any 3 decoders
 6569     FPU    : S3(2);
 6570 %}
 6571 
 6572 // Float reg-reg operation
 6573 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6574 %{
 6575     instruction_count(4);
 6576     dst    : S4(write);
 6577     src1   : S3(read);
 6578     src2   : S3(read);
 6579     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
 6581     FPU    : S3(2);
 6582 %}
 6583 
 6584 // Float reg-reg operation
 6585 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6586 %{
 6587     instruction_count(4);
 6588     dst    : S4(write);
 6589     src1   : S3(read);
 6590     src2   : S3(read);
 6591     src3   : S3(read);
 6592     DECODE : S1(3);     // any 3 decoders
 6593     D0     : S0;        // Big decoder only
 6594     FPU    : S3(2);
 6595     MEM    : S3;
 6596 %}
 6597 
 6598 // Float reg-mem operation
 6599 pipe_class fpu_reg_mem(regD dst, memory mem)
 6600 %{
 6601     instruction_count(2);
 6602     dst    : S5(write);
 6603     mem    : S3(read);
 6604     D0     : S0;        // big decoder only
 6605     DECODE : S1;        // any decoder for FPU POP
 6606     FPU    : S4;
 6607     MEM    : S3;        // any mem
 6608 %}
 6609 
 6610 // Float reg-mem operation
 6611 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6612 %{
 6613     instruction_count(3);
 6614     dst    : S5(write);
 6615     src1   : S3(read);
 6616     mem    : S3(read);
 6617     D0     : S0;        // big decoder only
    DECODE : S1(2);     // any 2 decoders for FPU POP
 6619     FPU    : S4;
 6620     MEM    : S3;        // any mem
 6621 %}
 6622 
 6623 // Float mem-reg operation
 6624 pipe_class fpu_mem_reg(memory mem, regD src)
 6625 %{
 6626     instruction_count(2);
 6627     src    : S5(read);
 6628     mem    : S3(read);
 6629     DECODE : S0;        // any decoder for FPU PUSH
 6630     D0     : S1;        // big decoder only
 6631     FPU    : S4;
 6632     MEM    : S3;        // any mem
 6633 %}
 6634 
 6635 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6636 %{
 6637     instruction_count(3);
 6638     src1   : S3(read);
 6639     src2   : S3(read);
 6640     mem    : S3(read);
    DECODE : S0(2);     // any 2 decoders for FPU PUSH
 6642     D0     : S1;        // big decoder only
 6643     FPU    : S4;
 6644     MEM    : S3;        // any mem
 6645 %}
 6646 
 6647 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6648 %{
 6649     instruction_count(3);
 6650     src1   : S3(read);
 6651     src2   : S3(read);
 6652     mem    : S4(read);
 6653     DECODE : S0;        // any decoder for FPU PUSH
 6654     D0     : S0(2);     // big decoder only
 6655     FPU    : S4;
 6656     MEM    : S3(2);     // any mem
 6657 %}
 6658 
 6659 pipe_class fpu_mem_mem(memory dst, memory src1)
 6660 %{
 6661     instruction_count(2);
 6662     src1   : S3(read);
 6663     dst    : S4(read);
 6664     D0     : S0(2);     // big decoder only
 6665     MEM    : S3(2);     // any mem
 6666 %}
 6667 
 6668 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6669 %{
 6670     instruction_count(3);
 6671     src1   : S3(read);
 6672     src2   : S3(read);
 6673     dst    : S4(read);
 6674     D0     : S0(3);     // big decoder only
 6675     FPU    : S4;
 6676     MEM    : S3(3);     // any mem
 6677 %}
 6678 
 6679 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6680 %{
 6681     instruction_count(3);
 6682     src1   : S4(read);
 6683     mem    : S4(read);
 6684     DECODE : S0;        // any decoder for FPU PUSH
 6685     D0     : S0(2);     // big decoder only
 6686     FPU    : S4;
 6687     MEM    : S3(2);     // any mem
 6688 %}
 6689 
 6690 // Float load constant
 6691 pipe_class fpu_reg_con(regD dst)
 6692 %{
 6693     instruction_count(2);
 6694     dst    : S5(write);
 6695     D0     : S0;        // big decoder only for the load
 6696     DECODE : S1;        // any decoder for FPU POP
 6697     FPU    : S4;
 6698     MEM    : S3;        // any mem
 6699 %}
 6700 
 6701 // Float load constant
 6702 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6703 %{
 6704     instruction_count(3);
 6705     dst    : S5(write);
 6706     src    : S3(read);
 6707     D0     : S0;        // big decoder only for the load
 6708     DECODE : S1(2);     // any decoder for FPU POP
 6709     FPU    : S4;
 6710     MEM    : S3;        // any mem
 6711 %}
 6712 
// Unconditional branch
 6714 pipe_class pipe_jmp(label labl)
 6715 %{
 6716     single_instruction;
 6717     BR   : S3;
 6718 %}
 6719 
 6720 // Conditional branch
 6721 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6722 %{
 6723     single_instruction;
 6724     cr    : S1(read);
 6725     BR    : S3;
 6726 %}
 6727 
 6728 // Allocation idiom
 6729 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6730 %{
 6731     instruction_count(1); force_serialization;
 6732     fixed_latency(6);
 6733     heap_ptr : S3(read);
 6734     DECODE   : S0(3);
 6735     D0       : S2;
 6736     MEM      : S3;
 6737     ALU      : S3(2);
 6738     dst      : S5(write);
 6739     BR       : S5;
 6740 %}
 6741 
 6742 // Generic big/slow expanded idiom
 6743 pipe_class pipe_slow()
 6744 %{
 6745     instruction_count(10); multiple_bundles; force_serialization;
 6746     fixed_latency(100);
 6747     D0  : S0(2);
 6748     MEM : S3(2);
 6749 %}
 6750 
 6751 // The real do-nothing guy
 6752 pipe_class empty()
 6753 %{
 6754     instruction_count(0);
 6755 %}
 6756 
 6757 // Define the class for the Nop node
 6758 define
 6759 %{
 6760    MachNop = empty;
 6761 %}
 6762 
 6763 %}
 6764 
 6765 //----------INSTRUCTIONS-------------------------------------------------------
 6766 //
 6767 // match      -- States which machine-independent subtree may be replaced
 6768 //               by this instruction.
 6769 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6770 //               selection to identify a minimum cost tree of machine
 6771 //               instructions that matches a tree of machine-independent
 6772 //               instructions.
 6773 // format     -- A string providing the disassembly for this instruction.
 6774 //               The value of an instruction's operand may be inserted
 6775 //               by referring to it with a '$' prefix.
 6776 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6777 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
 6779 //               indicate the type of machine instruction, while secondary
 6780 //               and tertiary are often used for prefix options or addressing
 6781 //               modes.
 6782 // ins_encode -- A list of encode classes with parameters. The encode class
 6783 //               name must have been defined in an 'enc_class' specification
 6784 //               in the encode section of the architecture description.
 6785 
 6786 // ============================================================================
 6787 
 6788 instruct ShouldNotReachHere() %{
 6789   match(Halt);
 6790   format %{ "stop\t# ShouldNotReachHere" %}
 6791   ins_encode %{
 6792     if (is_reachable()) {
 6793       const char* str = __ code_string(_halt_reason);
 6794       __ stop(str);
 6795     }
 6796   %}
 6797   ins_pipe(pipe_slow);
 6798 %}
 6799 
 6800 // ============================================================================
 6801 
 6802 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6803 // Load Float
 6804 instruct MoveF2VL(vlRegF dst, regF src) %{
 6805   match(Set dst src);
 6806   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6807   ins_encode %{
 6808     ShouldNotReachHere();
 6809   %}
 6810   ins_pipe( fpu_reg_reg );
 6811 %}
 6812 
 6813 // Load Float
 6814 instruct MoveF2LEG(legRegF dst, regF src) %{
 6815   match(Set dst src);
 6816   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6817   ins_encode %{
 6818     ShouldNotReachHere();
 6819   %}
 6820   ins_pipe( fpu_reg_reg );
 6821 %}
 6822 
 6823 // Load Float
 6824 instruct MoveVL2F(regF dst, vlRegF src) %{
 6825   match(Set dst src);
 6826   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6827   ins_encode %{
 6828     ShouldNotReachHere();
 6829   %}
 6830   ins_pipe( fpu_reg_reg );
 6831 %}
 6832 
 6833 // Load Float
 6834 instruct MoveLEG2F(regF dst, legRegF src) %{
 6835   match(Set dst src);
 6836   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6837   ins_encode %{
 6838     ShouldNotReachHere();
 6839   %}
 6840   ins_pipe( fpu_reg_reg );
 6841 %}
 6842 
 6843 // Load Double
 6844 instruct MoveD2VL(vlRegD dst, regD src) %{
 6845   match(Set dst src);
 6846   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6847   ins_encode %{
 6848     ShouldNotReachHere();
 6849   %}
 6850   ins_pipe( fpu_reg_reg );
 6851 %}
 6852 
 6853 // Load Double
 6854 instruct MoveD2LEG(legRegD dst, regD src) %{
 6855   match(Set dst src);
 6856   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6857   ins_encode %{
 6858     ShouldNotReachHere();
 6859   %}
 6860   ins_pipe( fpu_reg_reg );
 6861 %}
 6862 
 6863 // Load Double
 6864 instruct MoveVL2D(regD dst, vlRegD src) %{
 6865   match(Set dst src);
 6866   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6867   ins_encode %{
 6868     ShouldNotReachHere();
 6869   %}
 6870   ins_pipe( fpu_reg_reg );
 6871 %}
 6872 
 6873 // Load Double
 6874 instruct MoveLEG2D(regD dst, legRegD src) %{
 6875   match(Set dst src);
 6876   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6877   ins_encode %{
 6878     ShouldNotReachHere();
 6879   %}
 6880   ins_pipe( fpu_reg_reg );
 6881 %}
 6882 
 6883 //----------Load/Store/Move Instructions---------------------------------------
 6884 //----------Load Instructions--------------------------------------------------
 6885 
 6886 // Load Byte (8 bit signed)
 6887 instruct loadB(rRegI dst, memory mem)
 6888 %{
 6889   match(Set dst (LoadB mem));
 6890 
 6891   ins_cost(125);
 6892   format %{ "movsbl  $dst, $mem\t# byte" %}
 6893 
 6894   ins_encode %{
 6895     __ movsbl($dst$$Register, $mem$$Address);
 6896   %}
 6897 
 6898   ins_pipe(ialu_reg_mem);
 6899 %}
 6900 
 6901 // Load Byte (8 bit signed) into Long Register
 6902 instruct loadB2L(rRegL dst, memory mem)
 6903 %{
 6904   match(Set dst (ConvI2L (LoadB mem)));
 6905 
 6906   ins_cost(125);
 6907   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6908 
 6909   ins_encode %{
 6910     __ movsbq($dst$$Register, $mem$$Address);
 6911   %}
 6912 
 6913   ins_pipe(ialu_reg_mem);
 6914 %}
 6915 
 6916 // Load Unsigned Byte (8 bit UNsigned)
 6917 instruct loadUB(rRegI dst, memory mem)
 6918 %{
 6919   match(Set dst (LoadUB mem));
 6920 
 6921   ins_cost(125);
 6922   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6923 
 6924   ins_encode %{
 6925     __ movzbl($dst$$Register, $mem$$Address);
 6926   %}
 6927 
 6928   ins_pipe(ialu_reg_mem);
 6929 %}
 6930 
 6931 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6932 instruct loadUB2L(rRegL dst, memory mem)
 6933 %{
 6934   match(Set dst (ConvI2L (LoadUB mem)));
 6935 
 6936   ins_cost(125);
 6937   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6938 
 6939   ins_encode %{
 6940     __ movzbq($dst$$Register, $mem$$Address);
 6941   %}
 6942 
 6943   ins_pipe(ialu_reg_mem);
 6944 %}
 6945 
 6946 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 6947 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6948   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6949   effect(KILL cr);
 6950 
 6951   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6952             "andl    $dst, right_n_bits($mask, 8)" %}
 6953   ins_encode %{
 6954     Register Rdst = $dst$$Register;
 6955     __ movzbq(Rdst, $mem$$Address);
 6956     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 6957   %}
 6958   ins_pipe(ialu_reg_mem);
 6959 %}
 6960 
 6961 // Load Short (16 bit signed)
 6962 instruct loadS(rRegI dst, memory mem)
 6963 %{
 6964   match(Set dst (LoadS mem));
 6965 
 6966   ins_cost(125);
 6967   format %{ "movswl $dst, $mem\t# short" %}
 6968 
 6969   ins_encode %{
 6970     __ movswl($dst$$Register, $mem$$Address);
 6971   %}
 6972 
 6973   ins_pipe(ialu_reg_mem);
 6974 %}
 6975 
 6976 // Load Short (16 bit signed) to Byte (8 bit signed)
 6977 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6978   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 6979 
 6980   ins_cost(125);
 6981   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6982   ins_encode %{
 6983     __ movsbl($dst$$Register, $mem$$Address);
 6984   %}
 6985   ins_pipe(ialu_reg_mem);
 6986 %}
 6987 
 6988 // Load Short (16 bit signed) into Long Register
 6989 instruct loadS2L(rRegL dst, memory mem)
 6990 %{
 6991   match(Set dst (ConvI2L (LoadS mem)));
 6992 
 6993   ins_cost(125);
 6994   format %{ "movswq $dst, $mem\t# short -> long" %}
 6995 
 6996   ins_encode %{
 6997     __ movswq($dst$$Register, $mem$$Address);
 6998   %}
 6999 
 7000   ins_pipe(ialu_reg_mem);
 7001 %}
 7002 
 7003 // Load Unsigned Short/Char (16 bit UNsigned)
 7004 instruct loadUS(rRegI dst, memory mem)
 7005 %{
 7006   match(Set dst (LoadUS mem));
 7007 
 7008   ins_cost(125);
 7009   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 7010 
 7011   ins_encode %{
 7012     __ movzwl($dst$$Register, $mem$$Address);
 7013   %}
 7014 
 7015   ins_pipe(ialu_reg_mem);
 7016 %}
 7017 
 7018 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 7019 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7020   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 7021 
 7022   ins_cost(125);
 7023   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 7024   ins_encode %{
 7025     __ movsbl($dst$$Register, $mem$$Address);
 7026   %}
 7027   ins_pipe(ialu_reg_mem);
 7028 %}
 7029 
 7030 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 7031 instruct loadUS2L(rRegL dst, memory mem)
 7032 %{
 7033   match(Set dst (ConvI2L (LoadUS mem)));
 7034 
 7035   ins_cost(125);
 7036   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 7037 
 7038   ins_encode %{
 7039     __ movzwq($dst$$Register, $mem$$Address);
 7040   %}
 7041 
 7042   ins_pipe(ialu_reg_mem);
 7043 %}
 7044 
 7045 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 7046 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7047   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7048 
 7049   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7050   ins_encode %{
 7051     __ movzbq($dst$$Register, $mem$$Address);
 7052   %}
 7053   ins_pipe(ialu_reg_mem);
 7054 %}
 7055 
 7056 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 7057 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7058   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7059   effect(KILL cr);
 7060 
 7061   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7062             "andl    $dst, right_n_bits($mask, 16)" %}
 7063   ins_encode %{
 7064     Register Rdst = $dst$$Register;
 7065     __ movzwq(Rdst, $mem$$Address);
 7066     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7067   %}
 7068   ins_pipe(ialu_reg_mem);
 7069 %}
 7070 
 7071 // Load Integer
 7072 instruct loadI(rRegI dst, memory mem)
 7073 %{
 7074   match(Set dst (LoadI mem));
 7075 
 7076   ins_cost(125);
 7077   format %{ "movl    $dst, $mem\t# int" %}
 7078 
 7079   ins_encode %{
 7080     __ movl($dst$$Register, $mem$$Address);
 7081   %}
 7082 
 7083   ins_pipe(ialu_reg_mem);
 7084 %}
 7085 
 7086 // Load Integer (32 bit signed) to Byte (8 bit signed)
 7087 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7088   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7089 
 7090   ins_cost(125);
 7091   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7092   ins_encode %{
 7093     __ movsbl($dst$$Register, $mem$$Address);
 7094   %}
 7095   ins_pipe(ialu_reg_mem);
 7096 %}
 7097 
 7098 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 7099 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7100   match(Set dst (AndI (LoadI mem) mask));
 7101 
 7102   ins_cost(125);
 7103   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7104   ins_encode %{
 7105     __ movzbl($dst$$Register, $mem$$Address);
 7106   %}
 7107   ins_pipe(ialu_reg_mem);
 7108 %}
 7109 
 7110 // Load Integer (32 bit signed) to Short (16 bit signed)
 7111 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7112   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7113 
 7114   ins_cost(125);
 7115   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7116   ins_encode %{
 7117     __ movswl($dst$$Register, $mem$$Address);
 7118   %}
 7119   ins_pipe(ialu_reg_mem);
 7120 %}
 7121 
 7122 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 7123 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7124   match(Set dst (AndI (LoadI mem) mask));
 7125 
 7126   ins_cost(125);
 7127   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7128   ins_encode %{
 7129     __ movzwl($dst$$Register, $mem$$Address);
 7130   %}
 7131   ins_pipe(ialu_reg_mem);
 7132 %}
 7133 
 7134 // Load Integer into Long Register
 7135 instruct loadI2L(rRegL dst, memory mem)
 7136 %{
 7137   match(Set dst (ConvI2L (LoadI mem)));
 7138 
 7139   ins_cost(125);
 7140   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7141 
 7142   ins_encode %{
 7143     __ movslq($dst$$Register, $mem$$Address);
 7144   %}
 7145 
 7146   ins_pipe(ialu_reg_mem);
 7147 %}
 7148 
 7149 // Load Integer with mask 0xFF into Long Register
 7150 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7151   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7152 
 7153   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7154   ins_encode %{
 7155     __ movzbq($dst$$Register, $mem$$Address);
 7156   %}
 7157   ins_pipe(ialu_reg_mem);
 7158 %}
 7159 
 7160 // Load Integer with mask 0xFFFF into Long Register
 7161 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7162   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7163 
 7164   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7165   ins_encode %{
 7166     __ movzwq($dst$$Register, $mem$$Address);
 7167   %}
 7168   ins_pipe(ialu_reg_mem);
 7169 %}
 7170 
 7171 // Load Integer with a 31-bit mask into Long Register
 7172 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7173   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7174   effect(KILL cr);
 7175 
 7176   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7177             "andl    $dst, $mask" %}
 7178   ins_encode %{
 7179     Register Rdst = $dst$$Register;
 7180     __ movl(Rdst, $mem$$Address);
 7181     __ andl(Rdst, $mask$$constant);
 7182   %}
 7183   ins_pipe(ialu_reg_mem);
 7184 %}
 7185 
 7186 // Load Unsigned Integer into Long Register
 7187 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7188 %{
 7189   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7190 
 7191   ins_cost(125);
 7192   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7193 
 7194   ins_encode %{
 7195     __ movl($dst$$Register, $mem$$Address);
 7196   %}
 7197 
 7198   ins_pipe(ialu_reg_mem);
 7199 %}
 7200 
 7201 // Load Long
 7202 instruct loadL(rRegL dst, memory mem)
 7203 %{
 7204   match(Set dst (LoadL mem));
 7205 
 7206   ins_cost(125);
 7207   format %{ "movq    $dst, $mem\t# long" %}
 7208 
 7209   ins_encode %{
 7210     __ movq($dst$$Register, $mem$$Address);
 7211   %}
 7212 
 7213   ins_pipe(ialu_reg_mem); // XXX
 7214 %}
 7215 
 7216 // Load Range
 7217 instruct loadRange(rRegI dst, memory mem)
 7218 %{
 7219   match(Set dst (LoadRange mem));
 7220 
 7221   ins_cost(125); // XXX
 7222   format %{ "movl    $dst, $mem\t# range" %}
 7223   ins_encode %{
 7224     __ movl($dst$$Register, $mem$$Address);
 7225   %}
 7226   ins_pipe(ialu_reg_mem);
 7227 %}
 7228 
 7229 // Load Pointer
 7230 instruct loadP(rRegP dst, memory mem)
 7231 %{
 7232   match(Set dst (LoadP mem));
 7233   predicate(n->as_Load()->barrier_data() == 0);
 7234 
 7235   ins_cost(125); // XXX
 7236   format %{ "movq    $dst, $mem\t# ptr" %}
 7237   ins_encode %{
 7238     __ movq($dst$$Register, $mem$$Address);
 7239   %}
 7240   ins_pipe(ialu_reg_mem); // XXX
 7241 %}
 7242 
 7243 // Load Compressed Pointer
 7244 instruct loadN(rRegN dst, memory mem)
 7245 %{
 7246    predicate(n->as_Load()->barrier_data() == 0);
 7247    match(Set dst (LoadN mem));
 7248 
 7249    ins_cost(125); // XXX
 7250    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7251    ins_encode %{
 7252      __ movl($dst$$Register, $mem$$Address);
 7253    %}
 7254    ins_pipe(ialu_reg_mem); // XXX
 7255 %}
 7256 
 7257 
 7258 // Load Klass Pointer
 7259 instruct loadKlass(rRegP dst, memory mem)
 7260 %{
 7261   match(Set dst (LoadKlass mem));
 7262 
 7263   ins_cost(125); // XXX
 7264   format %{ "movq    $dst, $mem\t# class" %}
 7265   ins_encode %{
 7266     __ movq($dst$$Register, $mem$$Address);
 7267   %}
 7268   ins_pipe(ialu_reg_mem); // XXX
 7269 %}
 7270 
 7271 // Load narrow Klass Pointer
 7272 instruct loadNKlass(rRegN dst, memory mem)
 7273 %{
 7274   predicate(!UseCompactObjectHeaders);
 7275   match(Set dst (LoadNKlass mem));
 7276 
 7277   ins_cost(125); // XXX
 7278   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7279   ins_encode %{
 7280     __ movl($dst$$Register, $mem$$Address);
 7281   %}
 7282   ins_pipe(ialu_reg_mem); // XXX
 7283 %}
 7284 
 7285 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7286 %{
 7287   predicate(UseCompactObjectHeaders);
 7288   match(Set dst (LoadNKlass mem));
 7289   effect(KILL cr);
 7290   ins_cost(125);
 7291   format %{
 7292     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7293     "shrl    $dst, markWord::klass_shift"
 7294   %}
 7295   ins_encode %{
    // The incoming address points at obj-start + Type::klass_offset(). Rebase it to
    // obj-start so that we can load the narrow klass bits from the object's mark word instead.
 7298     Register d = $dst$$Register;
 7299     Address  s = ($mem$$Address).plus_disp(-Type::klass_offset());
 7300     if (UseAPX) {
 7301       __ eshrl(d, s, markWord::klass_shift, false);
 7302     } else {
 7303       __ movl(d, s);
 7304       __ shrl(d, markWord::klass_shift);
 7305     }
 7306   %}
 7307   ins_pipe(ialu_reg_mem);
 7308 %}
 7309 
 7310 // Load Float
 7311 instruct loadF(regF dst, memory mem)
 7312 %{
 7313   match(Set dst (LoadF mem));
 7314 
 7315   ins_cost(145); // XXX
 7316   format %{ "movss   $dst, $mem\t# float" %}
 7317   ins_encode %{
 7318     __ movflt($dst$$XMMRegister, $mem$$Address);
 7319   %}
 7320   ins_pipe(pipe_slow); // XXX
 7321 %}
 7322 
 7323 // Load Double
 7324 instruct loadD_partial(regD dst, memory mem)
 7325 %{
 7326   predicate(!UseXmmLoadAndClearUpper);
 7327   match(Set dst (LoadD mem));
 7328 
 7329   ins_cost(145); // XXX
 7330   format %{ "movlpd  $dst, $mem\t# double" %}
 7331   ins_encode %{
 7332     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7333   %}
 7334   ins_pipe(pipe_slow); // XXX
 7335 %}
 7336 
 7337 instruct loadD(regD dst, memory mem)
 7338 %{
 7339   predicate(UseXmmLoadAndClearUpper);
 7340   match(Set dst (LoadD mem));
 7341 
 7342   ins_cost(145); // XXX
 7343   format %{ "movsd   $dst, $mem\t# double" %}
 7344   ins_encode %{
 7345     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7346   %}
 7347   ins_pipe(pipe_slow); // XXX
 7348 %}
 7349 
 7350 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
 7351 %{
 7352   match(Set dst con);
 7353 
 7354   format %{ "leaq  $dst, $con\t# AOT Runtime Constants Address" %}
 7355 
 7356   ins_encode %{
 7357     __ load_aotrc_address($dst$$Register, (address)$con$$constant);
 7358   %}
 7359 
 7360   ins_pipe(ialu_reg_fat);
 7361 %}
 7362 
 7363 // max = java.lang.Math.max(float a, float b)
 7364 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
 7365   predicate(VM_Version::supports_avx10_2());
 7366   match(Set dst (MaxF a b));
 7367   format %{ "maxF $dst, $a, $b" %}
 7368   ins_encode %{
 7369     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7370   %}
 7371   ins_pipe( pipe_slow );
 7372 %}
 7373 
 7374 // max = java.lang.Math.max(float a, float b)
 7375 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7376   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7377   match(Set dst (MaxF a b));
 7378   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7379   format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7380   ins_encode %{
 7381     __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7382   %}
 7383   ins_pipe( pipe_slow );
 7384 %}
 7385 
 7386 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7387   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7388   match(Set dst (MaxF a b));
 7389   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7390 
 7391   format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
 7392   ins_encode %{
 7393     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7394                     false /*min*/, true /*single*/);
 7395   %}
 7396   ins_pipe( pipe_slow );
 7397 %}
 7398 
 7399 // max = java.lang.Math.max(double a, double b)
 7400 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
 7401   predicate(VM_Version::supports_avx10_2());
 7402   match(Set dst (MaxD a b));
 7403   format %{ "maxD $dst, $a, $b" %}
 7404   ins_encode %{
 7405     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7406   %}
 7407   ins_pipe( pipe_slow );
 7408 %}
 7409 
 7410 // max = java.lang.Math.max(double a, double b)
 7411 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7412   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7413   match(Set dst (MaxD a b));
 7414   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7415   format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7416   ins_encode %{
 7417     __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7418   %}
 7419   ins_pipe( pipe_slow );
 7420 %}
 7421 
 7422 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7423   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7424   match(Set dst (MaxD a b));
 7425   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7426 
 7427   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7428   ins_encode %{
 7429     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7430                     false /*min*/, false /*single*/);
 7431   %}
 7432   ins_pipe( pipe_slow );
 7433 %}
 7434 
// min = java.lang.Math.min(float a, float b)
 7436 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
 7437   predicate(VM_Version::supports_avx10_2());
 7438   match(Set dst (MinF a b));
 7439   format %{ "minF $dst, $a, $b" %}
 7440   ins_encode %{
 7441     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7442   %}
 7443   ins_pipe( pipe_slow );
 7444 %}
 7445 
 7446 // min = java.lang.Math.min(float a, float b)
 7447 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7448   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7449   match(Set dst (MinF a b));
 7450   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7451   format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7452   ins_encode %{
 7453     __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7454   %}
 7455   ins_pipe( pipe_slow );
 7456 %}
 7457 
 7458 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7459   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7460   match(Set dst (MinF a b));
 7461   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7462 
 7463   format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7464   ins_encode %{
 7465     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7466                     true /*min*/, true /*single*/);
 7467   %}
 7468   ins_pipe( pipe_slow );
 7469 %}
 7470 
// min = java.lang.Math.min(double a, double b)
 7472 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
 7473   predicate(VM_Version::supports_avx10_2());
 7474   match(Set dst (MinD a b));
 7475   format %{ "minD $dst, $a, $b" %}
 7476   ins_encode %{
 7477     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7478   %}
 7479   ins_pipe( pipe_slow );
 7480 %}
 7481 
 7482 // min = java.lang.Math.min(double a, double b)
 7483 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7484   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7485   match(Set dst (MinD a b));
 7486   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7487     format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7488   ins_encode %{
 7489     __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7490   %}
 7491   ins_pipe( pipe_slow );
 7492 %}
 7493 
 7494 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7495   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7496   match(Set dst (MinD a b));
 7497   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7498 
 7499   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7500   ins_encode %{
 7501     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7502                     true /*min*/, false /*single*/);
 7503   %}
 7504   ins_pipe( pipe_slow );
 7505 %}
 7506 
 7507 // Load Effective Address
 7508 instruct leaP8(rRegP dst, indOffset8 mem)
 7509 %{
 7510   match(Set dst mem);
 7511 
 7512   ins_cost(110); // XXX
 7513   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7514   ins_encode %{
 7515     __ leaq($dst$$Register, $mem$$Address);
 7516   %}
 7517   ins_pipe(ialu_reg_reg_fat);
 7518 %}
 7519 
 7520 instruct leaP32(rRegP dst, indOffset32 mem)
 7521 %{
 7522   match(Set dst mem);
 7523 
 7524   ins_cost(110);
 7525   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7526   ins_encode %{
 7527     __ leaq($dst$$Register, $mem$$Address);
 7528   %}
 7529   ins_pipe(ialu_reg_reg_fat);
 7530 %}
 7531 
 7532 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7533 %{
 7534   match(Set dst mem);
 7535 
 7536   ins_cost(110);
 7537   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7538   ins_encode %{
 7539     __ leaq($dst$$Register, $mem$$Address);
 7540   %}
 7541   ins_pipe(ialu_reg_reg_fat);
 7542 %}
 7543 
 7544 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7545 %{
 7546   match(Set dst mem);
 7547 
 7548   ins_cost(110);
 7549   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7550   ins_encode %{
 7551     __ leaq($dst$$Register, $mem$$Address);
 7552   %}
 7553   ins_pipe(ialu_reg_reg_fat);
 7554 %}
 7555 
 7556 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7557 %{
 7558   match(Set dst mem);
 7559 
 7560   ins_cost(110);
 7561   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7562   ins_encode %{
 7563     __ leaq($dst$$Register, $mem$$Address);
 7564   %}
 7565   ins_pipe(ialu_reg_reg_fat);
 7566 %}
 7567 
 7568 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7569 %{
 7570   match(Set dst mem);
 7571 
 7572   ins_cost(110);
 7573   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7574   ins_encode %{
 7575     __ leaq($dst$$Register, $mem$$Address);
 7576   %}
 7577   ins_pipe(ialu_reg_reg_fat);
 7578 %}
 7579 
 7580 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7581 %{
 7582   match(Set dst mem);
 7583 
 7584   ins_cost(110);
 7585   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7586   ins_encode %{
 7587     __ leaq($dst$$Register, $mem$$Address);
 7588   %}
 7589   ins_pipe(ialu_reg_reg_fat);
 7590 %}
 7591 
 7592 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7593 %{
 7594   match(Set dst mem);
 7595 
 7596   ins_cost(110);
 7597   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7598   ins_encode %{
 7599     __ leaq($dst$$Register, $mem$$Address);
 7600   %}
 7601   ins_pipe(ialu_reg_reg_fat);
 7602 %}
 7603 
 7604 // Load Effective Address which uses Narrow (32-bits) oop
 7605 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7606 %{
 7607   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7608   match(Set dst mem);
 7609 
 7610   ins_cost(110);
 7611   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7612   ins_encode %{
 7613     __ leaq($dst$$Register, $mem$$Address);
 7614   %}
 7615   ins_pipe(ialu_reg_reg_fat);
 7616 %}
 7617 
 7618 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7619 %{
 7620   predicate(CompressedOops::shift() == 0);
 7621   match(Set dst mem);
 7622 
 7623   ins_cost(110); // XXX
 7624   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7625   ins_encode %{
 7626     __ leaq($dst$$Register, $mem$$Address);
 7627   %}
 7628   ins_pipe(ialu_reg_reg_fat);
 7629 %}
 7630 
 7631 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7632 %{
 7633   predicate(CompressedOops::shift() == 0);
 7634   match(Set dst mem);
 7635 
 7636   ins_cost(110);
 7637   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7638   ins_encode %{
 7639     __ leaq($dst$$Register, $mem$$Address);
 7640   %}
 7641   ins_pipe(ialu_reg_reg_fat);
 7642 %}
 7643 
 7644 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7645 %{
 7646   predicate(CompressedOops::shift() == 0);
 7647   match(Set dst mem);
 7648 
 7649   ins_cost(110);
 7650   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7651   ins_encode %{
 7652     __ leaq($dst$$Register, $mem$$Address);
 7653   %}
 7654   ins_pipe(ialu_reg_reg_fat);
 7655 %}
 7656 
 7657 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7658 %{
 7659   predicate(CompressedOops::shift() == 0);
 7660   match(Set dst mem);
 7661 
 7662   ins_cost(110);
 7663   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7664   ins_encode %{
 7665     __ leaq($dst$$Register, $mem$$Address);
 7666   %}
 7667   ins_pipe(ialu_reg_reg_fat);
 7668 %}
 7669 
 7670 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7671 %{
 7672   predicate(CompressedOops::shift() == 0);
 7673   match(Set dst mem);
 7674 
 7675   ins_cost(110);
 7676   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7677   ins_encode %{
 7678     __ leaq($dst$$Register, $mem$$Address);
 7679   %}
 7680   ins_pipe(ialu_reg_reg_fat);
 7681 %}
 7682 
 7683 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7684 %{
 7685   predicate(CompressedOops::shift() == 0);
 7686   match(Set dst mem);
 7687 
 7688   ins_cost(110);
 7689   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7690   ins_encode %{
 7691     __ leaq($dst$$Register, $mem$$Address);
 7692   %}
 7693   ins_pipe(ialu_reg_reg_fat);
 7694 %}
 7695 
 7696 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7697 %{
 7698   predicate(CompressedOops::shift() == 0);
 7699   match(Set dst mem);
 7700 
 7701   ins_cost(110);
 7702   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7703   ins_encode %{
 7704     __ leaq($dst$$Register, $mem$$Address);
 7705   %}
 7706   ins_pipe(ialu_reg_reg_fat);
 7707 %}
 7708 
 7709 instruct loadConI(rRegI dst, immI src)
 7710 %{
 7711   match(Set dst src);
 7712 
 7713   format %{ "movl    $dst, $src\t# int" %}
 7714   ins_encode %{
 7715     __ movl($dst$$Register, $src$$constant);
 7716   %}
 7717   ins_pipe(ialu_reg_fat); // XXX
 7718 %}
 7719 
 7720 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7721 %{
 7722   match(Set dst src);
 7723   effect(KILL cr);
 7724 
 7725   ins_cost(50);
 7726   format %{ "xorl    $dst, $dst\t# int" %}
 7727   ins_encode %{
 7728     __ xorl($dst$$Register, $dst$$Register);
 7729   %}
 7730   ins_pipe(ialu_reg);
 7731 %}
 7732 
 7733 instruct loadConL(rRegL dst, immL src)
 7734 %{
 7735   match(Set dst src);
 7736 
 7737   ins_cost(150);
 7738   format %{ "movq    $dst, $src\t# long" %}
 7739   ins_encode %{
 7740     __ mov64($dst$$Register, $src$$constant);
 7741   %}
 7742   ins_pipe(ialu_reg);
 7743 %}
 7744 
 7745 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7746 %{
 7747   match(Set dst src);
 7748   effect(KILL cr);
 7749 
 7750   ins_cost(50);
 7751   format %{ "xorl    $dst, $dst\t# long" %}
 7752   ins_encode %{
 7753     __ xorl($dst$$Register, $dst$$Register);
 7754   %}
 7755   ins_pipe(ialu_reg); // XXX
 7756 %}
 7757 
 7758 instruct loadConUL32(rRegL dst, immUL32 src)
 7759 %{
 7760   match(Set dst src);
 7761 
 7762   ins_cost(60);
 7763   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7764   ins_encode %{
 7765     __ movl($dst$$Register, $src$$constant);
 7766   %}
 7767   ins_pipe(ialu_reg);
 7768 %}
 7769 
 7770 instruct loadConL32(rRegL dst, immL32 src)
 7771 %{
 7772   match(Set dst src);
 7773 
 7774   ins_cost(70);
 7775   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7776   ins_encode %{
 7777     __ movq($dst$$Register, $src$$constant);
 7778   %}
 7779   ins_pipe(ialu_reg);
 7780 %}
 7781 
 7782 instruct loadConP(rRegP dst, immP con) %{
 7783   match(Set dst con);
 7784 
 7785   format %{ "movq    $dst, $con\t# ptr" %}
 7786   ins_encode %{
 7787     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7788   %}
 7789   ins_pipe(ialu_reg_fat); // XXX
 7790 %}
 7791 
 7792 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7793 %{
 7794   match(Set dst src);
 7795   effect(KILL cr);
 7796 
 7797   ins_cost(50);
 7798   format %{ "xorl    $dst, $dst\t# ptr" %}
 7799   ins_encode %{
 7800     __ xorl($dst$$Register, $dst$$Register);
 7801   %}
 7802   ins_pipe(ialu_reg);
 7803 %}
 7804 
 7805 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7806 %{
 7807   match(Set dst src);
 7808   effect(KILL cr);
 7809 
 7810   ins_cost(60);
 7811   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7812   ins_encode %{
 7813     __ movl($dst$$Register, $src$$constant);
 7814   %}
 7815   ins_pipe(ialu_reg);
 7816 %}
 7817 
 7818 instruct loadConF(regF dst, immF con) %{
 7819   match(Set dst con);
 7820   ins_cost(125);
 7821   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7822   ins_encode %{
 7823     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7824   %}
 7825   ins_pipe(pipe_slow);
 7826 %}
 7827 
 7828 instruct loadConH(regF dst, immH con) %{
 7829   match(Set dst con);
 7830   ins_cost(125);
 7831   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7832   ins_encode %{
 7833     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7834   %}
 7835   ins_pipe(pipe_slow);
 7836 %}
 7837 
 7838 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7839   match(Set dst src);
 7840   effect(KILL cr);
 7841   format %{ "xorq    $dst, $src\t# compressed null pointer" %}
 7842   ins_encode %{
 7843     __ xorq($dst$$Register, $dst$$Register);
 7844   %}
 7845   ins_pipe(ialu_reg);
 7846 %}
 7847 
 7848 instruct loadConN(rRegN dst, immN src) %{
 7849   match(Set dst src);
 7850 
 7851   ins_cost(125);
 7852   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7853   ins_encode %{
 7854     address con = (address)$src$$constant;
 7855     if (con == nullptr) {
 7856       ShouldNotReachHere();
 7857     } else {
 7858       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7859     }
 7860   %}
 7861   ins_pipe(ialu_reg_fat); // XXX
 7862 %}
 7863 
 7864 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7865   match(Set dst src);
 7866 
 7867   ins_cost(125);
 7868   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7869   ins_encode %{
 7870     address con = (address)$src$$constant;
 7871     if (con == nullptr) {
 7872       ShouldNotReachHere();
 7873     } else {
 7874       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7875     }
 7876   %}
 7877   ins_pipe(ialu_reg_fat); // XXX
 7878 %}
 7879 
 7880 instruct loadConF0(regF dst, immF0 src)
 7881 %{
 7882   match(Set dst src);
 7883   ins_cost(100);
 7884 
 7885   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7886   ins_encode %{
 7887     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7888   %}
 7889   ins_pipe(pipe_slow);
 7890 %}
 7891 
// Use the same format since predicate() cannot be used here.
 7893 instruct loadConD(regD dst, immD con) %{
 7894   match(Set dst con);
 7895   ins_cost(125);
 7896   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7897   ins_encode %{
 7898     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7899   %}
 7900   ins_pipe(pipe_slow);
 7901 %}
 7902 
 7903 instruct loadConD0(regD dst, immD0 src)
 7904 %{
 7905   match(Set dst src);
 7906   ins_cost(100);
 7907 
 7908   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7909   ins_encode %{
 7910     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7911   %}
 7912   ins_pipe(pipe_slow);
 7913 %}
 7914 
 7915 instruct loadSSI(rRegI dst, stackSlotI src)
 7916 %{
 7917   match(Set dst src);
 7918 
 7919   ins_cost(125);
 7920   format %{ "movl    $dst, $src\t# int stk" %}
 7921   ins_encode %{
 7922     __ movl($dst$$Register, $src$$Address);
 7923   %}
 7924   ins_pipe(ialu_reg_mem);
 7925 %}
 7926 
 7927 instruct loadSSL(rRegL dst, stackSlotL src)
 7928 %{
 7929   match(Set dst src);
 7930 
 7931   ins_cost(125);
 7932   format %{ "movq    $dst, $src\t# long stk" %}
 7933   ins_encode %{
 7934     __ movq($dst$$Register, $src$$Address);
 7935   %}
 7936   ins_pipe(ialu_reg_mem);
 7937 %}
 7938 
 7939 instruct loadSSP(rRegP dst, stackSlotP src)
 7940 %{
 7941   match(Set dst src);
 7942 
 7943   ins_cost(125);
 7944   format %{ "movq    $dst, $src\t# ptr stk" %}
 7945   ins_encode %{
 7946     __ movq($dst$$Register, $src$$Address);
 7947   %}
 7948   ins_pipe(ialu_reg_mem);
 7949 %}
 7950 
 7951 instruct loadSSF(regF dst, stackSlotF src)
 7952 %{
 7953   match(Set dst src);
 7954 
 7955   ins_cost(125);
 7956   format %{ "movss   $dst, $src\t# float stk" %}
 7957   ins_encode %{
 7958     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 7959   %}
 7960   ins_pipe(pipe_slow); // XXX
 7961 %}
 7962 
// Use the same format since predicate() cannot be used here.
 7964 instruct loadSSD(regD dst, stackSlotD src)
 7965 %{
 7966   match(Set dst src);
 7967 
 7968   ins_cost(125);
 7969   format %{ "movsd   $dst, $src\t# double stk" %}
 7970   ins_encode  %{
 7971     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 7972   %}
 7973   ins_pipe(pipe_slow); // XXX
 7974 %}
 7975 
 7976 // Prefetch instructions for allocation.
 7977 // Must be safe to execute with invalid address (cannot fault).
 7978 
 7979 instruct prefetchAlloc( memory mem ) %{
 7980   predicate(AllocatePrefetchInstr==3);
 7981   match(PrefetchAllocation mem);
 7982   ins_cost(125);
 7983 
 7984   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7985   ins_encode %{
 7986     __ prefetchw($mem$$Address);
 7987   %}
 7988   ins_pipe(ialu_mem);
 7989 %}
 7990 
 7991 instruct prefetchAllocNTA( memory mem ) %{
 7992   predicate(AllocatePrefetchInstr==0);
 7993   match(PrefetchAllocation mem);
 7994   ins_cost(125);
 7995 
 7996   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7997   ins_encode %{
 7998     __ prefetchnta($mem$$Address);
 7999   %}
 8000   ins_pipe(ialu_mem);
 8001 %}
 8002 
 8003 instruct prefetchAllocT0( memory mem ) %{
 8004   predicate(AllocatePrefetchInstr==1);
 8005   match(PrefetchAllocation mem);
 8006   ins_cost(125);
 8007 
 8008   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 8009   ins_encode %{
 8010     __ prefetcht0($mem$$Address);
 8011   %}
 8012   ins_pipe(ialu_mem);
 8013 %}
 8014 
 8015 instruct prefetchAllocT2( memory mem ) %{
 8016   predicate(AllocatePrefetchInstr==2);
 8017   match(PrefetchAllocation mem);
 8018   ins_cost(125);
 8019 
 8020   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 8021   ins_encode %{
 8022     __ prefetcht2($mem$$Address);
 8023   %}
 8024   ins_pipe(ialu_mem);
 8025 %}
 8026 
 8027 //----------Store Instructions-------------------------------------------------
 8028 
 8029 // Store Byte
 8030 instruct storeB(memory mem, rRegI src)
 8031 %{
 8032   match(Set mem (StoreB mem src));
 8033 
 8034   ins_cost(125); // XXX
 8035   format %{ "movb    $mem, $src\t# byte" %}
 8036   ins_encode %{
 8037     __ movb($mem$$Address, $src$$Register);
 8038   %}
 8039   ins_pipe(ialu_mem_reg);
 8040 %}
 8041 
 8042 // Store Char/Short
 8043 instruct storeC(memory mem, rRegI src)
 8044 %{
 8045   match(Set mem (StoreC mem src));
 8046 
 8047   ins_cost(125); // XXX
 8048   format %{ "movw    $mem, $src\t# char/short" %}
 8049   ins_encode %{
 8050     __ movw($mem$$Address, $src$$Register);
 8051   %}
 8052   ins_pipe(ialu_mem_reg);
 8053 %}
 8054 
 8055 // Store Integer
 8056 instruct storeI(memory mem, rRegI src)
 8057 %{
 8058   match(Set mem (StoreI mem src));
 8059 
 8060   ins_cost(125); // XXX
 8061   format %{ "movl    $mem, $src\t# int" %}
 8062   ins_encode %{
 8063     __ movl($mem$$Address, $src$$Register);
 8064   %}
 8065   ins_pipe(ialu_mem_reg);
 8066 %}
 8067 
 8068 // Store Long
 8069 instruct storeL(memory mem, rRegL src)
 8070 %{
 8071   match(Set mem (StoreL mem src));
 8072 
 8073   ins_cost(125); // XXX
 8074   format %{ "movq    $mem, $src\t# long" %}
 8075   ins_encode %{
 8076     __ movq($mem$$Address, $src$$Register);
 8077   %}
 8078   ins_pipe(ialu_mem_reg); // XXX
 8079 %}
 8080 
 8081 // Store Pointer
 8082 instruct storeP(memory mem, any_RegP src)
 8083 %{
 8084   predicate(n->as_Store()->barrier_data() == 0);
 8085   match(Set mem (StoreP mem src));
 8086 
 8087   ins_cost(125); // XXX
 8088   format %{ "movq    $mem, $src\t# ptr" %}
 8089   ins_encode %{
 8090     __ movq($mem$$Address, $src$$Register);
 8091   %}
 8092   ins_pipe(ialu_mem_reg);
 8093 %}
 8094 
 8095 instruct storeImmP0(memory mem, immP0 zero)
 8096 %{
 8097   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8098   match(Set mem (StoreP mem zero));
 8099 
 8100   ins_cost(125); // XXX
 8101   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8102   ins_encode %{
 8103     __ movq($mem$$Address, r12);
 8104   %}
 8105   ins_pipe(ialu_mem_reg);
 8106 %}
 8107 
 8108 // Store Null Pointer, mark word, or other simple pointer constant.
 8109 instruct storeImmP(memory mem, immP31 src)
 8110 %{
 8111   predicate(n->as_Store()->barrier_data() == 0);
 8112   match(Set mem (StoreP mem src));
 8113 
 8114   ins_cost(150); // XXX
 8115   format %{ "movq    $mem, $src\t# ptr" %}
 8116   ins_encode %{
 8117     __ movq($mem$$Address, $src$$constant);
 8118   %}
 8119   ins_pipe(ialu_mem_imm);
 8120 %}
 8121 
 8122 // Store Compressed Pointer
 8123 instruct storeN(memory mem, rRegN src)
 8124 %{
 8125   predicate(n->as_Store()->barrier_data() == 0);
 8126   match(Set mem (StoreN mem src));
 8127 
 8128   ins_cost(125); // XXX
 8129   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8130   ins_encode %{
 8131     __ movl($mem$$Address, $src$$Register);
 8132   %}
 8133   ins_pipe(ialu_mem_reg);
 8134 %}
 8135 
 8136 instruct storeNKlass(memory mem, rRegN src)
 8137 %{
 8138   match(Set mem (StoreNKlass mem src));
 8139 
 8140   ins_cost(125); // XXX
 8141   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8142   ins_encode %{
 8143     __ movl($mem$$Address, $src$$Register);
 8144   %}
 8145   ins_pipe(ialu_mem_reg);
 8146 %}
 8147 
 8148 instruct storeImmN0(memory mem, immN0 zero)
 8149 %{
 8150   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8151   match(Set mem (StoreN mem zero));
 8152 
 8153   ins_cost(125); // XXX
 8154   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8155   ins_encode %{
 8156     __ movl($mem$$Address, r12);
 8157   %}
 8158   ins_pipe(ialu_mem_reg);
 8159 %}
 8160 
 8161 instruct storeImmN(memory mem, immN src)
 8162 %{
 8163   predicate(n->as_Store()->barrier_data() == 0);
 8164   match(Set mem (StoreN mem src));
 8165 
 8166   ins_cost(150); // XXX
 8167   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8168   ins_encode %{
 8169     address con = (address)$src$$constant;
 8170     if (con == nullptr) {
 8171       __ movl($mem$$Address, 0);
 8172     } else {
 8173       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8174     }
 8175   %}
 8176   ins_pipe(ialu_mem_imm);
 8177 %}
 8178 
 8179 instruct storeImmNKlass(memory mem, immNKlass src)
 8180 %{
 8181   match(Set mem (StoreNKlass mem src));
 8182 
 8183   ins_cost(150); // XXX
 8184   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8185   ins_encode %{
 8186     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8187   %}
 8188   ins_pipe(ialu_mem_imm);
 8189 %}
 8190 
 8191 // Store Integer Immediate
 8192 instruct storeImmI0(memory mem, immI_0 zero)
 8193 %{
 8194   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8195   match(Set mem (StoreI mem zero));
 8196 
 8197   ins_cost(125); // XXX
 8198   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8199   ins_encode %{
 8200     __ movl($mem$$Address, r12);
 8201   %}
 8202   ins_pipe(ialu_mem_reg);
 8203 %}
 8204 
 8205 instruct storeImmI(memory mem, immI src)
 8206 %{
 8207   match(Set mem (StoreI mem src));
 8208 
 8209   ins_cost(150);
 8210   format %{ "movl    $mem, $src\t# int" %}
 8211   ins_encode %{
 8212     __ movl($mem$$Address, $src$$constant);
 8213   %}
 8214   ins_pipe(ialu_mem_imm);
 8215 %}
 8216 
 8217 // Store Long Immediate
 8218 instruct storeImmL0(memory mem, immL0 zero)
 8219 %{
 8220   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8221   match(Set mem (StoreL mem zero));
 8222 
 8223   ins_cost(125); // XXX
 8224   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8225   ins_encode %{
 8226     __ movq($mem$$Address, r12);
 8227   %}
 8228   ins_pipe(ialu_mem_reg);
 8229 %}
 8230 
 8231 instruct storeImmL(memory mem, immL32 src)
 8232 %{
 8233   match(Set mem (StoreL mem src));
 8234 
 8235   ins_cost(150);
 8236   format %{ "movq    $mem, $src\t# long" %}
 8237   ins_encode %{
 8238     __ movq($mem$$Address, $src$$constant);
 8239   %}
 8240   ins_pipe(ialu_mem_imm);
 8241 %}
 8242 
 8243 // Store Short/Char Immediate
 8244 instruct storeImmC0(memory mem, immI_0 zero)
 8245 %{
 8246   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8247   match(Set mem (StoreC mem zero));
 8248 
 8249   ins_cost(125); // XXX
 8250   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8251   ins_encode %{
 8252     __ movw($mem$$Address, r12);
 8253   %}
 8254   ins_pipe(ialu_mem_reg);
 8255 %}
 8256 
 8257 instruct storeImmI16(memory mem, immI16 src)
 8258 %{
 8259   predicate(UseStoreImmI16);
 8260   match(Set mem (StoreC mem src));
 8261 
 8262   ins_cost(150);
 8263   format %{ "movw    $mem, $src\t# short/char" %}
 8264   ins_encode %{
 8265     __ movw($mem$$Address, $src$$constant);
 8266   %}
 8267   ins_pipe(ialu_mem_imm);
 8268 %}
 8269 
 8270 // Store Byte Immediate
 8271 instruct storeImmB0(memory mem, immI_0 zero)
 8272 %{
 8273   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8274   match(Set mem (StoreB mem zero));
 8275 
 8276   ins_cost(125); // XXX
 8277   format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8278   ins_encode %{
 8279     __ movb($mem$$Address, r12);
 8280   %}
 8281   ins_pipe(ialu_mem_reg);
 8282 %}
 8283 
 8284 instruct storeImmB(memory mem, immI8 src)
 8285 %{
 8286   match(Set mem (StoreB mem src));
 8287 
 8288   ins_cost(150); // XXX
 8289   format %{ "movb    $mem, $src\t# byte" %}
 8290   ins_encode %{
 8291     __ movb($mem$$Address, $src$$constant);
 8292   %}
 8293   ins_pipe(ialu_mem_imm);
 8294 %}
 8295 
 8296 // Store Float
 8297 instruct storeF(memory mem, regF src)
 8298 %{
 8299   match(Set mem (StoreF mem src));
 8300 
 8301   ins_cost(95); // XXX
 8302   format %{ "movss   $mem, $src\t# float" %}
 8303   ins_encode %{
 8304     __ movflt($mem$$Address, $src$$XMMRegister);
 8305   %}
 8306   ins_pipe(pipe_slow); // XXX
 8307 %}
 8308 
// Store immediate Float value (faster than a store from an XMM register)
 8310 instruct storeF0(memory mem, immF0 zero)
 8311 %{
 8312   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8313   match(Set mem (StoreF mem zero));
 8314 
 8315   ins_cost(25); // XXX
 8316   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8317   ins_encode %{
 8318     __ movl($mem$$Address, r12);
 8319   %}
 8320   ins_pipe(ialu_mem_reg);
 8321 %}
 8322 
 8323 instruct storeF_imm(memory mem, immF src)
 8324 %{
 8325   match(Set mem (StoreF mem src));
 8326 
 8327   ins_cost(50);
 8328   format %{ "movl    $mem, $src\t# float" %}
 8329   ins_encode %{
 8330     __ movl($mem$$Address, jint_cast($src$$constant));
 8331   %}
 8332   ins_pipe(ialu_mem_imm);
 8333 %}
 8334 
 8335 // Store Double
 8336 instruct storeD(memory mem, regD src)
 8337 %{
 8338   match(Set mem (StoreD mem src));
 8339 
 8340   ins_cost(95); // XXX
 8341   format %{ "movsd   $mem, $src\t# double" %}
 8342   ins_encode %{
 8343     __ movdbl($mem$$Address, $src$$XMMRegister);
 8344   %}
 8345   ins_pipe(pipe_slow); // XXX
 8346 %}
 8347 
// Store immediate double 0.0 (faster than a store from an XMM register)
 8349 instruct storeD0_imm(memory mem, immD0 src)
 8350 %{
 8351   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8352   match(Set mem (StoreD mem src));
 8353 
 8354   ins_cost(50);
 8355   format %{ "movq    $mem, $src\t# double 0." %}
 8356   ins_encode %{
 8357     __ movq($mem$$Address, $src$$constant);
 8358   %}
 8359   ins_pipe(ialu_mem_imm);
 8360 %}
 8361 
 8362 instruct storeD0(memory mem, immD0 zero)
 8363 %{
 8364   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8365   match(Set mem (StoreD mem zero));
 8366 
 8367   ins_cost(25); // XXX
 8368   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8369   ins_encode %{
 8370     __ movq($mem$$Address, r12);
 8371   %}
 8372   ins_pipe(ialu_mem_reg);
 8373 %}
 8374 
 8375 instruct storeSSI(stackSlotI dst, rRegI src)
 8376 %{
 8377   match(Set dst src);
 8378 
 8379   ins_cost(100);
 8380   format %{ "movl    $dst, $src\t# int stk" %}
 8381   ins_encode %{
 8382     __ movl($dst$$Address, $src$$Register);
 8383   %}
 8384   ins_pipe( ialu_mem_reg );
 8385 %}
 8386 
 8387 instruct storeSSL(stackSlotL dst, rRegL src)
 8388 %{
 8389   match(Set dst src);
 8390 
 8391   ins_cost(100);
 8392   format %{ "movq    $dst, $src\t# long stk" %}
 8393   ins_encode %{
 8394     __ movq($dst$$Address, $src$$Register);
 8395   %}
 8396   ins_pipe(ialu_mem_reg);
 8397 %}
 8398 
 8399 instruct storeSSP(stackSlotP dst, rRegP src)
 8400 %{
 8401   match(Set dst src);
 8402 
 8403   ins_cost(100);
 8404   format %{ "movq    $dst, $src\t# ptr stk" %}
 8405   ins_encode %{
 8406     __ movq($dst$$Address, $src$$Register);
 8407   %}
 8408   ins_pipe(ialu_mem_reg);
 8409 %}
 8410 
 8411 instruct storeSSF(stackSlotF dst, regF src)
 8412 %{
 8413   match(Set dst src);
 8414 
 8415   ins_cost(95); // XXX
 8416   format %{ "movss   $dst, $src\t# float stk" %}
 8417   ins_encode %{
 8418     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8419   %}
 8420   ins_pipe(pipe_slow); // XXX
 8421 %}
 8422 
 8423 instruct storeSSD(stackSlotD dst, regD src)
 8424 %{
 8425   match(Set dst src);
 8426 
 8427   ins_cost(95); // XXX
 8428   format %{ "movsd   $dst, $src\t# double stk" %}
 8429   ins_encode %{
 8430     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8431   %}
 8432   ins_pipe(pipe_slow); // XXX
 8433 %}
 8434 
 8435 instruct cacheWB(indirect addr)
 8436 %{
 8437   predicate(VM_Version::supports_data_cache_line_flush());
 8438   match(CacheWB addr);
 8439 
 8440   ins_cost(100);
 8441   format %{"cache wb $addr" %}
 8442   ins_encode %{
 8443     assert($addr->index_position() < 0, "should be");
 8444     assert($addr$$disp == 0, "should be");
 8445     __ cache_wb(Address($addr$$base$$Register, 0));
 8446   %}
 8447   ins_pipe(pipe_slow); // XXX
 8448 %}
 8449 
 8450 instruct cacheWBPreSync()
 8451 %{
 8452   predicate(VM_Version::supports_data_cache_line_flush());
 8453   match(CacheWBPreSync);
 8454 
 8455   ins_cost(100);
 8456   format %{"cache wb presync" %}
 8457   ins_encode %{
 8458     __ cache_wbsync(true);
 8459   %}
 8460   ins_pipe(pipe_slow); // XXX
 8461 %}
 8462 
 8463 instruct cacheWBPostSync()
 8464 %{
 8465   predicate(VM_Version::supports_data_cache_line_flush());
 8466   match(CacheWBPostSync);
 8467 
 8468   ins_cost(100);
 8469   format %{"cache wb postsync" %}
 8470   ins_encode %{
 8471     __ cache_wbsync(false);
 8472   %}
 8473   ins_pipe(pipe_slow); // XXX
 8474 %}
 8475 
 8476 //----------BSWAP Instructions-------------------------------------------------
 8477 instruct bytes_reverse_int(rRegI dst) %{
 8478   match(Set dst (ReverseBytesI dst));
 8479 
 8480   format %{ "bswapl  $dst" %}
 8481   ins_encode %{
 8482     __ bswapl($dst$$Register);
 8483   %}
 8484   ins_pipe( ialu_reg );
 8485 %}
 8486 
 8487 instruct bytes_reverse_long(rRegL dst) %{
 8488   match(Set dst (ReverseBytesL dst));
 8489 
 8490   format %{ "bswapq  $dst" %}
 8491   ins_encode %{
 8492     __ bswapq($dst$$Register);
 8493   %}
 8494   ins_pipe( ialu_reg);
 8495 %}
 8496 
 8497 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8498   match(Set dst (ReverseBytesUS dst));
 8499   effect(KILL cr);
 8500 
 8501   format %{ "bswapl  $dst\n\t"
 8502             "shrl    $dst,16\n\t" %}
 8503   ins_encode %{
 8504     __ bswapl($dst$$Register);
 8505     __ shrl($dst$$Register, 16);
 8506   %}
 8507   ins_pipe( ialu_reg );
 8508 %}
 8509 
 8510 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8511   match(Set dst (ReverseBytesS dst));
 8512   effect(KILL cr);
 8513 
 8514   format %{ "bswapl  $dst\n\t"
 8515             "sar     $dst,16\n\t" %}
 8516   ins_encode %{
 8517     __ bswapl($dst$$Register);
 8518     __ sarl($dst$$Register, 16);
 8519   %}
 8520   ins_pipe( ialu_reg );
 8521 %}
 8522 
 8523 //---------- Zeros Count Instructions ------------------------------------------
 8524 
 8525 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8526   predicate(UseCountLeadingZerosInstruction);
 8527   match(Set dst (CountLeadingZerosI src));
 8528   effect(KILL cr);
 8529 
 8530   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8531   ins_encode %{
 8532     __ lzcntl($dst$$Register, $src$$Register);
 8533   %}
 8534   ins_pipe(ialu_reg);
 8535 %}
 8536 
 8537 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8538   predicate(UseCountLeadingZerosInstruction);
 8539   match(Set dst (CountLeadingZerosI (LoadI src)));
 8540   effect(KILL cr);
 8541   ins_cost(175);
 8542   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8543   ins_encode %{
 8544     __ lzcntl($dst$$Register, $src$$Address);
 8545   %}
 8546   ins_pipe(ialu_reg_mem);
 8547 %}
 8548 
 8549 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8550   predicate(!UseCountLeadingZerosInstruction);
 8551   match(Set dst (CountLeadingZerosI src));
 8552   effect(KILL cr);
 8553 
 8554   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8555             "jnz     skip\n\t"
 8556             "movl    $dst, -1\n"
 8557       "skip:\n\t"
 8558             "negl    $dst\n\t"
 8559             "addl    $dst, 31" %}
 8560   ins_encode %{
 8561     Register Rdst = $dst$$Register;
 8562     Register Rsrc = $src$$Register;
 8563     Label skip;
 8564     __ bsrl(Rdst, Rsrc);
 8565     __ jccb(Assembler::notZero, skip);
 8566     __ movl(Rdst, -1);
 8567     __ bind(skip);
 8568     __ negl(Rdst);
 8569     __ addl(Rdst, BitsPerInt - 1);
 8570   %}
 8571   ins_pipe(ialu_reg);
 8572 %}
 8573 
 8574 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8575   predicate(UseCountLeadingZerosInstruction);
 8576   match(Set dst (CountLeadingZerosL src));
 8577   effect(KILL cr);
 8578 
 8579   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8580   ins_encode %{
 8581     __ lzcntq($dst$$Register, $src$$Register);
 8582   %}
 8583   ins_pipe(ialu_reg);
 8584 %}
 8585 
 8586 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8587   predicate(UseCountLeadingZerosInstruction);
 8588   match(Set dst (CountLeadingZerosL (LoadL src)));
 8589   effect(KILL cr);
 8590   ins_cost(175);
 8591   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8592   ins_encode %{
 8593     __ lzcntq($dst$$Register, $src$$Address);
 8594   %}
 8595   ins_pipe(ialu_reg_mem);
 8596 %}
 8597 
 8598 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8599   predicate(!UseCountLeadingZerosInstruction);
 8600   match(Set dst (CountLeadingZerosL src));
 8601   effect(KILL cr);
 8602 
 8603   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8604             "jnz     skip\n\t"
 8605             "movl    $dst, -1\n"
 8606       "skip:\n\t"
 8607             "negl    $dst\n\t"
 8608             "addl    $dst, 63" %}
 8609   ins_encode %{
 8610     Register Rdst = $dst$$Register;
 8611     Register Rsrc = $src$$Register;
 8612     Label skip;
 8613     __ bsrq(Rdst, Rsrc);
 8614     __ jccb(Assembler::notZero, skip);
 8615     __ movl(Rdst, -1);
 8616     __ bind(skip);
 8617     __ negl(Rdst);
 8618     __ addl(Rdst, BitsPerLong - 1);
 8619   %}
 8620   ins_pipe(ialu_reg);
 8621 %}
 8622 
 8623 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8624   predicate(UseCountTrailingZerosInstruction);
 8625   match(Set dst (CountTrailingZerosI src));
 8626   effect(KILL cr);
 8627 
 8628   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8629   ins_encode %{
 8630     __ tzcntl($dst$$Register, $src$$Register);
 8631   %}
 8632   ins_pipe(ialu_reg);
 8633 %}
 8634 
 8635 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8636   predicate(UseCountTrailingZerosInstruction);
 8637   match(Set dst (CountTrailingZerosI (LoadI src)));
 8638   effect(KILL cr);
 8639   ins_cost(175);
 8640   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8641   ins_encode %{
 8642     __ tzcntl($dst$$Register, $src$$Address);
 8643   %}
 8644   ins_pipe(ialu_reg_mem);
 8645 %}
 8646 
 8647 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8648   predicate(!UseCountTrailingZerosInstruction);
 8649   match(Set dst (CountTrailingZerosI src));
 8650   effect(KILL cr);
 8651 
 8652   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8653             "jnz     done\n\t"
 8654             "movl    $dst, 32\n"
 8655       "done:" %}
 8656   ins_encode %{
 8657     Register Rdst = $dst$$Register;
 8658     Label done;
 8659     __ bsfl(Rdst, $src$$Register);
 8660     __ jccb(Assembler::notZero, done);
 8661     __ movl(Rdst, BitsPerInt);
 8662     __ bind(done);
 8663   %}
 8664   ins_pipe(ialu_reg);
 8665 %}
 8666 
 8667 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8668   predicate(UseCountTrailingZerosInstruction);
 8669   match(Set dst (CountTrailingZerosL src));
 8670   effect(KILL cr);
 8671 
 8672   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8673   ins_encode %{
 8674     __ tzcntq($dst$$Register, $src$$Register);
 8675   %}
 8676   ins_pipe(ialu_reg);
 8677 %}
 8678 
 8679 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8680   predicate(UseCountTrailingZerosInstruction);
 8681   match(Set dst (CountTrailingZerosL (LoadL src)));
 8682   effect(KILL cr);
 8683   ins_cost(175);
 8684   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8685   ins_encode %{
 8686     __ tzcntq($dst$$Register, $src$$Address);
 8687   %}
 8688   ins_pipe(ialu_reg_mem);
 8689 %}
 8690 
 8691 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8692   predicate(!UseCountTrailingZerosInstruction);
 8693   match(Set dst (CountTrailingZerosL src));
 8694   effect(KILL cr);
 8695 
 8696   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8697             "jnz     done\n\t"
 8698             "movl    $dst, 64\n"
 8699       "done:" %}
 8700   ins_encode %{
 8701     Register Rdst = $dst$$Register;
 8702     Label done;
 8703     __ bsfq(Rdst, $src$$Register);
 8704     __ jccb(Assembler::notZero, done);
 8705     __ movl(Rdst, BitsPerLong);
 8706     __ bind(done);
 8707   %}
 8708   ins_pipe(ialu_reg);
 8709 %}
 8710 
 8711 //--------------- Reverse Operation Instructions ----------------
 8712 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8713   predicate(!VM_Version::supports_gfni());
 8714   match(Set dst (ReverseI src));
 8715   effect(TEMP dst, TEMP rtmp, KILL cr);
 8716   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8717   ins_encode %{
 8718     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8719   %}
 8720   ins_pipe( ialu_reg );
 8721 %}
 8722 
 8723 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8724   predicate(VM_Version::supports_gfni());
 8725   match(Set dst (ReverseI src));
 8726   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8727   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8728   ins_encode %{
 8729     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8730   %}
 8731   ins_pipe( ialu_reg );
 8732 %}
 8733 
 8734 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8735   predicate(!VM_Version::supports_gfni());
 8736   match(Set dst (ReverseL src));
 8737   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8738   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8739   ins_encode %{
 8740     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8741   %}
 8742   ins_pipe( ialu_reg );
 8743 %}
 8744 
 8745 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8746   predicate(VM_Version::supports_gfni());
 8747   match(Set dst (ReverseL src));
 8748   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8749   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8750   ins_encode %{
 8751     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8752   %}
 8753   ins_pipe( ialu_reg );
 8754 %}
 8755 
 8756 //---------- Population Count Instructions -------------------------------------
 8757 
 8758 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8759   predicate(UsePopCountInstruction);
 8760   match(Set dst (PopCountI src));
 8761   effect(KILL cr);
 8762 
 8763   format %{ "popcnt  $dst, $src" %}
 8764   ins_encode %{
 8765     __ popcntl($dst$$Register, $src$$Register);
 8766   %}
 8767   ins_pipe(ialu_reg);
 8768 %}
 8769 
 8770 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8771   predicate(UsePopCountInstruction);
 8772   match(Set dst (PopCountI (LoadI mem)));
 8773   effect(KILL cr);
 8774 
 8775   format %{ "popcnt  $dst, $mem" %}
 8776   ins_encode %{
 8777     __ popcntl($dst$$Register, $mem$$Address);
 8778   %}
 8779   ins_pipe(ialu_reg);
 8780 %}
 8781 
 8782 // Note: Long.bitCount(long) returns an int.
 8783 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8784   predicate(UsePopCountInstruction);
 8785   match(Set dst (PopCountL src));
 8786   effect(KILL cr);
 8787 
 8788   format %{ "popcnt  $dst, $src" %}
 8789   ins_encode %{
 8790     __ popcntq($dst$$Register, $src$$Register);
 8791   %}
 8792   ins_pipe(ialu_reg);
 8793 %}
 8794 
 8795 // Note: Long.bitCount(long) returns an int.
 8796 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8797   predicate(UsePopCountInstruction);
 8798   match(Set dst (PopCountL (LoadL mem)));
 8799   effect(KILL cr);
 8800 
 8801   format %{ "popcnt  $dst, $mem" %}
 8802   ins_encode %{
 8803     __ popcntq($dst$$Register, $mem$$Address);
 8804   %}
 8805   ins_pipe(ialu_reg);
 8806 %}
 8807 
 8808 
 8809 //----------MemBar Instructions-----------------------------------------------
 8810 // Memory barrier flavors
 8811 
 8812 instruct membar_acquire()
 8813 %{
 8814   match(MemBarAcquire);
 8815   match(LoadFence);
 8816   ins_cost(0);
 8817 
 8818   size(0);
 8819   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8820   ins_encode();
 8821   ins_pipe(empty);
 8822 %}
 8823 
 8824 instruct membar_acquire_lock()
 8825 %{
 8826   match(MemBarAcquireLock);
 8827   ins_cost(0);
 8828 
 8829   size(0);
 8830   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8831   ins_encode();
 8832   ins_pipe(empty);
 8833 %}
 8834 
 8835 instruct membar_release()
 8836 %{
 8837   match(MemBarRelease);
 8838   match(StoreFence);
 8839   ins_cost(0);
 8840 
 8841   size(0);
 8842   format %{ "MEMBAR-release ! (empty encoding)" %}
 8843   ins_encode();
 8844   ins_pipe(empty);
 8845 %}
 8846 
 8847 instruct membar_release_lock()
 8848 %{
 8849   match(MemBarReleaseLock);
 8850   ins_cost(0);
 8851 
 8852   size(0);
 8853   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8854   ins_encode();
 8855   ins_pipe(empty);
 8856 %}
 8857 
 8858 instruct membar_storeload(rFlagsReg cr) %{
 8859   match(MemBarStoreLoad);
 8860   effect(KILL cr);
 8861   ins_cost(400);
 8862 
 8863   format %{
 8864     $$template
 8865     $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
 8866   %}
 8867   ins_encode %{
 8868     __ membar(Assembler::StoreLoad);
 8869   %}
 8870   ins_pipe(pipe_slow);
 8871 %}
 8872 
 8873 instruct membar_volatile(rFlagsReg cr) %{
 8874   match(MemBarVolatile);
 8875   effect(KILL cr);
 8876   ins_cost(400);
 8877 
 8878   format %{
 8879     $$template
 8880     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8881   %}
 8882   ins_encode %{
 8883     __ membar(Assembler::StoreLoad);
 8884   %}
 8885   ins_pipe(pipe_slow);
 8886 %}
 8887 
 8888 instruct unnecessary_membar_volatile()
 8889 %{
 8890   match(MemBarVolatile);
 8891   predicate(Matcher::post_store_load_barrier(n));
 8892   ins_cost(0);
 8893 
 8894   size(0);
 8895   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8896   ins_encode();
 8897   ins_pipe(empty);
 8898 %}
 8899 
 8900 instruct membar_full(rFlagsReg cr) %{
 8901   match(MemBarFull);
 8902   effect(KILL cr);
 8903   ins_cost(400);
 8904 
 8905   format %{
 8906     $$template
 8907     $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
 8908   %}
 8909   ins_encode %{
 8910     __ membar(Assembler::StoreLoad);
 8911   %}
 8912   ins_pipe(pipe_slow);
 8913 %}
 8914 
 8915 instruct membar_storestore() %{
 8916   match(MemBarStoreStore);
 8917   match(StoreStoreFence);
 8918   ins_cost(0);
 8919 
 8920   size(0);
 8921   format %{ "MEMBAR-storestore (empty encoding)" %}
 8922   ins_encode( );
 8923   ins_pipe(empty);
 8924 %}
 8925 
 8926 //----------Move Instructions--------------------------------------------------
 8927 
 8928 instruct castX2P(rRegP dst, rRegL src)
 8929 %{
 8930   match(Set dst (CastX2P src));
 8931 
 8932   format %{ "movq    $dst, $src\t# long->ptr" %}
 8933   ins_encode %{
 8934     if ($dst$$reg != $src$$reg) {
 8935       __ movptr($dst$$Register, $src$$Register);
 8936     }
 8937   %}
 8938   ins_pipe(ialu_reg_reg); // XXX
 8939 %}
 8940 
 8941 instruct castP2X(rRegL dst, rRegP src)
 8942 %{
 8943   match(Set dst (CastP2X src));
 8944 
 8945   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8946   ins_encode %{
 8947     if ($dst$$reg != $src$$reg) {
 8948       __ movptr($dst$$Register, $src$$Register);
 8949     }
 8950   %}
 8951   ins_pipe(ialu_reg_reg); // XXX
 8952 %}
 8953 
// Convert an oop into an int for vector-alignment masking
 8955 instruct convP2I(rRegI dst, rRegP src)
 8956 %{
 8957   match(Set dst (ConvL2I (CastP2X src)));
 8958 
 8959   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8960   ins_encode %{
 8961     __ movl($dst$$Register, $src$$Register);
 8962   %}
 8963   ins_pipe(ialu_reg_reg); // XXX
 8964 %}
 8965 
// Convert a compressed oop into an int for vector-alignment masking
// when oops are 32-bit (heap < 4Gb).
 8968 instruct convN2I(rRegI dst, rRegN src)
 8969 %{
 8970   predicate(CompressedOops::shift() == 0);
 8971   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8972 
 8973   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8974   ins_encode %{
 8975     __ movl($dst$$Register, $src$$Register);
 8976   %}
 8977   ins_pipe(ialu_reg_reg); // XXX
 8978 %}
 8979 
 8980 // Convert oop pointer into compressed form
 8981 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 8982   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8983   match(Set dst (EncodeP src));
 8984   effect(KILL cr);
 8985   format %{ "encode_heap_oop $dst,$src" %}
 8986   ins_encode %{
 8987     Register s = $src$$Register;
 8988     Register d = $dst$$Register;
 8989     if (s != d) {
 8990       __ movq(d, s);
 8991     }
 8992     __ encode_heap_oop(d);
 8993   %}
 8994   ins_pipe(ialu_reg_long);
 8995 %}
 8996 
 8997 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8998   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8999   match(Set dst (EncodeP src));
 9000   effect(KILL cr);
 9001   format %{ "encode_heap_oop_not_null $dst,$src" %}
 9002   ins_encode %{
 9003     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 9004   %}
 9005   ins_pipe(ialu_reg_long);
 9006 %}
 9007 
 9008 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 9009   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 9010             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 9011   match(Set dst (DecodeN src));
 9012   effect(KILL cr);
 9013   format %{ "decode_heap_oop $dst,$src" %}
 9014   ins_encode %{
 9015     Register s = $src$$Register;
 9016     Register d = $dst$$Register;
 9017     if (s != d) {
 9018       __ movq(d, s);
 9019     }
 9020     __ decode_heap_oop(d);
 9021   %}
 9022   ins_pipe(ialu_reg_long);
 9023 %}
 9024 
 9025 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9026   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 9027             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 9028   match(Set dst (DecodeN src));
 9029   effect(KILL cr);
 9030   format %{ "decode_heap_oop_not_null $dst,$src" %}
 9031   ins_encode %{
 9032     Register s = $src$$Register;
 9033     Register d = $dst$$Register;
 9034     if (s != d) {
 9035       __ decode_heap_oop_not_null(d, s);
 9036     } else {
 9037       __ decode_heap_oop_not_null(d);
 9038     }
 9039   %}
 9040   ins_pipe(ialu_reg_long);
 9041 %}
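// Compressed-oop arithmetic behind the encode/decode nodes above, for
// reference (base and shift come from CompressedOops):
//
//   narrow = (oop    - base) >> shift     // encode
//   oop    = (narrow << shift) + base     // decode
//
// A null oop must stay null in both directions, which is why the general
// forms above are more expensive than the _not_null variants.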
 9042 
 9043 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 9044   match(Set dst (EncodePKlass src));
 9045   effect(TEMP dst, KILL cr);
 9046   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 9047   ins_encode %{
 9048     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9049   %}
 9050   ins_pipe(ialu_reg_long);
 9051 %}
 9052 
 9053 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9054   match(Set dst (DecodeNKlass src));
 9055   effect(TEMP dst, KILL cr);
 9056   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 9057   ins_encode %{
 9058     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9059   %}
 9060   ins_pipe(ialu_reg_long);
 9061 %}
 9062 
 9063 //----------Conditional Move---------------------------------------------------
 9064 // Jump
 9065 // dummy instruction for generating temp registers
 9066 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 9067   match(Jump (LShiftL switch_val shift));
 9068   ins_cost(350);
 9069   predicate(false);
 9070   effect(TEMP dest);
 9071 
 9072   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9073             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9074   ins_encode %{
 9075     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9076     // to do that and the compiler is using that register as one it can allocate.
 9077     // So we build it all by hand.
 9078     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9079     // ArrayAddress dispatch(table, index);
 9080     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9081     __ lea($dest$$Register, $constantaddress);
 9082     __ jmp(dispatch);
 9083   %}
 9084   ins_pipe(pipe_jmp);
 9085 %}
 9086 
 9087 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 9088   match(Jump (AddL (LShiftL switch_val shift) offset));
 9089   ins_cost(350);
 9090   effect(TEMP dest);
 9091 
 9092   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9093             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9094   ins_encode %{
 9095     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9096     // to do that and the compiler is using that register as one it can allocate.
 9097     // So we build it all by hand.
 9098     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9099     // ArrayAddress dispatch(table, index);
 9100     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9101     __ lea($dest$$Register, $constantaddress);
 9102     __ jmp(dispatch);
 9103   %}
 9104   ins_pipe(pipe_jmp);
 9105 %}
 9106 
 9107 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 9108   match(Jump switch_val);
 9109   ins_cost(350);
 9110   effect(TEMP dest);
 9111 
 9112   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9113             "jmp     [$dest + $switch_val]\n\t" %}
 9114   ins_encode %{
 9115     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9116     // to do that and the compiler is using that register as one it can allocate.
 9117     // So we build it all by hand.
 9118     // Address index(noreg, switch_reg, Address::times_1);
 9119     // ArrayAddress dispatch(table, index);
 9120     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9121     __ lea($dest$$Register, $constantaddress);
 9122     __ jmp(dispatch);
 9123   %}
 9124   ins_pipe(pipe_jmp);
 9125 %}
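// The three Jump forms above implement jump-table dispatch: leaq loads the
// address of a table of code addresses from the constant section, and the
// (scaled, optionally displaced) switch value indexes into it.  This is the
// lowering used for dense switches, e.g. (illustrative Java):
//
//   switch (i) { case 0: ... case 1: ... case 2: ... }
//
// becomes a range check plus a single indexed jmp through the table.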
 9126 
 9127 // Conditional move
 9128 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9129 %{
 9130   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9131   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9132 
 9133   ins_cost(100); // XXX
 9134   format %{ "setbn$cop $dst\t# signed, int" %}
 9135   ins_encode %{
 9136     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9137     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9138   %}
 9139   ins_pipe(ialu_reg);
 9140 %}
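// When a CMoveI merely selects between the constants 1 and 0 (the predicate
// verifies that the hidden second data input is 0), no cmov is needed: a
// single setcc on the negated condition materializes the 0/1 result.
// Illustrative sketch:
//
//   x = (a < b) ? 0 : 1;    ==>    cmpl a, b ; setge x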
 9141 
 9142 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9143 %{
 9144   predicate(!UseAPX);
 9145   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9146 
 9147   ins_cost(200); // XXX
 9148   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9149   ins_encode %{
 9150     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9151   %}
 9152   ins_pipe(pipe_cmov_reg);
 9153 %}
 9154 
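// The _ndd variants target Intel APX "new data destination" encodings: the
// result is written to a third, distinct register instead of overwriting an
// input, saving the copy that the legacy two-operand form needs.  Sketch of
// the two shapes (illustrative, not exact encodings):
//
//   legacy:   movl      dst, src1       APX ndd:  ecmovl<cc> dst, src1, src2
//             cmovl<cc> dst, src2
//
// The Flag_ndd_demotable_opr* hints on some patterns mark operands whose
// coincidence with dst allows demotion back to the shorter legacy encoding.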
 9155 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9156 %{
 9157   predicate(UseAPX);
 9158   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9159 
 9160   ins_cost(200);
 9161   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9162   ins_encode %{
 9163     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9164   %}
 9165   ins_pipe(pipe_cmov_reg);
 9166 %}
 9167 
 9168 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9169 %{
 9170   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9171   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9172 
 9173   ins_cost(100); // XXX
 9174   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9175   ins_encode %{
 9176     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9177     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9178   %}
 9179   ins_pipe(ialu_reg);
 9180 %}
 9181 
 9182 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9183   predicate(!UseAPX);
 9184   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9185 
 9186   ins_cost(200); // XXX
 9187   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9188   ins_encode %{
 9189     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9190   %}
 9191   ins_pipe(pipe_cmov_reg);
 9192 %}
 9193 
 9194 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9195   predicate(UseAPX);
 9196   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9197 
 9198   ins_cost(200);
 9199   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9200   ins_encode %{
 9201     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9202   %}
 9203   ins_pipe(pipe_cmov_reg);
 9204 %}
 9205 
 9206 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9207 %{
 9208   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9209   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9210 
 9211   ins_cost(100); // XXX
 9212   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9213   ins_encode %{
 9214     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9215     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9216   %}
 9217   ins_pipe(ialu_reg);
 9218 %}
 9219 
 9220 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9221 %{
 9222   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9223   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9224 
 9225   ins_cost(100); // XXX
 9226   format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
 9227   ins_encode %{
 9228     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9229     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9230   %}
 9231   ins_pipe(ialu_reg);
 9232 %}
 9233 
 9234 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9235   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9236 
 9237   ins_cost(200);
 9238   expand %{
 9239     cmovI_regU(cop, cr, dst, src);
 9240   %}
 9241 %}
 9242 
 9243 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
 9244   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9245 
 9246   ins_cost(200);
 9247   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9248   ins_encode %{
 9249     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9250   %}
 9251   ins_pipe(pipe_cmov_reg);
 9252 %}
 9253 
 9254 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9255   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9256   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9257 
 9258   ins_cost(200); // XXX
 9259   format %{ "cmovpl  $dst, $src\n\t"
 9260             "cmovnel $dst, $src" %}
 9261   ins_encode %{
 9262     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9263     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9264   %}
 9265   ins_pipe(pipe_cmov_reg);
 9266 %}
 9267 
 9268 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9269 // inputs of the CMove
 9270 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9271   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9272   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9273   effect(TEMP dst);
 9274 
 9275   ins_cost(200); // XXX
 9276   format %{ "cmovpl  $dst, $src\n\t"
 9277             "cmovnel $dst, $src" %}
 9278   ins_encode %{
 9279     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9280     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9281   %}
 9282   ins_pipe(pipe_cmov_reg);
 9283 %}
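// The paired cmovp/cmovne above handle floating-point compares feeding an
// int CMove: ucomiss/ucomisd report "unordered" (NaN) through the parity
// flag, so the parity move picks up the NaN case and the not-equal move the
// ordinary one.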
 9284 
 9285 // Conditional move
 9286 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9287   predicate(!UseAPX);
 9288   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9289 
 9290   ins_cost(250); // XXX
 9291   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9292   ins_encode %{
 9293     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9294   %}
 9295   ins_pipe(pipe_cmov_mem);
 9296 %}
 9297 
 9298 // Conditional move
 9299 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9300 %{
 9301   predicate(UseAPX);
 9302   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9303 
 9304   ins_cost(250);
 9305   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9306   ins_encode %{
 9307     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9308   %}
 9309   ins_pipe(pipe_cmov_mem);
 9310 %}
 9311 
 9312 // Conditional move
 9313 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9314 %{
 9315   predicate(!UseAPX);
 9316   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9317 
 9318   ins_cost(250); // XXX
 9319   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9320   ins_encode %{
 9321     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9322   %}
 9323   ins_pipe(pipe_cmov_mem);
 9324 %}
 9325 
 9326 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9327   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9328 
 9329   ins_cost(250);
 9330   expand %{
 9331     cmovI_memU(cop, cr, dst, src);
 9332   %}
 9333 %}
 9334 
 9335 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9336 %{
 9337   predicate(UseAPX);
 9338   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9339 
 9340   ins_cost(250);
 9341   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9342   ins_encode %{
 9343     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9344   %}
 9345   ins_pipe(pipe_cmov_mem);
 9346 %}
 9347 
 9348 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
 9349 %{
 9350   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9351 
 9352   ins_cost(250);
 9353   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9354   ins_encode %{
 9355     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9356   %}
 9357   ins_pipe(pipe_cmov_mem);
 9358 %}
 9359 
 9360 // Conditional move
 9361 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9362 %{
 9363   predicate(!UseAPX);
 9364   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9365 
 9366   ins_cost(200); // XXX
 9367   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9368   ins_encode %{
 9369     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9370   %}
 9371   ins_pipe(pipe_cmov_reg);
 9372 %}
 9373 
 9374 // Conditional move ndd
 9375 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9376 %{
 9377   predicate(UseAPX);
 9378   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9379 
 9380   ins_cost(200);
 9381   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9382   ins_encode %{
 9383     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9384   %}
 9385   ins_pipe(pipe_cmov_reg);
 9386 %}
 9387 
 9388 // Conditional move
 9389 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9390 %{
 9391   predicate(!UseAPX);
 9392   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9393 
 9394   ins_cost(200); // XXX
 9395   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9396   ins_encode %{
 9397     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9398   %}
 9399   ins_pipe(pipe_cmov_reg);
 9400 %}
 9401 
 9402 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9403   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9404 
 9405   ins_cost(200);
 9406   expand %{
 9407     cmovN_regU(cop, cr, dst, src);
 9408   %}
 9409 %}
 9410 
 9411 // Conditional move ndd
 9412 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9413 %{
 9414   predicate(UseAPX);
 9415   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9416 
 9417   ins_cost(200);
 9418   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9419   ins_encode %{
 9420     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9421   %}
 9422   ins_pipe(pipe_cmov_reg);
 9423 %}
 9424 
 9425 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
 9426   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9427 
 9428   ins_cost(200);
 9429   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
 9430   ins_encode %{
 9431     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9432   %}
 9433   ins_pipe(pipe_cmov_reg);
 9434 %}
 9435 
 9436 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9437   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9438   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9439 
 9440   ins_cost(200); // XXX
 9441   format %{ "cmovpl  $dst, $src\n\t"
 9442             "cmovnel $dst, $src" %}
 9443   ins_encode %{
 9444     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9445     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9446   %}
 9447   ins_pipe(pipe_cmov_reg);
 9448 %}
 9449 
 9450 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9451 // inputs of the CMove
 9452 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9453   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9454   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9455 
 9456   ins_cost(200); // XXX
 9457   format %{ "cmovpl  $dst, $src\n\t"
 9458             "cmovnel $dst, $src" %}
 9459   ins_encode %{
 9460     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9461     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9462   %}
 9463   ins_pipe(pipe_cmov_reg);
 9464 %}
 9465 
 9466 // Conditional move
 9467 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9468 %{
 9469   predicate(!UseAPX);
 9470   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9471 
 9472   ins_cost(200); // XXX
 9473   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9474   ins_encode %{
 9475     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9476   %}
 9477   ins_pipe(pipe_cmov_reg);  // XXX
 9478 %}
 9479 
 9480 // Conditional move ndd
 9481 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9482 %{
 9483   predicate(UseAPX);
 9484   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9485 
 9486   ins_cost(200);
 9487   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9488   ins_encode %{
 9489     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9490   %}
 9491   ins_pipe(pipe_cmov_reg);
 9492 %}
 9493 
 9494 // Conditional move
 9495 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9496 %{
 9497   predicate(!UseAPX);
 9498   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9499 
 9500   ins_cost(200); // XXX
 9501   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9502   ins_encode %{
 9503     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9504   %}
 9505   ins_pipe(pipe_cmov_reg); // XXX
 9506 %}
 9507 
 9508 // Conditional move ndd
 9509 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9510 %{
 9511   predicate(UseAPX);
 9512   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9513 
 9514   ins_cost(200);
 9515   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9516   ins_encode %{
 9517     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9518   %}
 9519   ins_pipe(pipe_cmov_reg);
 9520 %}
 9521 
 9522 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9523   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9524 
 9525   ins_cost(200);
 9526   expand %{
 9527     cmovP_regU(cop, cr, dst, src);
 9528   %}
 9529 %}
 9530 
 9531 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
 9532   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9533 
 9534   ins_cost(200);
 9535   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
 9536   ins_encode %{
 9537     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9538   %}
 9539   ins_pipe(pipe_cmov_reg);
 9540 %}
 9541 
 9542 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9543   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9544   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9545 
 9546   ins_cost(200); // XXX
 9547   format %{ "cmovpq  $dst, $src\n\t"
 9548             "cmovneq $dst, $src" %}
 9549   ins_encode %{
 9550     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9551     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9552   %}
 9553   ins_pipe(pipe_cmov_reg);
 9554 %}
 9555 
 9556 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9557 // inputs of the CMove
 9558 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9559   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9560   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9561 
 9562   ins_cost(200); // XXX
 9563   format %{ "cmovpq  $dst, $src\n\t"
 9564             "cmovneq $dst, $src" %}
 9565   ins_encode %{
 9566     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9567     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9568   %}
 9569   ins_pipe(pipe_cmov_reg);
 9570 %}
 9571 
 9572 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9573 %{
 9574   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9575   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9576 
 9577   ins_cost(100); // XXX
 9578   format %{ "setbn$cop $dst\t# signed, long" %}
 9579   ins_encode %{
 9580     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9581     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9582   %}
 9583   ins_pipe(ialu_reg);
 9584 %}
 9585 
 9586 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9587 %{
 9588   predicate(!UseAPX);
 9589   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9590 
 9591   ins_cost(200); // XXX
 9592   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9593   ins_encode %{
 9594     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9595   %}
 9596   ins_pipe(pipe_cmov_reg);  // XXX
 9597 %}
 9598 
 9599 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9600 %{
 9601   predicate(UseAPX);
 9602   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9603 
 9604   ins_cost(200);
 9605   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9606   ins_encode %{
 9607     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9608   %}
 9609   ins_pipe(pipe_cmov_reg);
 9610 %}
 9611 
 9612 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9613 %{
 9614   predicate(!UseAPX);
 9615   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9616 
 9617   ins_cost(200); // XXX
 9618   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9619   ins_encode %{
 9620     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9621   %}
 9622   ins_pipe(pipe_cmov_mem);  // XXX
 9623 %}
 9624 
 9625 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9626 %{
 9627   predicate(UseAPX);
 9628   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9629 
 9630   ins_cost(200);
 9631   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9632   ins_encode %{
 9633     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9634   %}
 9635   ins_pipe(pipe_cmov_mem);
 9636 %}
 9637 
 9638 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9639 %{
 9640   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9641   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9642 
 9643   ins_cost(100); // XXX
 9644   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9645   ins_encode %{
 9646     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9647     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9648   %}
 9649   ins_pipe(ialu_reg);
 9650 %}
 9651 
 9652 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9653 %{
 9654   predicate(!UseAPX);
 9655   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9656 
 9657   ins_cost(200); // XXX
 9658   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9659   ins_encode %{
 9660     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9661   %}
 9662   ins_pipe(pipe_cmov_reg); // XXX
 9663 %}
 9664 
 9665 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9666 %{
 9667   predicate(UseAPX);
 9668   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9669 
 9670   ins_cost(200);
 9671   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9672   ins_encode %{
 9673     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9674   %}
 9675   ins_pipe(pipe_cmov_reg);
 9676 %}
 9677 
 9678 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9679 %{
 9680   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9681   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9682 
 9683   ins_cost(100); // XXX
 9684   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9685   ins_encode %{
 9686     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9687     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9688   %}
 9689   ins_pipe(ialu_reg);
 9690 %}
 9691 
 9692 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9693 %{
 9694   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9695   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9696 
 9697   ins_cost(100); // XXX
 9698   format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
 9699   ins_encode %{
 9700     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9701     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9702   %}
 9703   ins_pipe(ialu_reg);
 9704 %}
 9705 
 9706 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9707   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9708 
 9709   ins_cost(200);
 9710   expand %{
 9711     cmovL_regU(cop, cr, dst, src);
 9712   %}
 9713 %}
 9714 
 9715 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
 9716 %{
 9717   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9718 
 9719   ins_cost(200);
 9720   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9721   ins_encode %{
 9722     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9723   %}
 9724   ins_pipe(pipe_cmov_reg);
 9725 %}
 9726 
 9727 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9728   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9729   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9730 
 9731   ins_cost(200); // XXX
 9732   format %{ "cmovpq  $dst, $src\n\t"
 9733             "cmovneq $dst, $src" %}
 9734   ins_encode %{
 9735     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9736     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9737   %}
 9738   ins_pipe(pipe_cmov_reg);
 9739 %}
 9740 
 9741 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9742 // inputs of the CMove
 9743 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9744   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9745   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9746 
 9747   ins_cost(200); // XXX
 9748   format %{ "cmovpq  $dst, $src\n\t"
 9749             "cmovneq $dst, $src" %}
 9750   ins_encode %{
 9751     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9752     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9753   %}
 9754   ins_pipe(pipe_cmov_reg);
 9755 %}
 9756 
 9757 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9758 %{
 9759   predicate(!UseAPX);
 9760   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9761 
 9762   ins_cost(200); // XXX
 9763   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9764   ins_encode %{
 9765     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9766   %}
 9767   ins_pipe(pipe_cmov_mem); // XXX
 9768 %}
 9769 
 9770 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9771   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9772 
 9773   ins_cost(200);
 9774   expand %{
 9775     cmovL_memU(cop, cr, dst, src);
 9776   %}
 9777 %}
 9778 
 9779 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9780 %{
 9781   predicate(UseAPX);
 9782   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9783 
 9784   ins_cost(200);
 9785   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9786   ins_encode %{
 9787     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9788   %}
 9789   ins_pipe(pipe_cmov_mem);
 9790 %}
 9791 
 9792 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
 9793 %{
 9794   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9795 
 9796   ins_cost(200);
 9797   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9798   ins_encode %{
 9799     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9800   %}
 9801   ins_pipe(pipe_cmov_mem);
 9802 %}
 9803 
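// There is no conditional move between XMM registers, so CMoveF/CMoveD are
// lowered to a short forward branch around an unconditional register move,
// with the branch sense inverted relative to the CMove condition (the
// jn$cop in the formats below).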
 9804 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9805 %{
 9806   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9807 
 9808   ins_cost(200); // XXX
 9809   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9810             "movss     $dst, $src\n"
 9811     "skip:" %}
 9812   ins_encode %{
 9813     Label Lskip;
 9814     // Invert sense of branch from sense of CMOV
 9815     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9816     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9817     __ bind(Lskip);
 9818   %}
 9819   ins_pipe(pipe_slow);
 9820 %}
 9821 
 9822 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9823 %{
 9824   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9825 
 9826   ins_cost(200); // XXX
 9827   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9828             "movss     $dst, $src\n"
 9829     "skip:" %}
 9830   ins_encode %{
 9831     Label Lskip;
 9832     // Invert sense of branch from sense of CMOV
 9833     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9834     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9835     __ bind(Lskip);
 9836   %}
 9837   ins_pipe(pipe_slow);
 9838 %}
 9839 
 9840 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9841   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9842 
 9843   ins_cost(200);
 9844   expand %{
 9845     cmovF_regU(cop, cr, dst, src);
 9846   %}
 9847 %}
 9848 
 9849 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
 9850 %{
 9851   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9852 
 9853   ins_cost(200); // XXX
 9854   format %{ "jn$cop    skip\t# signed, unsigned cmove float\n\t"
 9855             "movss     $dst, $src\n"
 9856     "skip:" %}
 9857   ins_encode %{
 9858     Label Lskip;
 9859     // Invert sense of branch from sense of CMOV
 9860     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9861     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9862     __ bind(Lskip);
 9863   %}
 9864   ins_pipe(pipe_slow);
 9865 %}
 9866 
 9867 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9868 %{
 9869   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9870 
 9871   ins_cost(200); // XXX
 9872   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9873             "movsd     $dst, $src\n"
 9874     "skip:" %}
 9875   ins_encode %{
 9876     Label Lskip;
 9877     // Invert sense of branch from sense of CMOV
 9878     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9879     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9880     __ bind(Lskip);
 9881   %}
 9882   ins_pipe(pipe_slow);
 9883 %}
 9884 
 9885 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9886 %{
 9887   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9888 
 9889   ins_cost(200); // XXX
 9890   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9891             "movsd     $dst, $src\n"
 9892     "skip:" %}
 9893   ins_encode %{
 9894     Label Lskip;
 9895     // Invert sense of branch from sense of CMOV
 9896     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9897     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9898     __ bind(Lskip);
 9899   %}
 9900   ins_pipe(pipe_slow);
 9901 %}
 9902 
 9903 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9904   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9905 
 9906   ins_cost(200);
 9907   expand %{
 9908     cmovD_regU(cop, cr, dst, src);
 9909   %}
 9910 %}
 9911 
 9912 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
 9913 %{
 9914   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9915 
 9916   ins_cost(200); // XXX
 9917   format %{ "jn$cop    skip\t# signed, unsigned cmove double\n\t"
 9918             "movsd     $dst, $src\n"
 9919     "skip:" %}
 9920   ins_encode %{
 9921     Label Lskip;
 9922     // Invert sense of branch from sense of CMOV
 9923     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9924     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9925     __ bind(Lskip);
 9926   %}
 9927   ins_pipe(pipe_slow);
 9928 %}
 9929 
 9930 //----------Arithmetic Instructions--------------------------------------------
 9931 //----------Addition Instructions----------------------------------------------
 9932 
 9933 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9934 %{
 9935   predicate(!UseAPX);
 9936   match(Set dst (AddI dst src));
 9937   effect(KILL cr);
 9938   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9939   format %{ "addl    $dst, $src\t# int" %}
 9940   ins_encode %{
 9941     __ addl($dst$$Register, $src$$Register);
 9942   %}
 9943   ins_pipe(ialu_reg_reg);
 9944 %}
 9945 
 9946 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9947 %{
 9948   predicate(UseAPX);
 9949   match(Set dst (AddI src1 src2));
 9950   effect(KILL cr);
 9951   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
 9952 
 9953   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9954   ins_encode %{
 9955     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9956   %}
 9957   ins_pipe(ialu_reg_reg);
 9958 %}
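// In the ndd encode blocks the trailing boolean selects the APX
// flag-suppressing (NF) form of the instruction; it is passed as false here
// because these patterns advertise their flag effects via the flag(...)
// declarations above, so the flag results must be kept.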
 9959 
 9960 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9961 %{
 9962   predicate(!UseAPX);
 9963   match(Set dst (AddI dst src));
 9964   effect(KILL cr);
 9965   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9966 
 9967   format %{ "addl    $dst, $src\t# int" %}
 9968   ins_encode %{
 9969     __ addl($dst$$Register, $src$$constant);
 9970   %}
 9971   ins_pipe( ialu_reg );
 9972 %}
 9973 
 9974 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9975 %{
 9976   predicate(UseAPX);
 9977   match(Set dst (AddI src1 src2));
 9978   effect(KILL cr);
 9979   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
 9980 
 9981   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9982   ins_encode %{
 9983     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9984   %}
 9985   ins_pipe( ialu_reg );
 9986 %}
 9987 
 9988 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
 9989 %{
 9990   predicate(UseAPX);
 9991   match(Set dst (AddI (LoadI src1) src2));
 9992   effect(KILL cr);
 9993   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9994 
 9995   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9996   ins_encode %{
 9997     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
 9998   %}
 9999   ins_pipe( ialu_reg );
10000 %}
10001 
10002 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10003 %{
10004   predicate(!UseAPX);
10005   match(Set dst (AddI dst (LoadI src)));
10006   effect(KILL cr);
10007   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10008 
10009   ins_cost(150); // XXX
10010   format %{ "addl    $dst, $src\t# int" %}
10011   ins_encode %{
10012     __ addl($dst$$Register, $src$$Address);
10013   %}
10014   ins_pipe(ialu_reg_mem);
10015 %}
10016 
10017 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10018 %{
10019   predicate(UseAPX);
10020   match(Set dst (AddI src1 (LoadI src2)));
10021   effect(KILL cr);
10022   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10023 
10024   ins_cost(150);
10025   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10026   ins_encode %{
10027     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10028   %}
10029   ins_pipe(ialu_reg_mem);
10030 %}
10031 
10032 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10033 %{
10034   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10035   effect(KILL cr);
10036   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10037 
10038   ins_cost(150); // XXX
10039   format %{ "addl    $dst, $src\t# int" %}
10040   ins_encode %{
10041     __ addl($dst$$Address, $src$$Register);
10042   %}
10043   ins_pipe(ialu_mem_reg);
10044 %}
10045 
10046 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10047 %{
10048   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10049   effect(KILL cr);
10050   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10051 
10053   ins_cost(125); // XXX
10054   format %{ "addl    $dst, $src\t# int" %}
10055   ins_encode %{
10056     __ addl($dst$$Address, $src$$constant);
10057   %}
10058   ins_pipe(ialu_mem_imm);
10059 %}
10060 
10061 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10062 %{
10063   predicate(!UseAPX && UseIncDec);
10064   match(Set dst (AddI dst src));
10065   effect(KILL cr);
10066 
10067   format %{ "incl    $dst\t# int" %}
10068   ins_encode %{
10069     __ incrementl($dst$$Register);
10070   %}
10071   ins_pipe(ialu_reg);
10072 %}
10073 
10074 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10075 %{
10076   predicate(UseAPX && UseIncDec);
10077   match(Set dst (AddI src val));
10078   effect(KILL cr);
10079   flag(PD::Flag_ndd_demotable_opr1);
10080 
10081   format %{ "eincl    $dst, $src\t# int ndd" %}
10082   ins_encode %{
10083     __ eincl($dst$$Register, $src$$Register, false);
10084   %}
10085   ins_pipe(ialu_reg);
10086 %}
10087 
10088 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10089 %{
10090   predicate(UseAPX && UseIncDec);
10091   match(Set dst (AddI (LoadI src) val));
10092   effect(KILL cr);
10093 
10094   format %{ "eincl    $dst, $src\t# int ndd" %}
10095   ins_encode %{
10096     __ eincl($dst$$Register, $src$$Address, false);
10097   %}
10098   ins_pipe(ialu_reg);
10099 %}
10100 
10101 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10102 %{
10103   predicate(UseIncDec);
10104   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10105   effect(KILL cr);
10106 
10107   ins_cost(125); // XXX
10108   format %{ "incl    $dst\t# int" %}
10109   ins_encode %{
10110     __ incrementl($dst$$Address);
10111   %}
10112   ins_pipe(ialu_mem_imm);
10113 %}
10114 
10115 // XXX why does that use AddI
10116 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10117 %{
10118   predicate(!UseAPX && UseIncDec);
10119   match(Set dst (AddI dst src));
10120   effect(KILL cr);
10121 
10122   format %{ "decl    $dst\t# int" %}
10123   ins_encode %{
10124     __ decrementl($dst$$Register);
10125   %}
10126   ins_pipe(ialu_reg);
10127 %}
10128 
10129 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10130 %{
10131   predicate(UseAPX && UseIncDec);
10132   match(Set dst (AddI src val));
10133   effect(KILL cr);
10134   flag(PD::Flag_ndd_demotable_opr1);
10135 
10136   format %{ "edecl    $dst, $src\t# int ndd" %}
10137   ins_encode %{
10138     __ edecl($dst$$Register, $src$$Register, false);
10139   %}
10140   ins_pipe(ialu_reg);
10141 %}
10142 
10143 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10144 %{
10145   predicate(UseAPX && UseIncDec);
10146   match(Set dst (AddI (LoadI src) val));
10147   effect(KILL cr);
10148 
10149   format %{ "edecl    $dst, $src\t# int ndd" %}
10150   ins_encode %{
10151     __ edecl($dst$$Register, $src$$Address, false);
10152   %}
10153   ins_pipe(ialu_reg);
10154 %}
10155 
10156 // XXX why does that use AddI
10157 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10158 %{
10159   predicate(UseIncDec);
10160   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10161   effect(KILL cr);
10162 
10163   ins_cost(125); // XXX
10164   format %{ "decl    $dst\t# int" %}
10165   ins_encode %{
10166     __ decrementl($dst$$Address);
10167   %}
10168   ins_pipe(ialu_mem_imm);
10169 %}
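// inc/dec are guarded by UseIncDec: they encode one byte shorter than
// add/sub of an immediate 1 but leave CF unchanged, which can create a
// partial-flags merge on some microarchitectures; the flag lets that
// trade-off be tuned per platform.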
10170 
10171 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10172 %{
10173   predicate(VM_Version::supports_fast_2op_lea());
10174   match(Set dst (AddI (LShiftI index scale) disp));
10175 
10176   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10177   ins_encode %{
10178     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10179     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10180   %}
10181   ins_pipe(ialu_reg_reg);
10182 %}
10183 
10184 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10185 %{
10186   predicate(VM_Version::supports_fast_3op_lea());
10187   match(Set dst (AddI (AddI base index) disp));
10188 
10189   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10190   ins_encode %{
10191     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10192   %}
10193   ins_pipe(ialu_reg_reg);
10194 %}
10195 
10196 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10197 %{
10198   predicate(VM_Version::supports_fast_2op_lea());
10199   match(Set dst (AddI base (LShiftI index scale)));
10200 
10201   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10202   ins_encode %{
10203     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10204     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10205   %}
10206   ins_pipe(ialu_reg_reg);
10207 %}
10208 
10209 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10210 %{
10211   predicate(VM_Version::supports_fast_3op_lea());
10212   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10213 
10214   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10215   ins_encode %{
10216     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10217     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10218   %}
10219   ins_pipe(ialu_reg_reg);
10220 %}
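// The lea patterns above fold shift-and-add arithmetic into a single
// address-generation instruction on cores where that is profitable (the
// supports_fast_2op_lea/supports_fast_3op_lea predicates).  Illustrative:
//
//   int r = base + (i << 2) + 16;    ==>    leal r, [base + i*4 + 16]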
10221 
10222 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10223 %{
10224   predicate(!UseAPX);
10225   match(Set dst (AddL dst src));
10226   effect(KILL cr);
10227   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10228 
10229   format %{ "addq    $dst, $src\t# long" %}
10230   ins_encode %{
10231     __ addq($dst$$Register, $src$$Register);
10232   %}
10233   ins_pipe(ialu_reg_reg);
10234 %}
10235 
10236 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10237 %{
10238   predicate(UseAPX);
10239   match(Set dst (AddL src1 src2));
10240   effect(KILL cr);
10241   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10242 
10243   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10244   ins_encode %{
10245     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10246   %}
10247   ins_pipe(ialu_reg_reg);
10248 %}
10249 
10250 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10251 %{
10252   predicate(!UseAPX);
10253   match(Set dst (AddL dst src));
10254   effect(KILL cr);
10255   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10256 
10257   format %{ "addq    $dst, $src\t# long" %}
10258   ins_encode %{
10259     __ addq($dst$$Register, $src$$constant);
10260   %}
10261   ins_pipe( ialu_reg );
10262 %}
10263 
10264 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10265 %{
10266   predicate(UseAPX);
10267   match(Set dst (AddL src1 src2));
10268   effect(KILL cr);
10269   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10270 
10271   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10272   ins_encode %{
10273     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10274   %}
10275   ins_pipe( ialu_reg );
10276 %}
10277 
10278 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10279 %{
10280   predicate(UseAPX);
10281   match(Set dst (AddL (LoadL src1) src2));
10282   effect(KILL cr);
10283   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10284 
10285   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10286   ins_encode %{
10287     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10288   %}
10289   ins_pipe( ialu_reg );
10290 %}
10291 
10292 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10293 %{
10294   predicate(!UseAPX);
10295   match(Set dst (AddL dst (LoadL src)));
10296   effect(KILL cr);
10297   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10298 
10299   ins_cost(150); // XXX
10300   format %{ "addq    $dst, $src\t# long" %}
10301   ins_encode %{
10302     __ addq($dst$$Register, $src$$Address);
10303   %}
10304   ins_pipe(ialu_reg_mem);
10305 %}
10306 
10307 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10308 %{
10309   predicate(UseAPX);
10310   match(Set dst (AddL src1 (LoadL src2)));
10311   effect(KILL cr);
10312   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10313 
10314   ins_cost(150);
10315   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10316   ins_encode %{
10317     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10318   %}
10319   ins_pipe(ialu_reg_mem);
10320 %}
10321 
10322 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10323 %{
10324   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10325   effect(KILL cr);
10326   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10327 
10328   ins_cost(150); // XXX
10329   format %{ "addq    $dst, $src\t# long" %}
10330   ins_encode %{
10331     __ addq($dst$$Address, $src$$Register);
10332   %}
10333   ins_pipe(ialu_mem_reg);
10334 %}
10335 
10336 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10337 %{
10338   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10339   effect(KILL cr);
10340   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10341 
10342   ins_cost(125); // XXX
10343   format %{ "addq    $dst, $src\t# long" %}
10344   ins_encode %{
10345     __ addq($dst$$Address, $src$$constant);
10346   %}
10347   ins_pipe(ialu_mem_imm);
10348 %}
10349 
10350 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10351 %{
10352   predicate(!UseAPX && UseIncDec);
10353   match(Set dst (AddL dst src));
10354   effect(KILL cr);
10355 
10356   format %{ "incq    $dst\t# long" %}
10357   ins_encode %{
10358     __ incrementq($dst$$Register);
10359   %}
10360   ins_pipe(ialu_reg);
10361 %}
10362 
10363 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10364 %{
10365   predicate(UseAPX && UseIncDec);
10366   match(Set dst (AddL src val));
10367   effect(KILL cr);
10368   flag(PD::Flag_ndd_demotable_opr1);
10369 
10370   format %{ "eincq    $dst, $src\t# long ndd" %}
10371   ins_encode %{
10372     __ eincq($dst$$Register, $src$$Register, false);
10373   %}
10374   ins_pipe(ialu_reg);
10375 %}
10376 
10377 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10378 %{
10379   predicate(UseAPX && UseIncDec);
10380   match(Set dst (AddL (LoadL src) val));
10381   effect(KILL cr);
10382 
10383   format %{ "eincq    $dst, $src\t# long ndd" %}
10384   ins_encode %{
10385     __ eincq($dst$$Register, $src$$Address, false);
10386   %}
10387   ins_pipe(ialu_reg);
10388 %}
10389 
10390 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10391 %{
10392   predicate(UseIncDec);
10393   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10394   effect(KILL cr);
10395 
10396   ins_cost(125); // XXX
10397   format %{ "incq    $dst\t# long" %}
10398   ins_encode %{
10399     __ incrementq($dst$$Address);
10400   %}
10401   ins_pipe(ialu_mem_imm);
10402 %}
10403 
10404 // XXX why does that use AddL
10405 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10406 %{
10407   predicate(!UseAPX && UseIncDec);
10408   match(Set dst (AddL dst src));
10409   effect(KILL cr);
10410 
10411   format %{ "decq    $dst\t# long" %}
10412   ins_encode %{
10413     __ decrementq($dst$$Register);
10414   %}
10415   ins_pipe(ialu_reg);
10416 %}
10417 
10418 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10419 %{
10420   predicate(UseAPX && UseIncDec);
10421   match(Set dst (AddL src val));
10422   effect(KILL cr);
10423   flag(PD::Flag_ndd_demotable_opr1);
10424 
10425   format %{ "edecq    $dst, $src\t# long ndd" %}
10426   ins_encode %{
10427     __ edecq($dst$$Register, $src$$Register, false);
10428   %}
10429   ins_pipe(ialu_reg);
10430 %}
10431 
10432 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10433 %{
10434   predicate(UseAPX && UseIncDec);
10435   match(Set dst (AddL (LoadL src) val));
10436   effect(KILL cr);
10437 
10438   format %{ "edecq    $dst, $src\t# long ndd" %}
10439   ins_encode %{
10440     __ edecq($dst$$Register, $src$$Address, false);
10441   %}
10442   ins_pipe(ialu_reg);
10443 %}
10444 
10445 // XXX why does that use AddL
10446 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10447 %{
10448   predicate(UseIncDec);
10449   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10450   effect(KILL cr);
10451 
10452   ins_cost(125); // XXX
10453   format %{ "decq    $dst\t# long" %}
10454   ins_encode %{
10455     __ decrementq($dst$$Address);
10456   %}
10457   ins_pipe(ialu_mem_imm);
10458 %}
10459 
10460 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10461 %{
10462   predicate(VM_Version::supports_fast_2op_lea());
10463   match(Set dst (AddL (LShiftL index scale) disp));
10464 
10465   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10466   ins_encode %{
10467     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10468     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10469   %}
10470   ins_pipe(ialu_reg_reg);
10471 %}
10472 
10473 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10474 %{
10475   predicate(VM_Version::supports_fast_3op_lea());
10476   match(Set dst (AddL (AddL base index) disp));
10477 
10478   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10479   ins_encode %{
10480     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10481   %}
10482   ins_pipe(ialu_reg_reg);
10483 %}
10484 
10485 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10486 %{
10487   predicate(VM_Version::supports_fast_2op_lea());
10488   match(Set dst (AddL base (LShiftL index scale)));
10489 
10490   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10491   ins_encode %{
10492     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10493     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10494   %}
10495   ins_pipe(ialu_reg_reg);
10496 %}
10497 
10498 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10499 %{
10500   predicate(VM_Version::supports_fast_3op_lea());
10501   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10502 
10503   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10504   ins_encode %{
10505     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10506     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10507   %}
10508   ins_pipe(ialu_reg_reg);
10509 %}
10510 
10511 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10512 %{
10513   match(Set dst (AddP dst src));
10514   effect(KILL cr);
10515   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10516 
10517   format %{ "addq    $dst, $src\t# ptr" %}
10518   ins_encode %{
10519     __ addq($dst$$Register, $src$$Register);
10520   %}
10521   ins_pipe(ialu_reg_reg);
10522 %}
10523 
10524 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10525 %{
10526   match(Set dst (AddP dst src));
10527   effect(KILL cr);
10528   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10529 
10530   format %{ "addq    $dst, $src\t# ptr" %}
10531   ins_encode %{
10532     __ addq($dst$$Register, $src$$constant);
10533   %}
10534   ins_pipe( ialu_reg );
10535 %}
10536 
10537 // XXX addP mem ops ????
10538 
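// The CheckCastPP/Cast* nodes below exist only to carry sharpened type
// information through the graph; they are zero-size no-ops at code-emission
// time (size(0), empty encoding) unless VerifyConstraintCasts asks for
// runtime range checks.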
10539 instruct checkCastPP(rRegP dst)
10540 %{
10541   match(Set dst (CheckCastPP dst));
10542 
10543   size(0);
10544   format %{ "# checkcastPP of $dst" %}
10545   ins_encode(/* empty encoding */);
10546   ins_pipe(empty);
10547 %}
10548 
10549 instruct castPP(rRegP dst)
10550 %{
10551   match(Set dst (CastPP dst));
10552 
10553   size(0);
10554   format %{ "# castPP of $dst" %}
10555   ins_encode(/* empty encoding */);
10556   ins_pipe(empty);
10557 %}
10558 
10559 instruct castII(rRegI dst)
10560 %{
10561   predicate(VerifyConstraintCasts == 0);
10562   match(Set dst (CastII dst));
10563 
10564   size(0);
10565   format %{ "# castII of $dst" %}
10566   ins_encode(/* empty encoding */);
10567   ins_cost(0);
10568   ins_pipe(empty);
10569 %}
10570 
10571 instruct castII_checked(rRegI dst, rFlagsReg cr)
10572 %{
10573   predicate(VerifyConstraintCasts > 0);
10574   match(Set dst (CastII dst));
10575 
10576   effect(KILL cr);
10577   format %{ "# cast_checked_II $dst" %}
10578   ins_encode %{
10579     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10580   %}
10581   ins_pipe(pipe_slow);
10582 %}
10583 
10584 instruct castLL(rRegL dst)
10585 %{
10586   predicate(VerifyConstraintCasts == 0);
10587   match(Set dst (CastLL dst));
10588 
10589   size(0);
10590   format %{ "# castLL of $dst" %}
10591   ins_encode(/* empty encoding */);
10592   ins_cost(0);
10593   ins_pipe(empty);
10594 %}
10595 
10596 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10597 %{
10598   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10599   match(Set dst (CastLL dst));
10600 
10601   effect(KILL cr);
10602   format %{ "# cast_checked_LL $dst" %}
10603   ins_encode %{
10604     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10605   %}
10606   ins_pipe(pipe_slow);
10607 %}
10608 
10609 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10610 %{
10611   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10612   match(Set dst (CastLL dst));
10613 
10614   effect(KILL cr, TEMP tmp);
10615   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10616   ins_encode %{
10617     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10618   %}
10619   ins_pipe(pipe_slow);
10620 %}
10621 
10622 instruct castFF(regF dst)
10623 %{
10624   match(Set dst (CastFF dst));
10625 
10626   size(0);
10627   format %{ "# castFF of $dst" %}
10628   ins_encode(/* empty encoding */);
10629   ins_cost(0);
10630   ins_pipe(empty);
10631 %}
10632 
10633 instruct castHH(regF dst)
10634 %{
10635   match(Set dst (CastHH dst));
10636 
10637   size(0);
10638   format %{ "# castHH of $dst" %}
10639   ins_encode(/* empty encoding */);
10640   ins_cost(0);
10641   ins_pipe(empty);
10642 %}
10643 
10644 instruct castDD(regD dst)
10645 %{
10646   match(Set dst (CastDD dst));
10647 
10648   size(0);
10649   format %{ "# castDD of $dst" %}
10650   ins_encode(/* empty encoding */);
10651   ins_cost(0);
10652   ins_pipe(empty);
10653 %}
10654 
10655 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
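// All of the CAS patterns below rely on the hardware LOCK CMPXCHG protocol:
//
//   lock cmpxchg [mem], newval  // if (rax == [mem]) { [mem] = newval; ZF = 1 }
//                               // else              { rax = [mem];    ZF = 0 }
//
// CompareAndSwap* materializes ZF into the int result via setcc, while
// CompareAndExchange* simply returns the value left in rax.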
10656 instruct compareAndSwapP(rRegI res,
10657                          memory mem_ptr,
10658                          rax_RegP oldval, rRegP newval,
10659                          rFlagsReg cr)
10660 %{
10661   predicate(n->as_LoadStore()->barrier_data() == 0);
10662   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10663   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10664   effect(KILL cr, KILL oldval);
10665 
10666   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10667             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10668             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10669   ins_encode %{
10670     __ lock();
10671     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10672     __ setcc(Assembler::equal, $res$$Register);
10673   %}
10674   ins_pipe( pipe_cmpxchg );
10675 %}
10676 
10677 instruct compareAndSwapL(rRegI res,
10678                          memory mem_ptr,
10679                          rax_RegL oldval, rRegL newval,
10680                          rFlagsReg cr)
10681 %{
10682   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10683   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10684   effect(KILL cr, KILL oldval);
10685 
10686   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10687             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10688             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10689   ins_encode %{
10690     __ lock();
10691     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10692     __ setcc(Assembler::equal, $res$$Register);
10693   %}
10694   ins_pipe( pipe_cmpxchg );
10695 %}
10696 
10697 instruct compareAndSwapI(rRegI res,
10698                          memory mem_ptr,
10699                          rax_RegI oldval, rRegI newval,
10700                          rFlagsReg cr)
10701 %{
10702   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10703   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10704   effect(KILL cr, KILL oldval);
10705 
10706   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10707             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10708             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10709   ins_encode %{
10710     __ lock();
10711     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10712     __ setcc(Assembler::equal, $res$$Register);
10713   %}
10714   ins_pipe( pipe_cmpxchg );
10715 %}
10716 
10717 instruct compareAndSwapB(rRegI res,
10718                          memory mem_ptr,
10719                          rax_RegI oldval, rRegI newval,
10720                          rFlagsReg cr)
10721 %{
10722   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10723   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10724   effect(KILL cr, KILL oldval);
10725 
10726   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10727             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10728             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10729   ins_encode %{
10730     __ lock();
10731     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10732     __ setcc(Assembler::equal, $res$$Register);
10733   %}
10734   ins_pipe( pipe_cmpxchg );
10735 %}
10736 
10737 instruct compareAndSwapS(rRegI res,
10738                          memory mem_ptr,
10739                          rax_RegI oldval, rRegI newval,
10740                          rFlagsReg cr)
10741 %{
10742   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10743   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10744   effect(KILL cr, KILL oldval);
10745 
10746   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10747             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10748             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10749   ins_encode %{
10750     __ lock();
10751     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10752     __ setcc(Assembler::equal, $res$$Register);
10753   %}
10754   ins_pipe( pipe_cmpxchg );
10755 %}
10756 
10757 instruct compareAndSwapN(rRegI res,
10758                           memory mem_ptr,
10759                           rax_RegN oldval, rRegN newval,
10760                           rFlagsReg cr) %{
10761   predicate(n->as_LoadStore()->barrier_data() == 0);
10762   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10763   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10764   effect(KILL cr, KILL oldval);
10765 
10766   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10767             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10768             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10769   ins_encode %{
10770     __ lock();
10771     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10772     __ setcc(Assembler::equal, $res$$Register);
10773   %}
10774   ins_pipe( pipe_cmpxchg );
10775 %}
10776 
10777 instruct compareAndExchangeB(
10778                          memory mem_ptr,
10779                          rax_RegI oldval, rRegI newval,
10780                          rFlagsReg cr)
10781 %{
10782   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10783   effect(KILL cr);
10784 
10785   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10786             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10787   ins_encode %{
10788     __ lock();
10789     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10790   %}
10791   ins_pipe( pipe_cmpxchg );
10792 %}
10793 
10794 instruct compareAndExchangeS(
10795                          memory mem_ptr,
10796                          rax_RegI oldval, rRegI newval,
10797                          rFlagsReg cr)
10798 %{
10799   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10800   effect(KILL cr);
10801 
10802   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10803             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10804   ins_encode %{
10805     __ lock();
10806     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10807   %}
10808   ins_pipe( pipe_cmpxchg );
10809 %}
10810 
10811 instruct compareAndExchangeI(
10812                          memory mem_ptr,
10813                          rax_RegI oldval, rRegI newval,
10814                          rFlagsReg cr)
10815 %{
10816   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10817   effect(KILL cr);
10818 
10819   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10820             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10821   ins_encode %{
10822     __ lock();
10823     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10824   %}
10825   ins_pipe( pipe_cmpxchg );
10826 %}
10827 
10828 instruct compareAndExchangeL(
10829                          memory mem_ptr,
10830                          rax_RegL oldval, rRegL newval,
10831                          rFlagsReg cr)
10832 %{
10833   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10834   effect(KILL cr);
10835 
10836   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10837             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10838   ins_encode %{
10839     __ lock();
10840     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10841   %}
10842   ins_pipe( pipe_cmpxchg );
10843 %}
10844 
10845 instruct compareAndExchangeN(
10846                           memory mem_ptr,
10847                           rax_RegN oldval, rRegN newval,
10848                           rFlagsReg cr) %{
10849   predicate(n->as_LoadStore()->barrier_data() == 0);
10850   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10851   effect(KILL cr);
10852 
10853   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10854             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10855   ins_encode %{
10856     __ lock();
10857     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10858   %}
10859   ins_pipe( pipe_cmpxchg );
10860 %}
10861 
10862 instruct compareAndExchangeP(
10863                          memory mem_ptr,
10864                          rax_RegP oldval, rRegP newval,
10865                          rFlagsReg cr)
10866 %{
10867   predicate(n->as_LoadStore()->barrier_data() == 0);
10868   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10869   effect(KILL cr);
10870 
10871   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10872             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10873   ins_encode %{
10874     __ lock();
10875     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10876   %}
10877   ins_pipe( pipe_cmpxchg );
10878 %}
10879 
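// GetAndAdd patterns: when the fetched value is unused (result_not_used()),
// a plain LOCK ADD is emitted, which is cheaper than LOCK XADD because it
// writes no register result; otherwise LOCK XADD atomically adds and leaves
// the previous memory value in the register operand.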
10880 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10881   predicate(n->as_LoadStore()->result_not_used());
10882   match(Set dummy (GetAndAddB mem add));
10883   effect(KILL cr);
10884   format %{ "addb_lock   $mem, $add" %}
10885   ins_encode %{
10886     __ lock();
10887     __ addb($mem$$Address, $add$$Register);
10888   %}
10889   ins_pipe(pipe_cmpxchg);
10890 %}
10891 
10892 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10893   predicate(n->as_LoadStore()->result_not_used());
10894   match(Set dummy (GetAndAddB mem add));
10895   effect(KILL cr);
10896   format %{ "addb_lock   $mem, $add" %}
10897   ins_encode %{
10898     __ lock();
10899     __ addb($mem$$Address, $add$$constant);
10900   %}
10901   ins_pipe(pipe_cmpxchg);
10902 %}
10903 
10904 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10905   predicate(!n->as_LoadStore()->result_not_used());
10906   match(Set newval (GetAndAddB mem newval));
10907   effect(KILL cr);
10908   format %{ "xaddb_lock  $mem, $newval" %}
10909   ins_encode %{
10910     __ lock();
10911     __ xaddb($mem$$Address, $newval$$Register);
10912   %}
10913   ins_pipe(pipe_cmpxchg);
10914 %}
10915 
10916 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10917   predicate(n->as_LoadStore()->result_not_used());
10918   match(Set dummy (GetAndAddS mem add));
10919   effect(KILL cr);
10920   format %{ "addw_lock   $mem, $add" %}
10921   ins_encode %{
10922     __ lock();
10923     __ addw($mem$$Address, $add$$Register);
10924   %}
10925   ins_pipe(pipe_cmpxchg);
10926 %}
10927 
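// The 16-bit immediate form below is guarded by UseStoreImmI16: an
// operand-size prefix combined with a 16-bit immediate is a length-changing
// prefix that stalls the decoders on some CPUs.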
10928 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10929   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10930   match(Set dummy (GetAndAddS mem add));
10931   effect(KILL cr);
10932   format %{ "addw_lock   $mem, $add" %}
10933   ins_encode %{
10934     __ lock();
10935     __ addw($mem$$Address, $add$$constant);
10936   %}
10937   ins_pipe(pipe_cmpxchg);
10938 %}
10939 
10940 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10941   predicate(!n->as_LoadStore()->result_not_used());
10942   match(Set newval (GetAndAddS mem newval));
10943   effect(KILL cr);
10944   format %{ "xaddw_lock  $mem, $newval" %}
10945   ins_encode %{
10946     __ lock();
10947     __ xaddw($mem$$Address, $newval$$Register);
10948   %}
10949   ins_pipe(pipe_cmpxchg);
10950 %}
10951 
10952 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10953   predicate(n->as_LoadStore()->result_not_used());
10954   match(Set dummy (GetAndAddI mem add));
10955   effect(KILL cr);
10956   format %{ "addl_lock   $mem, $add" %}
10957   ins_encode %{
10958     __ lock();
10959     __ addl($mem$$Address, $add$$Register);
10960   %}
10961   ins_pipe(pipe_cmpxchg);
10962 %}
10963 
10964 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10965   predicate(n->as_LoadStore()->result_not_used());
10966   match(Set dummy (GetAndAddI mem add));
10967   effect(KILL cr);
10968   format %{ "addl_lock   $mem, $add" %}
10969   ins_encode %{
10970     __ lock();
10971     __ addl($mem$$Address, $add$$constant);
10972   %}
10973   ins_pipe(pipe_cmpxchg);
10974 %}
10975 
10976 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10977   predicate(!n->as_LoadStore()->result_not_used());
10978   match(Set newval (GetAndAddI mem newval));
10979   effect(KILL cr);
10980   format %{ "xaddl_lock  $mem, $newval" %}
10981   ins_encode %{
10982     __ lock();
10983     __ xaddl($mem$$Address, $newval$$Register);
10984   %}
10985   ins_pipe(pipe_cmpxchg);
10986 %}
10987 
10988 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10989   predicate(n->as_LoadStore()->result_not_used());
10990   match(Set dummy (GetAndAddL mem add));
10991   effect(KILL cr);
10992   format %{ "addq_lock   $mem, $add" %}
10993   ins_encode %{
10994     __ lock();
10995     __ addq($mem$$Address, $add$$Register);
10996   %}
10997   ins_pipe(pipe_cmpxchg);
10998 %}
10999 
11000 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
11001   predicate(n->as_LoadStore()->result_not_used());
11002   match(Set dummy (GetAndAddL mem add));
11003   effect(KILL cr);
11004   format %{ "addq_lock   $mem, $add" %}
11005   ins_encode %{
11006     __ lock();
11007     __ addq($mem$$Address, $add$$constant);
11008   %}
11009   ins_pipe(pipe_cmpxchg);
11010 %}
11011 
11012 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11013   predicate(!n->as_LoadStore()->result_not_used());
11014   match(Set newval (GetAndAddL mem newval));
11015   effect(KILL cr);
11016   format %{ "xaddq_lock  $mem, $newval" %}
11017   ins_encode %{
11018     __ lock();
11019     __ xaddq($mem$$Address, $newval$$Register);
11020   %}
11021   ins_pipe(pipe_cmpxchg);
11022 %}
11023 
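// GetAndSet patterns: XCHG with a memory operand is implicitly locked on
// x86, so no explicit lock() prefix is needed here.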
11024 instruct xchgB( memory mem, rRegI newval) %{
11025   match(Set newval (GetAndSetB mem newval));
11026   format %{ "XCHGB  $newval,[$mem]" %}
11027   ins_encode %{
11028     __ xchgb($newval$$Register, $mem$$Address);
11029   %}
11030   ins_pipe( pipe_cmpxchg );
11031 %}
11032 
11033 instruct xchgS( memory mem, rRegI newval) %{
11034   match(Set newval (GetAndSetS mem newval));
11035   format %{ "XCHGW  $newval,[$mem]" %}
11036   ins_encode %{
11037     __ xchgw($newval$$Register, $mem$$Address);
11038   %}
11039   ins_pipe( pipe_cmpxchg );
11040 %}
11041 
11042 instruct xchgI( memory mem, rRegI newval) %{
11043   match(Set newval (GetAndSetI mem newval));
11044   format %{ "XCHGL  $newval,[$mem]" %}
11045   ins_encode %{
11046     __ xchgl($newval$$Register, $mem$$Address);
11047   %}
11048   ins_pipe( pipe_cmpxchg );
11049 %}
11050 
11051 instruct xchgL( memory mem, rRegL newval) %{
11052   match(Set newval (GetAndSetL mem newval));
  format %{ "XCHGQ  $newval,[$mem]" %}
11054   ins_encode %{
11055     __ xchgq($newval$$Register, $mem$$Address);
11056   %}
11057   ins_pipe( pipe_cmpxchg );
11058 %}
11059 
11060 instruct xchgP( memory mem, rRegP newval) %{
11061   match(Set newval (GetAndSetP mem newval));
11062   predicate(n->as_LoadStore()->barrier_data() == 0);
11063   format %{ "XCHGQ  $newval,[$mem]" %}
11064   ins_encode %{
11065     __ xchgq($newval$$Register, $mem$$Address);
11066   %}
11067   ins_pipe( pipe_cmpxchg );
11068 %}
11069 
11070 instruct xchgN( memory mem, rRegN newval) %{
11071   predicate(n->as_LoadStore()->barrier_data() == 0);
11072   match(Set newval (GetAndSetN mem newval));
  format %{ "XCHGL  $newval,[$mem]" %}
11074   ins_encode %{
11075     __ xchgl($newval$$Register, $mem$$Address);
11076   %}
11077   ins_pipe( pipe_cmpxchg );
11078 %}
11079 
11080 //----------Abs Instructions-------------------------------------------
11081 
11082 // Integer Absolute Instructions
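// Branchless abs; for ints the sequence is
//   xorl   dst, dst          // dst = 0
//   subl   dst, src          // dst = -src, sets SF/OF
//   cmovll dst, src          // if -src < 0 (i.e. src > 0) take src
// which yields dst = max(src, -src) = |src|; MIN_VALUE maps to itself, as
// Java semantics require.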
11083 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11084 %{
11085   match(Set dst (AbsI src));
11086   effect(TEMP dst, KILL cr);
11087   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11088             "subl    $dst, $src\n\t"
11089             "cmovll  $dst, $src" %}
11090   ins_encode %{
11091     __ xorl($dst$$Register, $dst$$Register);
11092     __ subl($dst$$Register, $src$$Register);
11093     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11094   %}
11095 
11096   ins_pipe(ialu_reg_reg);
11097 %}
11098 
11099 // Long Absolute Instructions
11100 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11101 %{
11102   match(Set dst (AbsL src));
11103   effect(TEMP dst, KILL cr);
11104   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11105             "subq    $dst, $src\n\t"
11106             "cmovlq  $dst, $src" %}
11107   ins_encode %{
11108     __ xorl($dst$$Register, $dst$$Register);
11109     __ subq($dst$$Register, $src$$Register);
11110     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11111   %}
11112 
11113   ins_pipe(ialu_reg_reg);
11114 %}
11115 
11116 //----------Subtraction Instructions-------------------------------------------
11117 
11118 // Integer Subtraction Instructions
11119 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11120 %{
11121   predicate(!UseAPX);
11122   match(Set dst (SubI dst src));
11123   effect(KILL cr);
11124   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11125 
11126   format %{ "subl    $dst, $src\t# int" %}
11127   ins_encode %{
11128     __ subl($dst$$Register, $src$$Register);
11129   %}
11130   ins_pipe(ialu_reg_reg);
11131 %}
11132 
11133 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11134 %{
11135   predicate(UseAPX);
11136   match(Set dst (SubI src1 src2));
11137   effect(KILL cr);
11138   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11139 
11140   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11141   ins_encode %{
11142     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11143   %}
11144   ins_pipe(ialu_reg_reg);
11145 %}
11146 
11147 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11148 %{
11149   predicate(UseAPX);
11150   match(Set dst (SubI src1 src2));
11151   effect(KILL cr);
11152   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11153 
11154   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11155   ins_encode %{
11156     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11157   %}
11158   ins_pipe(ialu_reg_reg);
11159 %}
11160 
11161 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11162 %{
11163   predicate(UseAPX);
11164   match(Set dst (SubI (LoadI src1) src2));
11165   effect(KILL cr);
11166   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11167 
11168   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11169   ins_encode %{
11170     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11171   %}
11172   ins_pipe(ialu_reg_reg);
11173 %}
11174 
11175 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11176 %{
11177   predicate(!UseAPX);
11178   match(Set dst (SubI dst (LoadI src)));
11179   effect(KILL cr);
11180   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11181 
11182   ins_cost(150);
11183   format %{ "subl    $dst, $src\t# int" %}
11184   ins_encode %{
11185     __ subl($dst$$Register, $src$$Address);
11186   %}
11187   ins_pipe(ialu_reg_mem);
11188 %}
11189 
11190 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11191 %{
11192   predicate(UseAPX);
11193   match(Set dst (SubI src1 (LoadI src2)));
11194   effect(KILL cr);
11195   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11196 
11197   ins_cost(150);
11198   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11199   ins_encode %{
11200     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11201   %}
11202   ins_pipe(ialu_reg_mem);
11203 %}
11204 
11205 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11206 %{
11207   predicate(UseAPX);
11208   match(Set dst (SubI (LoadI src1) src2));
11209   effect(KILL cr);
11210   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11211 
11212   ins_cost(150);
11213   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11214   ins_encode %{
11215     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11216   %}
11217   ins_pipe(ialu_reg_mem);
11218 %}
11219 
11220 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11221 %{
11222   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11223   effect(KILL cr);
11224   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11225 
11226   ins_cost(150);
11227   format %{ "subl    $dst, $src\t# int" %}
11228   ins_encode %{
11229     __ subl($dst$$Address, $src$$Register);
11230   %}
11231   ins_pipe(ialu_mem_reg);
11232 %}
11233 
11234 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11235 %{
11236   predicate(!UseAPX);
11237   match(Set dst (SubL dst src));
11238   effect(KILL cr);
11239   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11240 
11241   format %{ "subq    $dst, $src\t# long" %}
11242   ins_encode %{
11243     __ subq($dst$$Register, $src$$Register);
11244   %}
11245   ins_pipe(ialu_reg_reg);
11246 %}
11247 
11248 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11249 %{
11250   predicate(UseAPX);
11251   match(Set dst (SubL src1 src2));
11252   effect(KILL cr);
11253   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11254 
11255   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11256   ins_encode %{
11257     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11258   %}
11259   ins_pipe(ialu_reg_reg);
11260 %}
11261 
11262 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11263 %{
11264   predicate(UseAPX);
11265   match(Set dst (SubL src1 src2));
11266   effect(KILL cr);
11267   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11268 
11269   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11270   ins_encode %{
11271     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11272   %}
11273   ins_pipe(ialu_reg_reg);
11274 %}
11275 
11276 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11277 %{
11278   predicate(UseAPX);
11279   match(Set dst (SubL (LoadL src1) src2));
11280   effect(KILL cr);
11281   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11282 
11283   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11284   ins_encode %{
11285     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11286   %}
11287   ins_pipe(ialu_reg_reg);
11288 %}
11289 
11290 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11291 %{
11292   predicate(!UseAPX);
11293   match(Set dst (SubL dst (LoadL src)));
11294   effect(KILL cr);
11295   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11296 
11297   ins_cost(150);
11298   format %{ "subq    $dst, $src\t# long" %}
11299   ins_encode %{
11300     __ subq($dst$$Register, $src$$Address);
11301   %}
11302   ins_pipe(ialu_reg_mem);
11303 %}
11304 
11305 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11306 %{
11307   predicate(UseAPX);
11308   match(Set dst (SubL src1 (LoadL src2)));
11309   effect(KILL cr);
11310   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11311 
11312   ins_cost(150);
11313   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11314   ins_encode %{
11315     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11316   %}
11317   ins_pipe(ialu_reg_mem);
11318 %}
11319 
11320 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11321 %{
11322   predicate(UseAPX);
11323   match(Set dst (SubL (LoadL src1) src2));
11324   effect(KILL cr);
11325   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11326 
11327   ins_cost(150);
11328   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11329   ins_encode %{
11330     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11331   %}
11332   ins_pipe(ialu_reg_mem);
11333 %}
11334 
11335 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11336 %{
11337   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11338   effect(KILL cr);
11339   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11340 
11341   ins_cost(150);
11342   format %{ "subq    $dst, $src\t# long" %}
11343   ins_encode %{
11344     __ subq($dst$$Address, $src$$Register);
11345   %}
11346   ins_pipe(ialu_mem_reg);
11347 %}
11348 
// Subtract an int from a pointer
// XXX hmpf??? (matched as AddP dst (SubI 0 src), i.e. dst + (0 - src))
11351 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11352 %{
11353   match(Set dst (AddP dst (SubI zero src)));
11354   effect(KILL cr);
11355 
11356   format %{ "subq    $dst, $src\t# ptr - int" %}
11357   ins_encode %{
11358     __ subq($dst$$Register, $src$$Register);
11359   %}
11360   ins_pipe(ialu_reg_reg);
11361 %}
11362 
11363 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11364 %{
11365   predicate(!UseAPX);
11366   match(Set dst (SubI zero dst));
11367   effect(KILL cr);
11368   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11369 
11370   format %{ "negl    $dst\t# int" %}
11371   ins_encode %{
11372     __ negl($dst$$Register);
11373   %}
11374   ins_pipe(ialu_reg);
11375 %}
11376 
11377 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11378 %{
11379   predicate(UseAPX);
11380   match(Set dst (SubI zero src));
11381   effect(KILL cr);
11382   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11383 
11384   format %{ "enegl    $dst, $src\t# int ndd" %}
11385   ins_encode %{
11386     __ enegl($dst$$Register, $src$$Register, false);
11387   %}
11388   ins_pipe(ialu_reg);
11389 %}
11390 
11391 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11392 %{
11393   predicate(!UseAPX);
11394   match(Set dst (NegI dst));
11395   effect(KILL cr);
11396   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11397 
11398   format %{ "negl    $dst\t# int" %}
11399   ins_encode %{
11400     __ negl($dst$$Register);
11401   %}
11402   ins_pipe(ialu_reg);
11403 %}
11404 
11405 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11406 %{
11407   predicate(UseAPX);
11408   match(Set dst (NegI src));
11409   effect(KILL cr);
11410   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11411 
11412   format %{ "enegl    $dst, $src\t# int ndd" %}
11413   ins_encode %{
11414     __ enegl($dst$$Register, $src$$Register, false);
11415   %}
11416   ins_pipe(ialu_reg);
11417 %}
11418 
11419 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11420 %{
11421   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11422   effect(KILL cr);
11423   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11424 
11425   format %{ "negl    $dst\t# int" %}
11426   ins_encode %{
11427     __ negl($dst$$Address);
11428   %}
11429   ins_pipe(ialu_reg);
11430 %}
11431 
11432 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11433 %{
11434   predicate(!UseAPX);
11435   match(Set dst (SubL zero dst));
11436   effect(KILL cr);
11437   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11438 
11439   format %{ "negq    $dst\t# long" %}
11440   ins_encode %{
11441     __ negq($dst$$Register);
11442   %}
11443   ins_pipe(ialu_reg);
11444 %}
11445 
11446 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11447 %{
11448   predicate(UseAPX);
11449   match(Set dst (SubL zero src));
11450   effect(KILL cr);
11451   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11452 
11453   format %{ "enegq    $dst, $src\t# long ndd" %}
11454   ins_encode %{
11455     __ enegq($dst$$Register, $src$$Register, false);
11456   %}
11457   ins_pipe(ialu_reg);
11458 %}
11459 
11460 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11461 %{
11462   predicate(!UseAPX);
11463   match(Set dst (NegL dst));
11464   effect(KILL cr);
11465   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11466 
  format %{ "negq    $dst\t# long" %}
11468   ins_encode %{
11469     __ negq($dst$$Register);
11470   %}
11471   ins_pipe(ialu_reg);
11472 %}
11473 
11474 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11475 %{
11476   predicate(UseAPX);
11477   match(Set dst (NegL src));
11478   effect(KILL cr);
11479   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11480 
11481   format %{ "enegq    $dst, $src\t# long ndd" %}
11482   ins_encode %{
11483     __ enegq($dst$$Register, $src$$Register, false);
11484   %}
11485   ins_pipe(ialu_reg);
11486 %}
11487 
11488 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11489 %{
11490   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11491   effect(KILL cr);
11492   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11493 
11494   format %{ "negq    $dst\t# long" %}
11495   ins_encode %{
11496     __ negq($dst$$Address);
11497   %}
11498   ins_pipe(ialu_reg);
11499 %}
11500 
11501 //----------Multiplication/Division Instructions-------------------------------
11502 // Integer Multiplication Instructions
11503 // Multiply Register
11504 
11505 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11506 %{
11507   predicate(!UseAPX);
11508   match(Set dst (MulI dst src));
11509   effect(KILL cr);
11510 
11511   ins_cost(300);
11512   format %{ "imull   $dst, $src\t# int" %}
11513   ins_encode %{
11514     __ imull($dst$$Register, $src$$Register);
11515   %}
11516   ins_pipe(ialu_reg_reg_alu0);
11517 %}
11518 
11519 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11520 %{
11521   predicate(UseAPX);
11522   match(Set dst (MulI src1 src2));
11523   effect(KILL cr);
11524   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11525 
11526   ins_cost(300);
11527   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11528   ins_encode %{
11529     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11530   %}
11531   ins_pipe(ialu_reg_reg_alu0);
11532 %}
11533 
11534 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11535 %{
11536   match(Set dst (MulI src imm));
11537   effect(KILL cr);
11538 
11539   ins_cost(300);
11540   format %{ "imull   $dst, $src, $imm\t# int" %}
11541   ins_encode %{
11542     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11543   %}
11544   ins_pipe(ialu_reg_reg_alu0);
11545 %}
11546 
11547 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11548 %{
11549   predicate(!UseAPX);
11550   match(Set dst (MulI dst (LoadI src)));
11551   effect(KILL cr);
11552 
11553   ins_cost(350);
11554   format %{ "imull   $dst, $src\t# int" %}
11555   ins_encode %{
11556     __ imull($dst$$Register, $src$$Address);
11557   %}
11558   ins_pipe(ialu_reg_mem_alu0);
11559 %}
11560 
11561 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11562 %{
11563   predicate(UseAPX);
11564   match(Set dst (MulI src1 (LoadI src2)));
11565   effect(KILL cr);
11566   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11567 
11568   ins_cost(350);
11569   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11570   ins_encode %{
11571     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11572   %}
11573   ins_pipe(ialu_reg_mem_alu0);
11574 %}
11575 
11576 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11577 %{
11578   match(Set dst (MulI (LoadI src) imm));
11579   effect(KILL cr);
11580 
11581   ins_cost(300);
11582   format %{ "imull   $dst, $src, $imm\t# int" %}
11583   ins_encode %{
11584     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11585   %}
11586   ins_pipe(ialu_reg_mem_alu0);
11587 %}
11588 
11589 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11590 %{
11591   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11592   effect(KILL cr, KILL src2);
11593 
  expand %{
    mulI_rReg(dst, src1, cr);
    mulI_rReg(src2, src3, cr);
    addI_rReg(dst, src2, cr);
  %}
11597 %}
11598 
11599 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11600 %{
11601   predicate(!UseAPX);
11602   match(Set dst (MulL dst src));
11603   effect(KILL cr);
11604 
11605   ins_cost(300);
11606   format %{ "imulq   $dst, $src\t# long" %}
11607   ins_encode %{
11608     __ imulq($dst$$Register, $src$$Register);
11609   %}
11610   ins_pipe(ialu_reg_reg_alu0);
11611 %}
11612 
11613 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11614 %{
11615   predicate(UseAPX);
11616   match(Set dst (MulL src1 src2));
11617   effect(KILL cr);
11618   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11619 
11620   ins_cost(300);
11621   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11622   ins_encode %{
11623     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11624   %}
11625   ins_pipe(ialu_reg_reg_alu0);
11626 %}
11627 
11628 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11629 %{
11630   match(Set dst (MulL src imm));
11631   effect(KILL cr);
11632 
11633   ins_cost(300);
11634   format %{ "imulq   $dst, $src, $imm\t# long" %}
11635   ins_encode %{
11636     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11637   %}
11638   ins_pipe(ialu_reg_reg_alu0);
11639 %}
11640 
11641 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11642 %{
11643   predicate(!UseAPX);
11644   match(Set dst (MulL dst (LoadL src)));
11645   effect(KILL cr);
11646 
11647   ins_cost(350);
11648   format %{ "imulq   $dst, $src\t# long" %}
11649   ins_encode %{
11650     __ imulq($dst$$Register, $src$$Address);
11651   %}
11652   ins_pipe(ialu_reg_mem_alu0);
11653 %}
11654 
11655 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11656 %{
11657   predicate(UseAPX);
11658   match(Set dst (MulL src1 (LoadL src2)));
11659   effect(KILL cr);
11660   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11661 
11662   ins_cost(350);
  format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11664   ins_encode %{
11665     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11666   %}
11667   ins_pipe(ialu_reg_mem_alu0);
11668 %}
11669 
11670 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11671 %{
11672   match(Set dst (MulL (LoadL src) imm));
11673   effect(KILL cr);
11674 
11675   ins_cost(300);
11676   format %{ "imulq   $dst, $src, $imm\t# long" %}
11677   ins_encode %{
11678     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11679   %}
11680   ins_pipe(ialu_reg_mem_alu0);
11681 %}
11682 
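// One-operand imulq/mulq multiply RAX by the operand and leave the 128-bit
// product in RDX:RAX; the high half in RDX is the MulHiL/UMulHiL result.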
11683 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11684 %{
11685   match(Set dst (MulHiL src rax));
11686   effect(USE_KILL rax, KILL cr);
11687 
11688   ins_cost(300);
11689   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11690   ins_encode %{
11691     __ imulq($src$$Register);
11692   %}
11693   ins_pipe(ialu_reg_reg_alu0);
11694 %}
11695 
11696 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11697 %{
11698   match(Set dst (UMulHiL src rax));
11699   effect(USE_KILL rax, KILL cr);
11700 
11701   ins_cost(300);
11702   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11703   ins_encode %{
11704     __ mulq($src$$Register);
11705   %}
11706   ins_pipe(ialu_reg_reg_alu0);
11707 %}
11708 
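// IDIV raises #DE not only on division by zero but also on quotient
// overflow, so the div/mod expansions below special-case MIN_VALUE / -1 and
// produce quotient MIN_VALUE, remainder 0 directly, as Java semantics
// require.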
11709 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11710                    rFlagsReg cr)
11711 %{
11712   match(Set rax (DivI rax div));
11713   effect(KILL rdx, KILL cr);
11714 
11715   ins_cost(30*100+10*100); // XXX
11716   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11717             "jne,s   normal\n\t"
11718             "xorl    rdx, rdx\n\t"
11719             "cmpl    $div, -1\n\t"
11720             "je,s    done\n"
11721     "normal: cdql\n\t"
11722             "idivl   $div\n"
11723     "done:"        %}
11724   ins_encode(cdql_enc(div));
11725   ins_pipe(ialu_reg_reg_alu0);
11726 %}
11727 
11728 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11729                    rFlagsReg cr)
11730 %{
11731   match(Set rax (DivL rax div));
11732   effect(KILL rdx, KILL cr);
11733 
11734   ins_cost(30*100+10*100); // XXX
11735   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11736             "cmpq    rax, rdx\n\t"
11737             "jne,s   normal\n\t"
11738             "xorl    rdx, rdx\n\t"
11739             "cmpq    $div, -1\n\t"
11740             "je,s    done\n"
11741     "normal: cdqq\n\t"
11742             "idivq   $div\n"
11743     "done:"        %}
11744   ins_encode(cdqq_enc(div));
11745   ins_pipe(ialu_reg_reg_alu0);
11746 %}
11747 
11748 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11749 %{
11750   match(Set rax (UDivI rax div));
11751   effect(KILL rdx, KILL cr);
11752 
11753   ins_cost(300);
11754   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11755   ins_encode %{
11756     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11757   %}
11758   ins_pipe(ialu_reg_reg_alu0);
11759 %}
11760 
11761 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11762 %{
11763   match(Set rax (UDivL rax div));
11764   effect(KILL rdx, KILL cr);
11765 
11766   ins_cost(300);
11767   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11768   ins_encode %{
11769      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11770   %}
11771   ins_pipe(ialu_reg_reg_alu0);
11772 %}
11773 
11774 // Integer DIVMOD with Register, both quotient and mod results
11775 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11776                              rFlagsReg cr)
11777 %{
11778   match(DivModI rax div);
11779   effect(KILL cr);
11780 
11781   ins_cost(30*100+10*100); // XXX
11782   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11783             "jne,s   normal\n\t"
11784             "xorl    rdx, rdx\n\t"
11785             "cmpl    $div, -1\n\t"
11786             "je,s    done\n"
11787     "normal: cdql\n\t"
11788             "idivl   $div\n"
11789     "done:"        %}
11790   ins_encode(cdql_enc(div));
11791   ins_pipe(pipe_slow);
11792 %}
11793 
11794 // Long DIVMOD with Register, both quotient and mod results
11795 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11796                              rFlagsReg cr)
11797 %{
11798   match(DivModL rax div);
11799   effect(KILL cr);
11800 
11801   ins_cost(30*100+10*100); // XXX
11802   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11803             "cmpq    rax, rdx\n\t"
11804             "jne,s   normal\n\t"
11805             "xorl    rdx, rdx\n\t"
11806             "cmpq    $div, -1\n\t"
11807             "je,s    done\n"
11808     "normal: cdqq\n\t"
11809             "idivq   $div\n"
11810     "done:"        %}
11811   ins_encode(cdqq_enc(div));
11812   ins_pipe(pipe_slow);
11813 %}
11814 
11815 // Unsigned integer DIVMOD with Register, both quotient and mod results
11816 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11817                               no_rax_rdx_RegI div, rFlagsReg cr)
11818 %{
11819   match(UDivModI rax div);
11820   effect(TEMP tmp, KILL cr);
11821 
11822   ins_cost(300);
11823   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11824             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11825           %}
11826   ins_encode %{
11827     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11828   %}
11829   ins_pipe(pipe_slow);
11830 %}
11831 
11832 // Unsigned long DIVMOD with Register, both quotient and mod results
11833 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11834                               no_rax_rdx_RegL div, rFlagsReg cr)
11835 %{
11836   match(UDivModL rax div);
11837   effect(TEMP tmp, KILL cr);
11838 
11839   ins_cost(300);
11840   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11841             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11842           %}
11843   ins_encode %{
11844     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11845   %}
11846   ins_pipe(pipe_slow);
11847 %}
11848 
11849 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11850                    rFlagsReg cr)
11851 %{
11852   match(Set rdx (ModI rax div));
11853   effect(KILL rax, KILL cr);
11854 
11855   ins_cost(300); // XXX
11856   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11857             "jne,s   normal\n\t"
11858             "xorl    rdx, rdx\n\t"
11859             "cmpl    $div, -1\n\t"
11860             "je,s    done\n"
11861     "normal: cdql\n\t"
11862             "idivl   $div\n"
11863     "done:"        %}
11864   ins_encode(cdql_enc(div));
11865   ins_pipe(ialu_reg_reg_alu0);
11866 %}
11867 
11868 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11869                    rFlagsReg cr)
11870 %{
11871   match(Set rdx (ModL rax div));
11872   effect(KILL rax, KILL cr);
11873 
11874   ins_cost(300); // XXX
11875   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11876             "cmpq    rax, rdx\n\t"
11877             "jne,s   normal\n\t"
11878             "xorl    rdx, rdx\n\t"
11879             "cmpq    $div, -1\n\t"
11880             "je,s    done\n"
11881     "normal: cdqq\n\t"
11882             "idivq   $div\n"
11883     "done:"        %}
11884   ins_encode(cdqq_enc(div));
11885   ins_pipe(ialu_reg_reg_alu0);
11886 %}
11887 
11888 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11889 %{
11890   match(Set rdx (UModI rax div));
11891   effect(KILL rax, KILL cr);
11892 
11893   ins_cost(300);
11894   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11895   ins_encode %{
11896     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11897   %}
11898   ins_pipe(ialu_reg_reg_alu0);
11899 %}
11900 
11901 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11902 %{
11903   match(Set rdx (UModL rax div));
11904   effect(KILL rax, KILL cr);
11905 
11906   ins_cost(300);
11907   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11908   ins_encode %{
11909     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11910   %}
11911   ins_pipe(ialu_reg_reg_alu0);
11912 %}
11913 
11914 // Integer Shift Instructions
11915 // Shift Left by one, two, three
11916 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11917 %{
11918   predicate(!UseAPX);
11919   match(Set dst (LShiftI dst shift));
11920   effect(KILL cr);
11921 
11922   format %{ "sall    $dst, $shift" %}
11923   ins_encode %{
11924     __ sall($dst$$Register, $shift$$constant);
11925   %}
11926   ins_pipe(ialu_reg);
11927 %}
11928 
11929 // Shift Left by one, two, three
11930 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11931 %{
11932   predicate(UseAPX);
11933   match(Set dst (LShiftI src shift));
11934   effect(KILL cr);
11935   flag(PD::Flag_ndd_demotable_opr1);
11936 
  format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11938   ins_encode %{
11939     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11940   %}
11941   ins_pipe(ialu_reg);
11942 %}
11943 
11944 // Shift Left by 8-bit immediate
11945 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11946 %{
11947   predicate(!UseAPX);
11948   match(Set dst (LShiftI dst shift));
11949   effect(KILL cr);
11950 
11951   format %{ "sall    $dst, $shift" %}
11952   ins_encode %{
11953     __ sall($dst$$Register, $shift$$constant);
11954   %}
11955   ins_pipe(ialu_reg);
11956 %}
11957 
11958 // Shift Left by 8-bit immediate
11959 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11960 %{
11961   predicate(UseAPX);
11962   match(Set dst (LShiftI src shift));
11963   effect(KILL cr);
11964   flag(PD::Flag_ndd_demotable_opr1);
11965 
11966   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11967   ins_encode %{
11968     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11969   %}
11970   ins_pipe(ialu_reg);
11971 %}
11972 
11973 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11974 %{
11975   predicate(UseAPX);
11976   match(Set dst (LShiftI (LoadI src) shift));
11977   effect(KILL cr);
11978 
11979   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11980   ins_encode %{
11981     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11982   %}
11983   ins_pipe(ialu_reg);
11984 %}
11985 
11986 // Shift Left by 8-bit immediate
11987 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11988 %{
11989   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11990   effect(KILL cr);
11991 
11992   format %{ "sall    $dst, $shift" %}
11993   ins_encode %{
11994     __ sall($dst$$Address, $shift$$constant);
11995   %}
11996   ins_pipe(ialu_mem_imm);
11997 %}
11998 
11999 // Shift Left by variable
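// Legacy variable shifts take the count implicitly in CL, hence the
// rcx_RegI shift operand and the count-less MacroAssembler calls below.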
12000 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12001 %{
12002   predicate(!VM_Version::supports_bmi2());
12003   match(Set dst (LShiftI dst shift));
12004   effect(KILL cr);
12005 
12006   format %{ "sall    $dst, $shift" %}
12007   ins_encode %{
12008     __ sall($dst$$Register);
12009   %}
12010   ins_pipe(ialu_reg_reg);
12011 %}
12012 
12013 // Shift Left by variable
12014 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12015 %{
12016   predicate(!VM_Version::supports_bmi2());
12017   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12018   effect(KILL cr);
12019 
12020   format %{ "sall    $dst, $shift" %}
12021   ins_encode %{
12022     __ sall($dst$$Address);
12023   %}
12024   ins_pipe(ialu_mem_reg);
12025 %}
12026 
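// BMI2 shlx/sarx/shrx take the shift count in any register and leave EFLAGS
// untouched, so these patterns need neither rcx nor a KILL of cr.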
12027 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12028 %{
12029   predicate(VM_Version::supports_bmi2());
12030   match(Set dst (LShiftI src shift));
12031 
12032   format %{ "shlxl   $dst, $src, $shift" %}
12033   ins_encode %{
12034     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12035   %}
12036   ins_pipe(ialu_reg_reg);
12037 %}
12038 
12039 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12040 %{
12041   predicate(VM_Version::supports_bmi2());
12042   match(Set dst (LShiftI (LoadI src) shift));
12043   ins_cost(175);
12044   format %{ "shlxl   $dst, $src, $shift" %}
12045   ins_encode %{
12046     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12047   %}
12048   ins_pipe(ialu_reg_mem);
12049 %}
12050 
12051 // Arithmetic Shift Right by 8-bit immediate
12052 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12053 %{
12054   predicate(!UseAPX);
12055   match(Set dst (RShiftI dst shift));
12056   effect(KILL cr);
12057 
12058   format %{ "sarl    $dst, $shift" %}
12059   ins_encode %{
12060     __ sarl($dst$$Register, $shift$$constant);
12061   %}
12062   ins_pipe(ialu_mem_imm);
12063 %}
12064 
12065 // Arithmetic Shift Right by 8-bit immediate
12066 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12067 %{
12068   predicate(UseAPX);
12069   match(Set dst (RShiftI src shift));
12070   effect(KILL cr);
12071   flag(PD::Flag_ndd_demotable_opr1);
12072 
12073   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12074   ins_encode %{
12075     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12076   %}
12077   ins_pipe(ialu_mem_imm);
12078 %}
12079 
12080 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12081 %{
12082   predicate(UseAPX);
12083   match(Set dst (RShiftI (LoadI src) shift));
12084   effect(KILL cr);
12085 
12086   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12087   ins_encode %{
12088     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12089   %}
12090   ins_pipe(ialu_mem_imm);
12091 %}
12092 
12093 // Arithmetic Shift Right by 8-bit immediate
12094 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12095 %{
12096   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12097   effect(KILL cr);
12098 
12099   format %{ "sarl    $dst, $shift" %}
12100   ins_encode %{
12101     __ sarl($dst$$Address, $shift$$constant);
12102   %}
12103   ins_pipe(ialu_mem_imm);
12104 %}
12105 
12106 // Arithmetic Shift Right by variable
12107 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12108 %{
12109   predicate(!VM_Version::supports_bmi2());
12110   match(Set dst (RShiftI dst shift));
12111   effect(KILL cr);
12112 
12113   format %{ "sarl    $dst, $shift" %}
12114   ins_encode %{
12115     __ sarl($dst$$Register);
12116   %}
12117   ins_pipe(ialu_reg_reg);
12118 %}
12119 
12120 // Arithmetic Shift Right by variable
12121 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12122 %{
12123   predicate(!VM_Version::supports_bmi2());
12124   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12125   effect(KILL cr);
12126 
12127   format %{ "sarl    $dst, $shift" %}
12128   ins_encode %{
12129     __ sarl($dst$$Address);
12130   %}
12131   ins_pipe(ialu_mem_reg);
12132 %}
12133 
12134 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12135 %{
12136   predicate(VM_Version::supports_bmi2());
12137   match(Set dst (RShiftI src shift));
12138 
12139   format %{ "sarxl   $dst, $src, $shift" %}
12140   ins_encode %{
12141     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12142   %}
12143   ins_pipe(ialu_reg_reg);
12144 %}
12145 
12146 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12147 %{
12148   predicate(VM_Version::supports_bmi2());
12149   match(Set dst (RShiftI (LoadI src) shift));
12150   ins_cost(175);
12151   format %{ "sarxl   $dst, $src, $shift" %}
12152   ins_encode %{
12153     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12154   %}
12155   ins_pipe(ialu_reg_mem);
12156 %}
12157 
12158 // Logical Shift Right by 8-bit immediate
12159 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12160 %{
12161   predicate(!UseAPX);
12162   match(Set dst (URShiftI dst shift));
12163   effect(KILL cr);
12164 
12165   format %{ "shrl    $dst, $shift" %}
12166   ins_encode %{
12167     __ shrl($dst$$Register, $shift$$constant);
12168   %}
12169   ins_pipe(ialu_reg);
12170 %}
12171 
12172 // Logical Shift Right by 8-bit immediate
12173 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12174 %{
12175   predicate(UseAPX);
12176   match(Set dst (URShiftI src shift));
12177   effect(KILL cr);
12178   flag(PD::Flag_ndd_demotable_opr1);
12179 
12180   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12181   ins_encode %{
12182     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12183   %}
12184   ins_pipe(ialu_reg);
12185 %}
12186 
12187 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12188 %{
12189   predicate(UseAPX);
12190   match(Set dst (URShiftI (LoadI src) shift));
12191   effect(KILL cr);
12192 
12193   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12194   ins_encode %{
12195     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12196   %}
12197   ins_pipe(ialu_reg);
12198 %}
12199 
12200 // Logical Shift Right by 8-bit immediate
12201 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12202 %{
12203   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12204   effect(KILL cr);
12205 
12206   format %{ "shrl    $dst, $shift" %}
12207   ins_encode %{
12208     __ shrl($dst$$Address, $shift$$constant);
12209   %}
12210   ins_pipe(ialu_mem_imm);
12211 %}
12212 
12213 // Logical Shift Right by variable
12214 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12215 %{
12216   predicate(!VM_Version::supports_bmi2());
12217   match(Set dst (URShiftI dst shift));
12218   effect(KILL cr);
12219 
12220   format %{ "shrl    $dst, $shift" %}
12221   ins_encode %{
12222     __ shrl($dst$$Register);
12223   %}
12224   ins_pipe(ialu_reg_reg);
12225 %}
12226 
12227 // Logical Shift Right by variable
12228 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12229 %{
12230   predicate(!VM_Version::supports_bmi2());
12231   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12232   effect(KILL cr);
12233 
12234   format %{ "shrl    $dst, $shift" %}
12235   ins_encode %{
12236     __ shrl($dst$$Address);
12237   %}
12238   ins_pipe(ialu_mem_reg);
12239 %}
12240 
12241 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12242 %{
12243   predicate(VM_Version::supports_bmi2());
12244   match(Set dst (URShiftI src shift));
12245 
12246   format %{ "shrxl   $dst, $src, $shift" %}
12247   ins_encode %{
12248     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12249   %}
12250   ins_pipe(ialu_reg_reg);
12251 %}
12252 
12253 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12254 %{
12255   predicate(VM_Version::supports_bmi2());
12256   match(Set dst (URShiftI (LoadI src) shift));
12257   ins_cost(175);
12258   format %{ "shrxl   $dst, $src, $shift" %}
12259   ins_encode %{
12260     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12261   %}
12262   ins_pipe(ialu_reg_mem);
12263 %}
12264 
12265 // Long Shift Instructions
12266 // Shift Left by one, two, three
12267 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12268 %{
12269   predicate(!UseAPX);
12270   match(Set dst (LShiftL dst shift));
12271   effect(KILL cr);
12272 
12273   format %{ "salq    $dst, $shift" %}
12274   ins_encode %{
12275     __ salq($dst$$Register, $shift$$constant);
12276   %}
12277   ins_pipe(ialu_reg);
12278 %}
12279 
12280 // Shift Left by one, two, three
12281 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12282 %{
12283   predicate(UseAPX);
12284   match(Set dst (LShiftL src shift));
12285   effect(KILL cr);
12286   flag(PD::Flag_ndd_demotable_opr1);
12287 
12288   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12289   ins_encode %{
12290     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12291   %}
12292   ins_pipe(ialu_reg);
12293 %}
12294 
12295 // Shift Left by 8-bit immediate
12296 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12297 %{
12298   predicate(!UseAPX);
12299   match(Set dst (LShiftL dst shift));
12300   effect(KILL cr);
12301 
12302   format %{ "salq    $dst, $shift" %}
12303   ins_encode %{
12304     __ salq($dst$$Register, $shift$$constant);
12305   %}
12306   ins_pipe(ialu_reg);
12307 %}
12308 
12309 // Shift Left by 8-bit immediate
12310 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12311 %{
12312   predicate(UseAPX);
12313   match(Set dst (LShiftL src shift));
12314   effect(KILL cr);
12315   flag(PD::Flag_ndd_demotable_opr1);
12316 
12317   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12318   ins_encode %{
12319     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12320   %}
12321   ins_pipe(ialu_reg);
12322 %}
12323 
12324 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12325 %{
12326   predicate(UseAPX);
12327   match(Set dst (LShiftL (LoadL src) shift));
12328   effect(KILL cr);
12329 
12330   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12331   ins_encode %{
12332     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12333   %}
12334   ins_pipe(ialu_reg);
12335 %}
12336 
12337 // Shift Left by 8-bit immediate
12338 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12339 %{
12340   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12341   effect(KILL cr);
12342 
12343   format %{ "salq    $dst, $shift" %}
12344   ins_encode %{
12345     __ salq($dst$$Address, $shift$$constant);
12346   %}
12347   ins_pipe(ialu_mem_imm);
12348 %}
12349 
12350 // Shift Left by variable
12351 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12352 %{
12353   predicate(!VM_Version::supports_bmi2());
12354   match(Set dst (LShiftL dst shift));
12355   effect(KILL cr);
12356 
12357   format %{ "salq    $dst, $shift" %}
12358   ins_encode %{
12359     __ salq($dst$$Register);
12360   %}
12361   ins_pipe(ialu_reg_reg);
12362 %}
12363 
12364 // Shift Left by variable
12365 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12366 %{
12367   predicate(!VM_Version::supports_bmi2());
12368   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12369   effect(KILL cr);
12370 
12371   format %{ "salq    $dst, $shift" %}
12372   ins_encode %{
12373     __ salq($dst$$Address);
12374   %}
12375   ins_pipe(ialu_mem_reg);
12376 %}
12377 
12378 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12379 %{
12380   predicate(VM_Version::supports_bmi2());
12381   match(Set dst (LShiftL src shift));
12382 
12383   format %{ "shlxq   $dst, $src, $shift" %}
12384   ins_encode %{
12385     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12386   %}
12387   ins_pipe(ialu_reg_reg);
12388 %}
12389 
12390 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12391 %{
12392   predicate(VM_Version::supports_bmi2());
12393   match(Set dst (LShiftL (LoadL src) shift));
12394   ins_cost(175);
12395   format %{ "shlxq   $dst, $src, $shift" %}
12396   ins_encode %{
12397     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12398   %}
12399   ins_pipe(ialu_reg_mem);
12400 %}
12401 
// Arithmetic Shift Right by immediate (the count is masked to 0..63)
12403 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12404 %{
12405   predicate(!UseAPX);
12406   match(Set dst (RShiftL dst shift));
12407   effect(KILL cr);
12408 
12409   format %{ "sarq    $dst, $shift" %}
12410   ins_encode %{
12411     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12412   %}
12413   ins_pipe(ialu_mem_imm);
12414 %}
12415 
// Arithmetic Shift Right by immediate (the count is masked to 0..63)
12417 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12418 %{
12419   predicate(UseAPX);
12420   match(Set dst (RShiftL src shift));
12421   effect(KILL cr);
12422   flag(PD::Flag_ndd_demotable_opr1);
12423 
12424   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12425   ins_encode %{
12426     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12427   %}
12428   ins_pipe(ialu_mem_imm);
12429 %}
12430 
12431 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12432 %{
12433   predicate(UseAPX);
12434   match(Set dst (RShiftL (LoadL src) shift));
12435   effect(KILL cr);
12436 
12437   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12438   ins_encode %{
12439     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12440   %}
12441   ins_pipe(ialu_mem_imm);
12442 %}
12443 
// Arithmetic Shift Right by immediate (the count is masked to 0..63)
12445 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12446 %{
12447   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12448   effect(KILL cr);
12449 
12450   format %{ "sarq    $dst, $shift" %}
12451   ins_encode %{
12452     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12453   %}
12454   ins_pipe(ialu_mem_imm);
12455 %}
12456 
12457 // Arithmetic Shift Right by variable
12458 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12459 %{
12460   predicate(!VM_Version::supports_bmi2());
12461   match(Set dst (RShiftL dst shift));
12462   effect(KILL cr);
12463 
12464   format %{ "sarq    $dst, $shift" %}
12465   ins_encode %{
12466     __ sarq($dst$$Register);
12467   %}
12468   ins_pipe(ialu_reg_reg);
12469 %}
12470 
12471 // Arithmetic Shift Right by variable
12472 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12473 %{
12474   predicate(!VM_Version::supports_bmi2());
12475   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12476   effect(KILL cr);
12477 
12478   format %{ "sarq    $dst, $shift" %}
12479   ins_encode %{
12480     __ sarq($dst$$Address);
12481   %}
12482   ins_pipe(ialu_mem_reg);
12483 %}
12484 
12485 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12486 %{
12487   predicate(VM_Version::supports_bmi2());
12488   match(Set dst (RShiftL src shift));
12489 
12490   format %{ "sarxq   $dst, $src, $shift" %}
12491   ins_encode %{
12492     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12493   %}
12494   ins_pipe(ialu_reg_reg);
12495 %}
12496 
12497 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12498 %{
12499   predicate(VM_Version::supports_bmi2());
12500   match(Set dst (RShiftL (LoadL src) shift));
12501   ins_cost(175);
12502   format %{ "sarxq   $dst, $src, $shift" %}
12503   ins_encode %{
12504     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12505   %}
12506   ins_pipe(ialu_reg_mem);
12507 %}
12508 
12509 // Logical Shift Right by 8-bit immediate
12510 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12511 %{
12512   predicate(!UseAPX);
12513   match(Set dst (URShiftL dst shift));
12514   effect(KILL cr);
12515 
12516   format %{ "shrq    $dst, $shift" %}
12517   ins_encode %{
12518     __ shrq($dst$$Register, $shift$$constant);
12519   %}
12520   ins_pipe(ialu_reg);
12521 %}
12522 
12523 // Logical Shift Right by 8-bit immediate
12524 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12525 %{
12526   predicate(UseAPX);
12527   match(Set dst (URShiftL src shift));
12528   effect(KILL cr);
12529   flag(PD::Flag_ndd_demotable_opr1);
12530 
12531   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12532   ins_encode %{
12533     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12534   %}
12535   ins_pipe(ialu_reg);
12536 %}
12537 
12538 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12539 %{
12540   predicate(UseAPX);
12541   match(Set dst (URShiftL (LoadL src) shift));
12542   effect(KILL cr);
12543 
12544   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12545   ins_encode %{
12546     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12547   %}
12548   ins_pipe(ialu_reg);
12549 %}
12550 
12551 // Logical Shift Right by 8-bit immediate
12552 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12553 %{
12554   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12555   effect(KILL cr);
12556 
12557   format %{ "shrq    $dst, $shift" %}
12558   ins_encode %{
12559     __ shrq($dst$$Address, $shift$$constant);
12560   %}
12561   ins_pipe(ialu_mem_imm);
12562 %}
12563 
12564 // Logical Shift Right by variable
12565 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12566 %{
12567   predicate(!VM_Version::supports_bmi2());
12568   match(Set dst (URShiftL dst shift));
12569   effect(KILL cr);
12570 
12571   format %{ "shrq    $dst, $shift" %}
12572   ins_encode %{
12573     __ shrq($dst$$Register);
12574   %}
12575   ins_pipe(ialu_reg_reg);
12576 %}
12577 
12578 // Logical Shift Right by variable
12579 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12580 %{
12581   predicate(!VM_Version::supports_bmi2());
12582   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12583   effect(KILL cr);
12584 
12585   format %{ "shrq    $dst, $shift" %}
12586   ins_encode %{
12587     __ shrq($dst$$Address);
12588   %}
12589   ins_pipe(ialu_mem_reg);
12590 %}
12591 
12592 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12593 %{
12594   predicate(VM_Version::supports_bmi2());
12595   match(Set dst (URShiftL src shift));
12596 
12597   format %{ "shrxq   $dst, $src, $shift" %}
12598   ins_encode %{
12599     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12600   %}
12601   ins_pipe(ialu_reg_reg);
12602 %}
12603 
12604 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12605 %{
12606   predicate(VM_Version::supports_bmi2());
12607   match(Set dst (URShiftL (LoadL src) shift));
12608   ins_cost(175);
12609   format %{ "shrxq   $dst, $src, $shift" %}
12610   ins_encode %{
12611     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12612   %}
12613   ins_pipe(ialu_reg_mem);
12614 %}
12615 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
12618 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12619 %{
12620   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12621 
12622   format %{ "movsbl  $dst, $src\t# i2b" %}
12623   ins_encode %{
12624     __ movsbl($dst$$Register, $src$$Register);
12625   %}
12626   ins_pipe(ialu_reg_reg);
12627 %}
12628 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12631 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12632 %{
12633   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12634 
12635   format %{ "movswl  $dst, $src\t# i2s" %}
12636   ins_encode %{
12637     __ movswl($dst$$Register, $src$$Register);
12638   %}
12639   ins_pipe(ialu_reg_reg);
12640 %}
12641 
12642 // ROL/ROR instructions
12643 
12644 // Rotate left by constant.
12645 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12646 %{
12647   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12648   match(Set dst (RotateLeft dst shift));
12649   effect(KILL cr);
12650   format %{ "roll    $dst, $shift" %}
12651   ins_encode %{
12652     __ roll($dst$$Register, $shift$$constant);
12653   %}
12654   ins_pipe(ialu_reg);
12655 %}
12656 
12657 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12658 %{
12659   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12660   match(Set dst (RotateLeft src shift));
12661   format %{ "rolxl   $dst, $src, $shift" %}
12662   ins_encode %{
12663     int shift = 32 - ($shift$$constant & 31);
12664     __ rorxl($dst$$Register, $src$$Register, shift);
12665   %}
12666   ins_pipe(ialu_reg_reg);
12667 %}
12668 
12669 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12670 %{
12671   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12672   match(Set dst (RotateLeft (LoadI src) shift));
12673   ins_cost(175);
12674   format %{ "rolxl   $dst, $src, $shift" %}
12675   ins_encode %{
12676     int shift = 32 - ($shift$$constant & 31);
12677     __ rorxl($dst$$Register, $src$$Address, shift);
12678   %}
12679   ins_pipe(ialu_reg_mem);
12680 %}
12681 
12682 // Rotate Left by variable
12683 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12684 %{
12685   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12686   match(Set dst (RotateLeft dst shift));
12687   effect(KILL cr);
12688   format %{ "roll    $dst, $shift" %}
12689   ins_encode %{
12690     __ roll($dst$$Register);
12691   %}
12692   ins_pipe(ialu_reg_reg);
12693 %}
12694 
12695 // Rotate Left by variable
12696 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12697 %{
12698   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12699   match(Set dst (RotateLeft src shift));
12700   effect(KILL cr);
12701   flag(PD::Flag_ndd_demotable_opr1);
12702 
12703   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12704   ins_encode %{
12705     __ eroll($dst$$Register, $src$$Register, false);
12706   %}
12707   ins_pipe(ialu_reg_reg);
12708 %}
12709 
12710 // Rotate Right by constant.
12711 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12712 %{
12713   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12714   match(Set dst (RotateRight dst shift));
12715   effect(KILL cr);
12716   format %{ "rorl    $dst, $shift" %}
12717   ins_encode %{
12718     __ rorl($dst$$Register, $shift$$constant);
12719   %}
12720   ins_pipe(ialu_reg);
12721 %}
12722 
12723 // Rotate Right by constant.
12724 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12725 %{
12726   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12727   match(Set dst (RotateRight src shift));
12728   format %{ "rorxl   $dst, $src, $shift" %}
12729   ins_encode %{
12730     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12731   %}
12732   ins_pipe(ialu_reg_reg);
12733 %}
12734 
12735 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12736 %{
12737   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12738   match(Set dst (RotateRight (LoadI src) shift));
12739   ins_cost(175);
12740   format %{ "rorxl   $dst, $src, $shift" %}
12741   ins_encode %{
12742     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12743   %}
12744   ins_pipe(ialu_reg_mem);
12745 %}
12746 
12747 // Rotate Right by variable
12748 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12749 %{
12750   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12751   match(Set dst (RotateRight dst shift));
12752   effect(KILL cr);
12753   format %{ "rorl    $dst, $shift" %}
12754   ins_encode %{
12755     __ rorl($dst$$Register);
12756   %}
12757   ins_pipe(ialu_reg_reg);
12758 %}
12759 
12760 // Rotate Right by variable
12761 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12762 %{
12763   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12764   match(Set dst (RotateRight src shift));
12765   effect(KILL cr);
12766   flag(PD::Flag_ndd_demotable_opr1);
12767 
12768   format %{ "erorl    $dst, $src, $shift\t# rotate right(int ndd)" %}
12769   ins_encode %{
12770     __ erorl($dst$$Register, $src$$Register, false);
12771   %}
12772   ins_pipe(ialu_reg_reg);
12773 %}
12774 
12775 // Rotate Left by constant.
12776 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12777 %{
12778   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12779   match(Set dst (RotateLeft dst shift));
12780   effect(KILL cr);
12781   format %{ "rolq    $dst, $shift" %}
12782   ins_encode %{
12783     __ rolq($dst$$Register, $shift$$constant);
12784   %}
12785   ins_pipe(ialu_reg);
12786 %}
12787 
12788 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12789 %{
12790   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12791   match(Set dst (RotateLeft src shift));
12792   format %{ "rolxq   $dst, $src, $shift" %}
12793   ins_encode %{
12794     int shift = 64 - ($shift$$constant & 63);
12795     __ rorxq($dst$$Register, $src$$Register, shift);
12796   %}
12797   ins_pipe(ialu_reg_reg);
12798 %}
12799 
12800 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12801 %{
12802   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12803   match(Set dst (RotateLeft (LoadL src) shift));
12804   ins_cost(175);
12805   format %{ "rolxq   $dst, $src, $shift" %}
12806   ins_encode %{
12807     int shift = 64 - ($shift$$constant & 63);
12808     __ rorxq($dst$$Register, $src$$Address, shift);
12809   %}
12810   ins_pipe(ialu_reg_mem);
12811 %}
12812 
12813 // Rotate Left by variable
12814 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12815 %{
12816   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12817   match(Set dst (RotateLeft dst shift));
12818   effect(KILL cr);
12819 
12820   format %{ "rolq    $dst, $shift" %}
12821   ins_encode %{
12822     __ rolq($dst$$Register);
12823   %}
12824   ins_pipe(ialu_reg_reg);
12825 %}
12826 
12827 // Rotate Left by variable
12828 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12829 %{
12830   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12831   match(Set dst (RotateLeft src shift));
12832   effect(KILL cr);
12833   flag(PD::Flag_ndd_demotable_opr1);
12834 
12835   format %{ "erolq    $dst, $src, $shift\t# rotate left(long ndd)" %}
12836   ins_encode %{
12837     __ erolq($dst$$Register, $src$$Register, false);
12838   %}
12839   ins_pipe(ialu_reg_reg);
12840 %}
12841 
12842 // Rotate Right by constant.
12843 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12844 %{
12845   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12846   match(Set dst (RotateRight dst shift));
12847   effect(KILL cr);
12848   format %{ "rorq    $dst, $shift" %}
12849   ins_encode %{
12850     __ rorq($dst$$Register, $shift$$constant);
12851   %}
12852   ins_pipe(ialu_reg);
12853 %}
12854 
12855 // Rotate Right by constant
12856 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12857 %{
12858   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12859   match(Set dst (RotateRight src shift));
12860   format %{ "rorxq   $dst, $src, $shift" %}
12861   ins_encode %{
12862     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12863   %}
12864   ins_pipe(ialu_reg_reg);
12865 %}
12866 
12867 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12868 %{
12869   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12870   match(Set dst (RotateRight (LoadL src) shift));
12871   ins_cost(175);
12872   format %{ "rorxq   $dst, $src, $shift" %}
12873   ins_encode %{
12874     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12875   %}
12876   ins_pipe(ialu_reg_mem);
12877 %}
12878 
12879 // Rotate Right by variable
12880 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12881 %{
12882   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12883   match(Set dst (RotateRight dst shift));
12884   effect(KILL cr);
12885   format %{ "rorq    $dst, $shift" %}
12886   ins_encode %{
12887     __ rorq($dst$$Register);
12888   %}
12889   ins_pipe(ialu_reg_reg);
12890 %}
12891 
12892 // Rotate Right by variable
12893 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12894 %{
12895   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12896   match(Set dst (RotateRight src shift));
12897   effect(KILL cr);
12898   flag(PD::Flag_ndd_demotable_opr1);
12899 
12900   format %{ "erorq    $dst, $src, $shift\t# rotate right(long ndd)" %}
12901   ins_encode %{
12902     __ erorq($dst$$Register, $src$$Register, false);
12903   %}
12904   ins_pipe(ialu_reg_reg);
12905 %}
12906 
12907 //----------------------------- CompressBits/ExpandBits ------------------------
12908 
12909 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12910   predicate(n->bottom_type()->isa_long());
12911   match(Set dst (CompressBits src mask));
12912   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12913   ins_encode %{
12914     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12915   %}
12916   ins_pipe( pipe_slow );
12917 %}
12918 
12919 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12920   predicate(n->bottom_type()->isa_long());
12921   match(Set dst (ExpandBits src mask));
12922   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12923   ins_encode %{
12924     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12925   %}
12926   ins_pipe( pipe_slow );
12927 %}
12928 
12929 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12930   predicate(n->bottom_type()->isa_long());
12931   match(Set dst (CompressBits src (LoadL mask)));
12932   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12933   ins_encode %{
12934     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12935   %}
12936   ins_pipe( pipe_slow );
12937 %}
12938 
12939 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12940   predicate(n->bottom_type()->isa_long());
12941   match(Set dst (ExpandBits src (LoadL mask)));
12942   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12943   ins_encode %{
12944     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12945   %}
12946   ins_pipe( pipe_slow );
12947 %}
12948 
12949 
12950 // Logical Instructions
12951 
12952 // Integer Logical Instructions
12953 
12954 // And Instructions
12955 // And Register with Register
12956 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12957 %{
12958   predicate(!UseAPX);
12959   match(Set dst (AndI dst src));
12960   effect(KILL cr);
12961   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12962 
12963   format %{ "andl    $dst, $src\t# int" %}
12964   ins_encode %{
12965     __ andl($dst$$Register, $src$$Register);
12966   %}
12967   ins_pipe(ialu_reg_reg);
12968 %}
12969 
12970 // And Register with Register using New Data Destination (NDD)
12971 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12972 %{
12973   predicate(UseAPX);
12974   match(Set dst (AndI src1 src2));
12975   effect(KILL cr);
12976   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12977 
12978   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
12979   ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
12983   ins_pipe(ialu_reg_reg);
12984 %}
12985 
12986 // And Register with Immediate 255
12987 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12988 %{
12989   match(Set dst (AndI src mask));
12990 
12991   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
12992   ins_encode %{
12993     __ movzbl($dst$$Register, $src$$Register);
12994   %}
12995   ins_pipe(ialu_reg);
12996 %}
12997 
12998 // And Register with Immediate 255 and promote to long
12999 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
13000 %{
13001   match(Set dst (ConvI2L (AndI src mask)));
13002 
13003   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
13004   ins_encode %{
13005     __ movzbl($dst$$Register, $src$$Register);
13006   %}
13007   ins_pipe(ialu_reg);
13008 %}
13009 
13010 // And Register with Immediate 65535
13011 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13012 %{
13013   match(Set dst (AndI src mask));
13014 
13015   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
13016   ins_encode %{
13017     __ movzwl($dst$$Register, $src$$Register);
13018   %}
13019   ins_pipe(ialu_reg);
13020 %}
13021 
13022 // And Register with Immediate 65535 and promote to long
13023 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13024 %{
13025   match(Set dst (ConvI2L (AndI src mask)));
13026 
13027   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
13028   ins_encode %{
13029     __ movzwl($dst$$Register, $src$$Register);
13030   %}
13031   ins_pipe(ialu_reg);
13032 %}
13033 
13034 // Can skip int2long conversions after AND with small bitmask
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13036 %{
13037   predicate(VM_Version::supports_bmi2());
13038   ins_cost(125);
13039   effect(TEMP tmp, KILL cr);
13040   match(Set dst (ConvI2L (AndI src mask)));
13041   format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
13042   ins_encode %{
13043     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13044     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13045   %}
13046   ins_pipe(ialu_reg_reg);
13047 %}
13048 
13049 // And Register with Immediate
13050 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13051 %{
13052   predicate(!UseAPX);
13053   match(Set dst (AndI dst src));
13054   effect(KILL cr);
13055   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13056 
13057   format %{ "andl    $dst, $src\t# int" %}
13058   ins_encode %{
13059     __ andl($dst$$Register, $src$$constant);
13060   %}
13061   ins_pipe(ialu_reg);
13062 %}
13063 
13064 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13065 %{
13066   predicate(UseAPX);
13067   match(Set dst (AndI src1 src2));
13068   effect(KILL cr);
13069   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13070 
13071   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13072   ins_encode %{
13073     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13074   %}
13075   ins_pipe(ialu_reg);
13076 %}
13077 
13078 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13079 %{
13080   predicate(UseAPX);
13081   match(Set dst (AndI (LoadI src1) src2));
13082   effect(KILL cr);
13083   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13084 
13085   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13086   ins_encode %{
13087     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13088   %}
13089   ins_pipe(ialu_reg);
13090 %}
13091 
13092 // And Register with Memory
13093 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13094 %{
13095   predicate(!UseAPX);
13096   match(Set dst (AndI dst (LoadI src)));
13097   effect(KILL cr);
13098   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13099 
13100   ins_cost(150);
13101   format %{ "andl    $dst, $src\t# int" %}
13102   ins_encode %{
13103     __ andl($dst$$Register, $src$$Address);
13104   %}
13105   ins_pipe(ialu_reg_mem);
13106 %}
13107 
13108 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13109 %{
13110   predicate(UseAPX);
13111   match(Set dst (AndI src1 (LoadI src2)));
13112   effect(KILL cr);
13113   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13114 
13115   ins_cost(150);
13116   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13117   ins_encode %{
13118     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13119   %}
13120   ins_pipe(ialu_reg_mem);
13121 %}
13122 
13123 // And Memory with Register
13124 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13125 %{
13126   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13127   effect(KILL cr);
13128   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13129 
13130   ins_cost(150);
13131   format %{ "andb    $dst, $src\t# byte" %}
13132   ins_encode %{
13133     __ andb($dst$$Address, $src$$Register);
13134   %}
13135   ins_pipe(ialu_mem_reg);
13136 %}
13137 
13138 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13139 %{
13140   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13141   effect(KILL cr);
13142   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13143 
13144   ins_cost(150);
13145   format %{ "andl    $dst, $src\t# int" %}
13146   ins_encode %{
13147     __ andl($dst$$Address, $src$$Register);
13148   %}
13149   ins_pipe(ialu_mem_reg);
13150 %}
13151 
13152 // And Memory with Immediate
13153 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13154 %{
13155   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13156   effect(KILL cr);
13157   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13158 
13159   ins_cost(125);
13160   format %{ "andl    $dst, $src\t# int" %}
13161   ins_encode %{
13162     __ andl($dst$$Address, $src$$constant);
13163   %}
13164   ins_pipe(ialu_mem_imm);
13165 %}
13166 
13167 // BMI1 instructions
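// C2 canonicalizes ~x as (x ^ -1), which is why the andn rules match an
// XOR with minus_1. The BMI1 idioms recognized below are:
//   andn:   (~x) & y      -- and-not in one instruction
//   blsi:   x & (0 - x)   -- isolate the lowest set bit
//   blsmsk: x ^ (x - 1)   -- mask up to and including the lowest set bit
//   blsr:   x & (x - 1)   -- clear the lowest set bit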
13168 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13169   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13170   predicate(UseBMI1Instructions);
13171   effect(KILL cr);
13172   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13173 
13174   ins_cost(125);
13175   format %{ "andnl  $dst, $src1, $src2" %}
13176 
13177   ins_encode %{
13178     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13179   %}
13180   ins_pipe(ialu_reg_mem);
13181 %}
13182 
13183 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13184   match(Set dst (AndI (XorI src1 minus_1) src2));
13185   predicate(UseBMI1Instructions);
13186   effect(KILL cr);
13187   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13188 
13189   format %{ "andnl  $dst, $src1, $src2" %}
13190 
13191   ins_encode %{
13192     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13193   %}
13194   ins_pipe(ialu_reg);
13195 %}
13196 
13197 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13198   match(Set dst (AndI (SubI imm_zero src) src));
13199   predicate(UseBMI1Instructions);
13200   effect(KILL cr);
13201   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13202 
13203   format %{ "blsil  $dst, $src" %}
13204 
13205   ins_encode %{
13206     __ blsil($dst$$Register, $src$$Register);
13207   %}
13208   ins_pipe(ialu_reg);
13209 %}
13210 
13211 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13212   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13213   predicate(UseBMI1Instructions);
13214   effect(KILL cr);
13215   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13216 
13217   ins_cost(125);
13218   format %{ "blsil  $dst, $src" %}
13219 
13220   ins_encode %{
13221     __ blsil($dst$$Register, $src$$Address);
13222   %}
13223   ins_pipe(ialu_reg_mem);
13224 %}
13225 
13226 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13227 %{
13228   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13229   predicate(UseBMI1Instructions);
13230   effect(KILL cr);
13231   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13232 
13233   ins_cost(125);
13234   format %{ "blsmskl $dst, $src" %}
13235 
13236   ins_encode %{
13237     __ blsmskl($dst$$Register, $src$$Address);
13238   %}
13239   ins_pipe(ialu_reg_mem);
13240 %}
13241 
13242 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13243 %{
13244   match(Set dst (XorI (AddI src minus_1) src));
13245   predicate(UseBMI1Instructions);
13246   effect(KILL cr);
13247   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13248 
13249   format %{ "blsmskl $dst, $src" %}
13250 
13251   ins_encode %{
13252     __ blsmskl($dst$$Register, $src$$Register);
13253   %}
13254 
13255   ins_pipe(ialu_reg);
13256 %}
13257 
13258 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13259 %{
13260   match(Set dst (AndI (AddI src minus_1) src) );
13261   predicate(UseBMI1Instructions);
13262   effect(KILL cr);
13263   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13264 
13265   format %{ "blsrl  $dst, $src" %}
13266 
13267   ins_encode %{
13268     __ blsrl($dst$$Register, $src$$Register);
13269   %}
13270 
  ins_pipe(ialu_reg);
13272 %}
13273 
13274 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13275 %{
13276   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13277   predicate(UseBMI1Instructions);
13278   effect(KILL cr);
13279   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13280 
13281   ins_cost(125);
13282   format %{ "blsrl  $dst, $src" %}
13283 
13284   ins_encode %{
13285     __ blsrl($dst$$Register, $src$$Address);
13286   %}
13287 
  ins_pipe(ialu_reg_mem);
13289 %}
13290 
13291 // Or Instructions
13292 // Or Register with Register
13293 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13294 %{
13295   predicate(!UseAPX);
13296   match(Set dst (OrI dst src));
13297   effect(KILL cr);
13298   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13299 
13300   format %{ "orl     $dst, $src\t# int" %}
13301   ins_encode %{
13302     __ orl($dst$$Register, $src$$Register);
13303   %}
13304   ins_pipe(ialu_reg_reg);
13305 %}
13306 
13307 // Or Register with Register using New Data Destination (NDD)
13308 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13309 %{
13310   predicate(UseAPX);
13311   match(Set dst (OrI src1 src2));
13312   effect(KILL cr);
13313   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13314 
13315   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13316   ins_encode %{
13317     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13318   %}
13319   ins_pipe(ialu_reg_reg);
13320 %}
13321 
13322 // Or Register with Immediate
13323 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13324 %{
13325   predicate(!UseAPX);
13326   match(Set dst (OrI dst src));
13327   effect(KILL cr);
13328   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13329 
13330   format %{ "orl     $dst, $src\t# int" %}
13331   ins_encode %{
13332     __ orl($dst$$Register, $src$$constant);
13333   %}
13334   ins_pipe(ialu_reg);
13335 %}
13336 
13337 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13338 %{
13339   predicate(UseAPX);
13340   match(Set dst (OrI src1 src2));
13341   effect(KILL cr);
13342   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13343 
13344   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13345   ins_encode %{
13346     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13347   %}
13348   ins_pipe(ialu_reg);
13349 %}
13350 
13351 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13352 %{
13353   predicate(UseAPX);
13354   match(Set dst (OrI src1 src2));
13355   effect(KILL cr);
13356   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13357 
13358   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13359   ins_encode %{
13360     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13361   %}
13362   ins_pipe(ialu_reg);
13363 %}
13364 
13365 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13366 %{
13367   predicate(UseAPX);
13368   match(Set dst (OrI (LoadI src1) src2));
13369   effect(KILL cr);
13370   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13371 
13372   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13373   ins_encode %{
13374     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13375   %}
13376   ins_pipe(ialu_reg);
13377 %}
13378 
13379 // Or Register with Memory
13380 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13381 %{
13382   predicate(!UseAPX);
13383   match(Set dst (OrI dst (LoadI src)));
13384   effect(KILL cr);
13385   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13386 
13387   ins_cost(150);
13388   format %{ "orl     $dst, $src\t# int" %}
13389   ins_encode %{
13390     __ orl($dst$$Register, $src$$Address);
13391   %}
13392   ins_pipe(ialu_reg_mem);
13393 %}
13394 
13395 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13396 %{
13397   predicate(UseAPX);
13398   match(Set dst (OrI src1 (LoadI src2)));
13399   effect(KILL cr);
13400   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13401 
13402   ins_cost(150);
13403   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13404   ins_encode %{
13405     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13406   %}
13407   ins_pipe(ialu_reg_mem);
13408 %}
13409 
13410 // Or Memory with Register
13411 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13412 %{
13413   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13414   effect(KILL cr);
13415   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13416 
13417   ins_cost(150);
13418   format %{ "orb    $dst, $src\t# byte" %}
13419   ins_encode %{
13420     __ orb($dst$$Address, $src$$Register);
13421   %}
13422   ins_pipe(ialu_mem_reg);
13423 %}
13424 
13425 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13426 %{
13427   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13428   effect(KILL cr);
13429   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13430 
13431   ins_cost(150);
13432   format %{ "orl     $dst, $src\t# int" %}
13433   ins_encode %{
13434     __ orl($dst$$Address, $src$$Register);
13435   %}
13436   ins_pipe(ialu_mem_reg);
13437 %}
13438 
13439 // Or Memory with Immediate
13440 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13441 %{
13442   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13443   effect(KILL cr);
13444   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13445 
13446   ins_cost(125);
13447   format %{ "orl     $dst, $src\t# int" %}
13448   ins_encode %{
13449     __ orl($dst$$Address, $src$$constant);
13450   %}
13451   ins_pipe(ialu_mem_imm);
13452 %}
13453 
13454 // Xor Instructions
13455 // Xor Register with Register
13456 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13457 %{
13458   predicate(!UseAPX);
13459   match(Set dst (XorI dst src));
13460   effect(KILL cr);
13461   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13462 
13463   format %{ "xorl    $dst, $src\t# int" %}
13464   ins_encode %{
13465     __ xorl($dst$$Register, $src$$Register);
13466   %}
13467   ins_pipe(ialu_reg_reg);
13468 %}
13469 
13470 // Xor Register with Register using New Data Destination (NDD)
13471 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13472 %{
13473   predicate(UseAPX);
13474   match(Set dst (XorI src1 src2));
13475   effect(KILL cr);
13476   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13477 
13478   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13479   ins_encode %{
13480     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13481   %}
13482   ins_pipe(ialu_reg_reg);
13483 %}
13484 
13485 // Xor Register with Immediate -1
13486 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13487 %{
13488   predicate(!UseAPX);
13489   match(Set dst (XorI dst imm));
13490 
13491   format %{ "notl    $dst" %}
13492   ins_encode %{
13493      __ notl($dst$$Register);
13494   %}
13495   ins_pipe(ialu_reg);
13496 %}
13497 
13498 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13499 %{
13500   match(Set dst (XorI src imm));
13501   predicate(UseAPX);
13502   flag(PD::Flag_ndd_demotable_opr1);
13503 
13504   format %{ "enotl    $dst, $src" %}
13505   ins_encode %{
13506      __ enotl($dst$$Register, $src$$Register);
13507   %}
13508   ins_pipe(ialu_reg);
13509 %}
13510 
13511 // Xor Register with Immediate
13512 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13513 %{
  // Strict predicate check: exclude -1 here so that xorI_rReg_im1 is always selected for it, regardless of cost.
13515   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13516   match(Set dst (XorI dst src));
13517   effect(KILL cr);
13518   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13519 
13520   format %{ "xorl    $dst, $src\t# int" %}
13521   ins_encode %{
13522     __ xorl($dst$$Register, $src$$constant);
13523   %}
13524   ins_pipe(ialu_reg);
13525 %}
13526 
13527 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13528 %{
  // Strict predicate check: exclude -1 here so that xorI_rReg_im1_ndd is always selected for it, regardless of cost.
13530   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13531   match(Set dst (XorI src1 src2));
13532   effect(KILL cr);
13533   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13534 
13535   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13536   ins_encode %{
13537     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13538   %}
13539   ins_pipe(ialu_reg);
13540 %}
13541 
13542 // Xor Memory with Immediate
13543 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13544 %{
13545   predicate(UseAPX);
13546   match(Set dst (XorI (LoadI src1) src2));
13547   effect(KILL cr);
13548   ins_cost(150);
13549   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13550 
13551   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13552   ins_encode %{
13553     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13554   %}
13555   ins_pipe(ialu_reg);
13556 %}
13557 
13558 // Xor Register with Memory
13559 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13560 %{
13561   predicate(!UseAPX);
13562   match(Set dst (XorI dst (LoadI src)));
13563   effect(KILL cr);
13564   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13565 
13566   ins_cost(150);
13567   format %{ "xorl    $dst, $src\t# int" %}
13568   ins_encode %{
13569     __ xorl($dst$$Register, $src$$Address);
13570   %}
13571   ins_pipe(ialu_reg_mem);
13572 %}
13573 
13574 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13575 %{
13576   predicate(UseAPX);
13577   match(Set dst (XorI src1 (LoadI src2)));
13578   effect(KILL cr);
13579   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13580 
13581   ins_cost(150);
13582   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13583   ins_encode %{
13584     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13585   %}
13586   ins_pipe(ialu_reg_mem);
13587 %}
13588 
13589 // Xor Memory with Register
13590 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13591 %{
13592   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13593   effect(KILL cr);
13594   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13595 
13596   ins_cost(150);
13597   format %{ "xorb    $dst, $src\t# byte" %}
13598   ins_encode %{
13599     __ xorb($dst$$Address, $src$$Register);
13600   %}
13601   ins_pipe(ialu_mem_reg);
13602 %}
13603 
13604 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13605 %{
13606   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13607   effect(KILL cr);
13608   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13609 
13610   ins_cost(150);
13611   format %{ "xorl    $dst, $src\t# int" %}
13612   ins_encode %{
13613     __ xorl($dst$$Address, $src$$Register);
13614   %}
13615   ins_pipe(ialu_mem_reg);
13616 %}
13617 
13618 // Xor Memory with Immediate
13619 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13620 %{
13621   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13622   effect(KILL cr);
13623   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13624 
13625   ins_cost(125);
13626   format %{ "xorl    $dst, $src\t# int" %}
13627   ins_encode %{
13628     __ xorl($dst$$Address, $src$$constant);
13629   %}
13630   ins_pipe(ialu_mem_imm);
13631 %}
13632 
13633 
13634 // Long Logical Instructions
13635 
13636 // And Instructions
13637 // And Register with Register
13638 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13639 %{
13640   predicate(!UseAPX);
13641   match(Set dst (AndL dst src));
13642   effect(KILL cr);
13643   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13644 
13645   format %{ "andq    $dst, $src\t# long" %}
13646   ins_encode %{
13647     __ andq($dst$$Register, $src$$Register);
13648   %}
13649   ins_pipe(ialu_reg_reg);
13650 %}
13651 
13652 // And Register with Register using New Data Destination (NDD)
13653 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13654 %{
13655   predicate(UseAPX);
13656   match(Set dst (AndL src1 src2));
13657   effect(KILL cr);
13658   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13659 
13660   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13661   ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13665   ins_pipe(ialu_reg_reg);
13666 %}
13667 
13668 // And Register with Immediate 255
13669 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13670 %{
13671   match(Set dst (AndL src mask));
13672 
13673   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13674   ins_encode %{
    // movzbl zeroes out the upper 32 bits and does not need REX.W
13676     __ movzbl($dst$$Register, $src$$Register);
13677   %}
13678   ins_pipe(ialu_reg);
13679 %}
13680 
13681 // And Register with Immediate 65535
13682 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13683 %{
13684   match(Set dst (AndL src mask));
13685 
13686   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13687   ins_encode %{
    // movzwl zeroes out the upper 32 bits and does not need REX.W
13689     __ movzwl($dst$$Register, $src$$Register);
13690   %}
13691   ins_pipe(ialu_reg);
13692 %}
13693 
13694 // And Register with Immediate
13695 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13696 %{
13697   predicate(!UseAPX);
13698   match(Set dst (AndL dst src));
13699   effect(KILL cr);
13700   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13701 
13702   format %{ "andq    $dst, $src\t# long" %}
13703   ins_encode %{
13704     __ andq($dst$$Register, $src$$constant);
13705   %}
13706   ins_pipe(ialu_reg);
13707 %}
13708 
13709 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13710 %{
13711   predicate(UseAPX);
13712   match(Set dst (AndL src1 src2));
13713   effect(KILL cr);
13714   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13715 
13716   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13717   ins_encode %{
13718     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13719   %}
13720   ins_pipe(ialu_reg);
13721 %}
13722 
13723 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13724 %{
13725   predicate(UseAPX);
13726   match(Set dst (AndL (LoadL src1) src2));
13727   effect(KILL cr);
13728   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13729 
13730   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13731   ins_encode %{
13732     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13733   %}
13734   ins_pipe(ialu_reg);
13735 %}
13736 
13737 // And Register with Memory
13738 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13739 %{
13740   predicate(!UseAPX);
13741   match(Set dst (AndL dst (LoadL src)));
13742   effect(KILL cr);
13743   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13744 
13745   ins_cost(150);
13746   format %{ "andq    $dst, $src\t# long" %}
13747   ins_encode %{
13748     __ andq($dst$$Register, $src$$Address);
13749   %}
13750   ins_pipe(ialu_reg_mem);
13751 %}
13752 
13753 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13754 %{
13755   predicate(UseAPX);
13756   match(Set dst (AndL src1 (LoadL src2)));
13757   effect(KILL cr);
13758   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13759 
13760   ins_cost(150);
13761   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13762   ins_encode %{
13763     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13764   %}
13765   ins_pipe(ialu_reg_mem);
13766 %}
13767 
13768 // And Memory with Register
13769 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13770 %{
13771   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13772   effect(KILL cr);
13773   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13774 
13775   ins_cost(150);
13776   format %{ "andq    $dst, $src\t# long" %}
13777   ins_encode %{
13778     __ andq($dst$$Address, $src$$Register);
13779   %}
13780   ins_pipe(ialu_mem_reg);
13781 %}
13782 
13783 // And Memory with Immediate
13784 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13785 %{
13786   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13787   effect(KILL cr);
13788   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13789 
13790   ins_cost(125);
13791   format %{ "andq    $dst, $src\t# long" %}
13792   ins_encode %{
13793     __ andq($dst$$Address, $src$$constant);
13794   %}
13795   ins_pipe(ialu_mem_imm);
13796 %}
13797 
13798 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13799 %{
13800   // con should be a pure 64-bit immediate given that not(con) is a power of 2
13801   // because AND/OR works well enough for 8/32-bit values.
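  // A worked (hypothetical) example: con = ~(1L << 40). Then not(con) == 1L << 40,
  // log2i_graceful(~con) == 40 > 30, so this rule applies and a single
  // "btrq $dst, 40" clears bit 40 in memory instead of materializing a 64-bit AND.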
13802   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13803 
13804   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13805   effect(KILL cr);
13806 
13807   ins_cost(125);
13808   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13809   ins_encode %{
13810     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13811   %}
13812   ins_pipe(ialu_mem_imm);
13813 %}
13814 
13815 // BMI1 instructions
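// The ideal-graph shapes matched below are the standard BMI1 bit-manipulation
// idioms (stated here for reference):
//   andn(x, y) = ~x & y     matched as (AndL (XorL x -1) y)
//   blsi(x)    = x & -x     matched as (AndL (SubL 0 x) x)   isolate lowest set bit
//   blsmsk(x)  = x ^ (x-1)  matched as (XorL (AddL x -1) x)  mask up to lowest set bit
//   blsr(x)    = x & (x-1)  matched as (AndL (AddL x -1) x)  clear lowest set bit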
13816 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13817   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13818   predicate(UseBMI1Instructions);
13819   effect(KILL cr);
13820   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13821 
13822   ins_cost(125);
13823   format %{ "andnq  $dst, $src1, $src2" %}
13824 
13825   ins_encode %{
13826     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13827   %}
13828   ins_pipe(ialu_reg_mem);
13829 %}
13830 
13831 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13832   match(Set dst (AndL (XorL src1 minus_1) src2));
13833   predicate(UseBMI1Instructions);
13834   effect(KILL cr);
13835   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13836 
13837   format %{ "andnq  $dst, $src1, $src2" %}
13838 
13839   ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13841   %}
13842   ins_pipe(ialu_reg_mem);
13843 %}
13844 
13845 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13846   match(Set dst (AndL (SubL imm_zero src) src));
13847   predicate(UseBMI1Instructions);
13848   effect(KILL cr);
13849   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13850 
13851   format %{ "blsiq  $dst, $src" %}
13852 
13853   ins_encode %{
13854     __ blsiq($dst$$Register, $src$$Register);
13855   %}
13856   ins_pipe(ialu_reg);
13857 %}
13858 
13859 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13860   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13861   predicate(UseBMI1Instructions);
13862   effect(KILL cr);
13863   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13864 
13865   ins_cost(125);
13866   format %{ "blsiq  $dst, $src" %}
13867 
13868   ins_encode %{
13869     __ blsiq($dst$$Register, $src$$Address);
13870   %}
13871   ins_pipe(ialu_reg_mem);
13872 %}
13873 
13874 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13875 %{
13876   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13877   predicate(UseBMI1Instructions);
13878   effect(KILL cr);
13879   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13880 
13881   ins_cost(125);
13882   format %{ "blsmskq $dst, $src" %}
13883 
13884   ins_encode %{
13885     __ blsmskq($dst$$Register, $src$$Address);
13886   %}
13887   ins_pipe(ialu_reg_mem);
13888 %}
13889 
13890 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13891 %{
13892   match(Set dst (XorL (AddL src minus_1) src));
13893   predicate(UseBMI1Instructions);
13894   effect(KILL cr);
13895   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13896 
13897   format %{ "blsmskq $dst, $src" %}
13898 
13899   ins_encode %{
13900     __ blsmskq($dst$$Register, $src$$Register);
13901   %}
13902 
13903   ins_pipe(ialu_reg);
13904 %}
13905 
13906 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13907 %{
13908   match(Set dst (AndL (AddL src minus_1) src) );
13909   predicate(UseBMI1Instructions);
13910   effect(KILL cr);
13911   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13912 
13913   format %{ "blsrq  $dst, $src" %}
13914 
13915   ins_encode %{
13916     __ blsrq($dst$$Register, $src$$Register);
13917   %}
13918 
13919   ins_pipe(ialu_reg);
13920 %}
13921 
13922 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13923 %{
13924   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13925   predicate(UseBMI1Instructions);
13926   effect(KILL cr);
13927   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13928 
13929   ins_cost(125);
13930   format %{ "blsrq  $dst, $src" %}
13931 
13932   ins_encode %{
13933     __ blsrq($dst$$Register, $src$$Address);
13934   %}
13935 
13936   ins_pipe(ialu_reg);
13937 %}
13938 
13939 // Or Instructions
13940 // Or Register with Register
13941 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13942 %{
13943   predicate(!UseAPX);
13944   match(Set dst (OrL dst src));
13945   effect(KILL cr);
13946   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13947 
13948   format %{ "orq     $dst, $src\t# long" %}
13949   ins_encode %{
13950     __ orq($dst$$Register, $src$$Register);
13951   %}
13952   ins_pipe(ialu_reg_reg);
13953 %}
13954 
13955 // Or Register with Register using New Data Destination (NDD)
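// An NDD form such as "eorq dst, src1, src2" computes dst = src1 | src2 without
// first copying src1 into dst, saving the mov that the legacy two-operand
// encoding would need. The Flag_ndd_demotable_opr* hints tell the backend that
// the instruction may be demoted to the shorter legacy form when the
// destination coincides with that source operand.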
13956 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13957 %{
13958   predicate(UseAPX);
13959   match(Set dst (OrL src1 src2));
13960   effect(KILL cr);
13961   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13962 
13963   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13964   ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13968   ins_pipe(ialu_reg_reg);
13969 %}
13970 
13971 // Use any_RegP to match R15 (TLS register) without spilling.
13972 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
13973   match(Set dst (OrL dst (CastP2X src)));
13974   effect(KILL cr);
13975   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13976 
13977   format %{ "orq     $dst, $src\t# long" %}
13978   ins_encode %{
13979     __ orq($dst$$Register, $src$$Register);
13980   %}
13981   ins_pipe(ialu_reg_reg);
13982 %}
13983 
13984 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
13985   match(Set dst (OrL src1 (CastP2X src2)));
13986   effect(KILL cr);
13987   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13988 
13989   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13990   ins_encode %{
13991     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13992   %}
13993   ins_pipe(ialu_reg_reg);
13994 %}
13995 
13996 // Or Register with Immediate
13997 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13998 %{
13999   predicate(!UseAPX);
14000   match(Set dst (OrL dst src));
14001   effect(KILL cr);
14002   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14003 
14004   format %{ "orq     $dst, $src\t# long" %}
14005   ins_encode %{
14006     __ orq($dst$$Register, $src$$constant);
14007   %}
14008   ins_pipe(ialu_reg);
14009 %}
14010 
14011 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14012 %{
14013   predicate(UseAPX);
14014   match(Set dst (OrL src1 src2));
14015   effect(KILL cr);
14016   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14017 
14018   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14019   ins_encode %{
14020     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14021   %}
14022   ins_pipe(ialu_reg);
14023 %}
14024 
14025 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14026 %{
14027   predicate(UseAPX);
14028   match(Set dst (OrL src1 src2));
14029   effect(KILL cr);
14030   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14031 
14032   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
14033   ins_encode %{
14034     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14035   %}
14036   ins_pipe(ialu_reg);
14037 %}
14038 
// Or Memory with Immediate into Register
14040 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14041 %{
14042   predicate(UseAPX);
14043   match(Set dst (OrL (LoadL src1) src2));
14044   effect(KILL cr);
14045   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14046 
14047   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14048   ins_encode %{
14049     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14050   %}
14051   ins_pipe(ialu_reg);
14052 %}
14053 
14054 // Or Register with Memory
14055 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14056 %{
14057   predicate(!UseAPX);
14058   match(Set dst (OrL dst (LoadL src)));
14059   effect(KILL cr);
14060   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14061 
14062   ins_cost(150);
14063   format %{ "orq     $dst, $src\t# long" %}
14064   ins_encode %{
14065     __ orq($dst$$Register, $src$$Address);
14066   %}
14067   ins_pipe(ialu_reg_mem);
14068 %}
14069 
14070 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14071 %{
14072   predicate(UseAPX);
14073   match(Set dst (OrL src1 (LoadL src2)));
14074   effect(KILL cr);
14075   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14076 
14077   ins_cost(150);
14078   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14079   ins_encode %{
14080     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14081   %}
14082   ins_pipe(ialu_reg_mem);
14083 %}
14084 
14085 // Or Memory with Register
14086 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14087 %{
14088   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14089   effect(KILL cr);
14090   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14091 
14092   ins_cost(150);
14093   format %{ "orq     $dst, $src\t# long" %}
14094   ins_encode %{
14095     __ orq($dst$$Address, $src$$Register);
14096   %}
14097   ins_pipe(ialu_mem_reg);
14098 %}
14099 
14100 // Or Memory with Immediate
14101 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14102 %{
14103   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14104   effect(KILL cr);
14105   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14106 
14107   ins_cost(125);
14108   format %{ "orq     $dst, $src\t# long" %}
14109   ins_encode %{
14110     __ orq($dst$$Address, $src$$constant);
14111   %}
14112   ins_pipe(ialu_mem_imm);
14113 %}
14114 
14115 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14116 %{
14117   // con should be a pure 64-bit power of 2 immediate
14118   // because AND/OR works well enough for 8/32-bit values.
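  // A worked (hypothetical) example: con = 1L << 40 gives log2i_graceful(con) == 40 > 31,
  // so this rule applies and a single "btsq $dst, 40" sets bit 40 in memory
  // instead of materializing a 64-bit OR.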
14119   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14120 
14121   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14122   effect(KILL cr);
14123 
14124   ins_cost(125);
14125   format %{ "btsq    $dst, log2($con)\t# long" %}
14126   ins_encode %{
14127     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14128   %}
14129   ins_pipe(ialu_mem_imm);
14130 %}
14131 
14132 // Xor Instructions
14133 // Xor Register with Register
14134 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14135 %{
14136   predicate(!UseAPX);
14137   match(Set dst (XorL dst src));
14138   effect(KILL cr);
14139   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14140 
14141   format %{ "xorq    $dst, $src\t# long" %}
14142   ins_encode %{
14143     __ xorq($dst$$Register, $src$$Register);
14144   %}
14145   ins_pipe(ialu_reg_reg);
14146 %}
14147 
14148 // Xor Register with Register using New Data Destination (NDD)
14149 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14150 %{
14151   predicate(UseAPX);
14152   match(Set dst (XorL src1 src2));
14153   effect(KILL cr);
14154   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14155 
14156   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14157   ins_encode %{
14158     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14159   %}
14160   ins_pipe(ialu_reg_reg);
14161 %}
14162 
14163 // Xor Register with Immediate -1
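// x ^ -1L is the bitwise complement, so a single notq suffices. NOT does not
// modify any flags, which is why these rules carry no KILL cr effect.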
14164 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14165 %{
14166   predicate(!UseAPX);
14167   match(Set dst (XorL dst imm));
14168 
14169   format %{ "notq   $dst" %}
14170   ins_encode %{
14171      __ notq($dst$$Register);
14172   %}
14173   ins_pipe(ialu_reg);
14174 %}
14175 
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14177 %{
14178   predicate(UseAPX);
14179   match(Set dst (XorL src imm));
14180   flag(PD::Flag_ndd_demotable_opr1);
14181 
14182   format %{ "enotq   $dst, $src" %}
14183   ins_encode %{
14184     __ enotq($dst$$Register, $src$$Register);
14185   %}
14186   ins_pipe(ialu_reg);
14187 %}
14188 
14189 // Xor Register with Immediate
14190 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14191 %{
  // Exclude the -1 immediate explicitly so that xorL_rReg_im1 is selected for it regardless of cost.
14193   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14194   match(Set dst (XorL dst src));
14195   effect(KILL cr);
14196   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14197 
14198   format %{ "xorq    $dst, $src\t# long" %}
14199   ins_encode %{
14200     __ xorq($dst$$Register, $src$$constant);
14201   %}
14202   ins_pipe(ialu_reg);
14203 %}
14204 
14205 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14206 %{
  // Exclude the -1 immediate explicitly so that xorL_rReg_im1_ndd is selected for it regardless of cost.
14208   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14209   match(Set dst (XorL src1 src2));
14210   effect(KILL cr);
14211   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14212 
14213   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14214   ins_encode %{
14215     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14216   %}
14217   ins_pipe(ialu_reg);
14218 %}
14219 
// Xor Memory with Immediate into Register
14221 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14222 %{
14223   predicate(UseAPX);
14224   match(Set dst (XorL (LoadL src1) src2));
14225   effect(KILL cr);
14226   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14227   ins_cost(150);
14228 
14229   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14230   ins_encode %{
14231     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14232   %}
14233   ins_pipe(ialu_reg);
14234 %}
14235 
14236 // Xor Register with Memory
14237 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14238 %{
14239   predicate(!UseAPX);
14240   match(Set dst (XorL dst (LoadL src)));
14241   effect(KILL cr);
14242   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14243 
14244   ins_cost(150);
14245   format %{ "xorq    $dst, $src\t# long" %}
14246   ins_encode %{
14247     __ xorq($dst$$Register, $src$$Address);
14248   %}
14249   ins_pipe(ialu_reg_mem);
14250 %}
14251 
14252 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14253 %{
14254   predicate(UseAPX);
14255   match(Set dst (XorL src1 (LoadL src2)));
14256   effect(KILL cr);
14257   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14258 
14259   ins_cost(150);
14260   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14261   ins_encode %{
14262     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14263   %}
14264   ins_pipe(ialu_reg_mem);
14265 %}
14266 
14267 // Xor Memory with Register
14268 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14269 %{
14270   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14271   effect(KILL cr);
14272   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14273 
14274   ins_cost(150);
14275   format %{ "xorq    $dst, $src\t# long" %}
14276   ins_encode %{
14277     __ xorq($dst$$Address, $src$$Register);
14278   %}
14279   ins_pipe(ialu_mem_reg);
14280 %}
14281 
14282 // Xor Memory with Immediate
14283 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14284 %{
14285   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14286   effect(KILL cr);
14287   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14288 
14289   ins_cost(125);
14290   format %{ "xorq    $dst, $src\t# long" %}
14291   ins_encode %{
14292     __ xorq($dst$$Address, $src$$constant);
14293   %}
14294   ins_pipe(ialu_mem_imm);
14295 %}
14296 
14297 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14298 %{
14299   match(Set dst (CmpLTMask p q));
14300   effect(KILL cr);
14301 
14302   ins_cost(400);
  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setcc   $dst\t# emits setlt + movzbl or setzul for APX\n\t"
            "negl    $dst" %}
14306   ins_encode %{
14307     __ cmpl($p$$Register, $q$$Register);
14308     __ setcc(Assembler::less, $dst$$Register);
14309     __ negl($dst$$Register);
14310   %}
14311   ins_pipe(pipe_slow);
14312 %}
14313 
14314 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14315 %{
14316   match(Set dst (CmpLTMask dst zero));
14317   effect(KILL cr);
14318 
14319   ins_cost(100);
14320   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14321   ins_encode %{
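    // (dst < 0) ? -1 : 0 -- the arithmetic right shift replicates the sign bit
    // into every bit position, producing the mask directly.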
14322     __ sarl($dst$$Register, 31);
14323   %}
14324   ins_pipe(ialu_reg);
14325 %}
14326 
14327 /* Better to save a register than avoid a branch */
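// Matches (p - q) + ((p < q) ? y : 0), i.e. the remainder-adjustment idiom
// "r = p - q; if (r < 0) r += y;", emitted as a subtract plus a conditional add.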
14328 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14329 %{
14330   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14331   effect(KILL cr);
14332   ins_cost(300);
14333   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14334             "jge     done\n\t"
14335             "addl    $p,$y\n"
14336             "done:   " %}
14337   ins_encode %{
14338     Register Rp = $p$$Register;
14339     Register Rq = $q$$Register;
14340     Register Ry = $y$$Register;
14341     Label done;
14342     __ subl(Rp, Rq);
14343     __ jccb(Assembler::greaterEqual, done);
14344     __ addl(Rp, Ry);
14345     __ bind(done);
14346   %}
14347   ins_pipe(pipe_cmplt);
14348 %}
14349 
14350 /* Better to save a register than avoid a branch */
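// Matches y = (p < q) ? y : 0: the mask-and collapses into a compare and a
// conditional clear of $y.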
14351 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14352 %{
14353   match(Set y (AndI (CmpLTMask p q) y));
14354   effect(KILL cr);
14355 
14356   ins_cost(300);
14357 
14358   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14359             "jlt     done\n\t"
14360             "xorl    $y, $y\n"
14361             "done:   " %}
14362   ins_encode %{
14363     Register Rp = $p$$Register;
14364     Register Rq = $q$$Register;
14365     Register Ry = $y$$Register;
14366     Label done;
14367     __ cmpl(Rp, Rq);
14368     __ jccb(Assembler::less, done);
14369     __ xorl(Ry, Ry);
14370     __ bind(done);
14371   %}
14372   ins_pipe(pipe_cmplt);
14373 %}
14374 
14375 
14376 //---------- FP Instructions------------------------------------------------
14377 
14378 // Really expensive, avoid
14379 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14380 %{
14381   match(Set cr (CmpF src1 src2));
14382 
14383   ins_cost(500);
14384   format %{ "ucomiss $src1, $src2\n\t"
14385             "jnp,s   exit\n\t"
14386             "pushfq\t# saw NaN, set CF\n\t"
14387             "andq    [rsp], #0xffffff2b\n\t"
14388             "popfq\n"
14389     "exit:" %}
14390   ins_encode %{
14391     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14392     emit_cmpfp_fixup(masm);
14393   %}
14394   ins_pipe(pipe_slow);
14395 %}
14396 
14397 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14398   match(Set cr (CmpF src1 src2));
14399 
14400   ins_cost(100);
14401   format %{ "ucomiss $src1, $src2" %}
14402   ins_encode %{
14403     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14404   %}
14405   ins_pipe(pipe_slow);
14406 %}
14407 
14408 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14409   match(Set cr (CmpF src1 src2));
14410 
14411   ins_cost(100);
14412   format %{ "vucomxss $src1, $src2" %}
14413   ins_encode %{
14414     __ vucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14415   %}
14416   ins_pipe(pipe_slow);
14417 %}
14418 
14419 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14420   match(Set cr (CmpF src1 (LoadF src2)));
14421 
14422   ins_cost(100);
14423   format %{ "ucomiss $src1, $src2" %}
14424   ins_encode %{
14425     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14426   %}
14427   ins_pipe(pipe_slow);
14428 %}
14429 
14430 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14431   match(Set cr (CmpF src1 (LoadF src2)));
14432 
14433   ins_cost(100);
14434   format %{ "vucomxss $src1, $src2" %}
14435   ins_encode %{
14436     __ vucomxss($src1$$XMMRegister, $src2$$Address);
14437   %}
14438   ins_pipe(pipe_slow);
14439 %}
14440 
14441 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14442   match(Set cr (CmpF src con));
14443 
14444   ins_cost(100);
14445   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14446   ins_encode %{
14447     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14448   %}
14449   ins_pipe(pipe_slow);
14450 %}
14451 
14452 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14453   match(Set cr (CmpF src con));
14454 
14455   ins_cost(100);
14456   format %{ "vucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14457   ins_encode %{
14458     __ vucomxss($src$$XMMRegister, $constantaddress($con));
14459   %}
14460   ins_pipe(pipe_slow);
14461 %}
14462 
14463 // Really expensive, avoid
14464 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14465 %{
14466   match(Set cr (CmpD src1 src2));
14467 
14468   ins_cost(500);
14469   format %{ "ucomisd $src1, $src2\n\t"
14470             "jnp,s   exit\n\t"
14471             "pushfq\t# saw NaN, set CF\n\t"
14472             "andq    [rsp], #0xffffff2b\n\t"
14473             "popfq\n"
14474     "exit:" %}
14475   ins_encode %{
14476     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14477     emit_cmpfp_fixup(masm);
14478   %}
14479   ins_pipe(pipe_slow);
14480 %}
14481 
14482 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14483   match(Set cr (CmpD src1 src2));
14484 
14485   ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
14487   ins_encode %{
14488     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14489   %}
14490   ins_pipe(pipe_slow);
14491 %}
14492 
14493 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14494   match(Set cr (CmpD src1 src2));
14495 
14496   ins_cost(100);
  format %{ "vucomxsd $src1, $src2" %}
14498   ins_encode %{
14499     __ vucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14500   %}
14501   ins_pipe(pipe_slow);
14502 %}
14503 
14504 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14505   match(Set cr (CmpD src1 (LoadD src2)));
14506 
14507   ins_cost(100);
14508   format %{ "ucomisd $src1, $src2" %}
14509   ins_encode %{
14510     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14511   %}
14512   ins_pipe(pipe_slow);
14513 %}
14514 
14515 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14516   match(Set cr (CmpD src1 (LoadD src2)));
14517 
14518   ins_cost(100);
14519   format %{ "vucomxsd $src1, $src2" %}
14520   ins_encode %{
14521     __ vucomxsd($src1$$XMMRegister, $src2$$Address);
14522   %}
14523   ins_pipe(pipe_slow);
14524 %}
14525 
14526 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14527   match(Set cr (CmpD src con));
14528   ins_cost(100);
14529   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14530   ins_encode %{
14531     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14532   %}
14533   ins_pipe(pipe_slow);
14534 %}
14535 
14536 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14537   match(Set cr (CmpD src con));
14538 
14539   ins_cost(100);
14540   format %{ "vucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14541   ins_encode %{
14542     __ vucomxsd($src$$XMMRegister, $constantaddress($con));
14543   %}
14544   ins_pipe(pipe_slow);
14545 %}
14546 
14547 // Compare into -1,0,1
14548 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14549 %{
14550   match(Set dst (CmpF3 src1 src2));
14551   effect(KILL cr);
14552 
14553   ins_cost(275);
14554   format %{ "ucomiss $src1, $src2\n\t"
14555             "movl    $dst, #-1\n\t"
14556             "jp,s    done\n\t"
14557             "jb,s    done\n\t"
14558             "setne   $dst\n\t"
14559             "movzbl  $dst, $dst\n"
14560     "done:" %}
14561   ins_encode %{
14562     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14563     emit_cmpfp3(masm, $dst$$Register);
14564   %}
14565   ins_pipe(pipe_slow);
14566 %}
14567 
14568 // Compare into -1,0,1
14569 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14570 %{
14571   match(Set dst (CmpF3 src1 (LoadF src2)));
14572   effect(KILL cr);
14573 
14574   ins_cost(275);
14575   format %{ "ucomiss $src1, $src2\n\t"
14576             "movl    $dst, #-1\n\t"
14577             "jp,s    done\n\t"
14578             "jb,s    done\n\t"
14579             "setne   $dst\n\t"
14580             "movzbl  $dst, $dst\n"
14581     "done:" %}
14582   ins_encode %{
14583     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14584     emit_cmpfp3(masm, $dst$$Register);
14585   %}
14586   ins_pipe(pipe_slow);
14587 %}
14588 
14589 // Compare into -1,0,1
14590 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14591   match(Set dst (CmpF3 src con));
14592   effect(KILL cr);
14593 
14594   ins_cost(275);
14595   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14596             "movl    $dst, #-1\n\t"
14597             "jp,s    done\n\t"
14598             "jb,s    done\n\t"
14599             "setne   $dst\n\t"
14600             "movzbl  $dst, $dst\n"
14601     "done:" %}
14602   ins_encode %{
14603     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14604     emit_cmpfp3(masm, $dst$$Register);
14605   %}
14606   ins_pipe(pipe_slow);
14607 %}
14608 
14609 // Compare into -1,0,1
14610 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14611 %{
14612   match(Set dst (CmpD3 src1 src2));
14613   effect(KILL cr);
14614 
14615   ins_cost(275);
14616   format %{ "ucomisd $src1, $src2\n\t"
14617             "movl    $dst, #-1\n\t"
14618             "jp,s    done\n\t"
14619             "jb,s    done\n\t"
14620             "setne   $dst\n\t"
14621             "movzbl  $dst, $dst\n"
14622     "done:" %}
14623   ins_encode %{
14624     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14625     emit_cmpfp3(masm, $dst$$Register);
14626   %}
14627   ins_pipe(pipe_slow);
14628 %}
14629 
14630 // Compare into -1,0,1
14631 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14632 %{
14633   match(Set dst (CmpD3 src1 (LoadD src2)));
14634   effect(KILL cr);
14635 
14636   ins_cost(275);
14637   format %{ "ucomisd $src1, $src2\n\t"
14638             "movl    $dst, #-1\n\t"
14639             "jp,s    done\n\t"
14640             "jb,s    done\n\t"
14641             "setne   $dst\n\t"
14642             "movzbl  $dst, $dst\n"
14643     "done:" %}
14644   ins_encode %{
14645     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14646     emit_cmpfp3(masm, $dst$$Register);
14647   %}
14648   ins_pipe(pipe_slow);
14649 %}
14650 
14651 // Compare into -1,0,1
14652 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14653   match(Set dst (CmpD3 src con));
14654   effect(KILL cr);
14655 
14656   ins_cost(275);
14657   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14658             "movl    $dst, #-1\n\t"
14659             "jp,s    done\n\t"
14660             "jb,s    done\n\t"
14661             "setne   $dst\n\t"
14662             "movzbl  $dst, $dst\n"
14663     "done:" %}
14664   ins_encode %{
14665     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14666     emit_cmpfp3(masm, $dst$$Register);
14667   %}
14668   ins_pipe(pipe_slow);
14669 %}
14670 
14671 //----------Arithmetic Conversion Instructions---------------------------------
14672 
14673 instruct convF2D_reg_reg(regD dst, regF src)
14674 %{
14675   match(Set dst (ConvF2D src));
14676 
14677   format %{ "cvtss2sd $dst, $src" %}
14678   ins_encode %{
14679     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14680   %}
14681   ins_pipe(pipe_slow); // XXX
14682 %}
14683 
14684 instruct convF2D_reg_mem(regD dst, memory src)
14685 %{
14686   predicate(UseAVX == 0);
14687   match(Set dst (ConvF2D (LoadF src)));
14688 
14689   format %{ "cvtss2sd $dst, $src" %}
14690   ins_encode %{
14691     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14692   %}
14693   ins_pipe(pipe_slow); // XXX
14694 %}
14695 
14696 instruct convD2F_reg_reg(regF dst, regD src)
14697 %{
14698   match(Set dst (ConvD2F src));
14699 
14700   format %{ "cvtsd2ss $dst, $src" %}
14701   ins_encode %{
14702     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14703   %}
14704   ins_pipe(pipe_slow); // XXX
14705 %}
14706 
14707 instruct convD2F_reg_mem(regF dst, memory src)
14708 %{
14709   predicate(UseAVX == 0);
14710   match(Set dst (ConvD2F (LoadD src)));
14711 
14712   format %{ "cvtsd2ss $dst, $src" %}
14713   ins_encode %{
14714     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14715   %}
14716   ins_pipe(pipe_slow); // XXX
14717 %}
14718 
14719 // XXX do mem variants
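// Java semantics for f2i/f2l/d2i/d2l: NaN converts to 0 and out-of-range values
// saturate to the target type's MIN/MAX. Plain cvttss2si & co. instead return
// the "integer indefinite" value (e.g. 0x80000000) in those cases, so the
// convertF2I() paths below compare and branch to a fixup path (hence KILL cr).
// The AVX10.2 saturating conversions (evcvttss2sis*) implement the Java
// behavior directly, so those rules need neither a fixup nor a flags kill.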
14720 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14721 %{
14722   predicate(!VM_Version::supports_avx10_2());
14723   match(Set dst (ConvF2I src));
14724   effect(KILL cr);
14725   format %{ "convert_f2i $dst, $src" %}
14726   ins_encode %{
14727     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14728   %}
14729   ins_pipe(pipe_slow);
14730 %}
14731 
14732 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14733 %{
14734   predicate(VM_Version::supports_avx10_2());
14735   match(Set dst (ConvF2I src));
14736   format %{ "evcvttss2sisl $dst, $src" %}
14737   ins_encode %{
14738     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14739   %}
14740   ins_pipe(pipe_slow);
14741 %}
14742 
14743 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14744 %{
14745   predicate(VM_Version::supports_avx10_2());
14746   match(Set dst (ConvF2I (LoadF src)));
14747   format %{ "evcvttss2sisl $dst, $src" %}
14748   ins_encode %{
14749     __ evcvttss2sisl($dst$$Register, $src$$Address);
14750   %}
14751   ins_pipe(pipe_slow);
14752 %}
14753 
14754 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14755 %{
14756   predicate(!VM_Version::supports_avx10_2());
14757   match(Set dst (ConvF2L src));
14758   effect(KILL cr);
  format %{ "convert_f2l $dst, $src" %}
14760   ins_encode %{
14761     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14762   %}
14763   ins_pipe(pipe_slow);
14764 %}
14765 
14766 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14767 %{
14768   predicate(VM_Version::supports_avx10_2());
14769   match(Set dst (ConvF2L src));
14770   format %{ "evcvttss2sisq $dst, $src" %}
14771   ins_encode %{
14772     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14773   %}
14774   ins_pipe(pipe_slow);
14775 %}
14776 
14777 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14778 %{
14779   predicate(VM_Version::supports_avx10_2());
14780   match(Set dst (ConvF2L (LoadF src)));
14781   format %{ "evcvttss2sisq $dst, $src" %}
14782   ins_encode %{
14783     __ evcvttss2sisq($dst$$Register, $src$$Address);
14784   %}
14785   ins_pipe(pipe_slow);
14786 %}
14787 
14788 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14789 %{
14790   predicate(!VM_Version::supports_avx10_2());
14791   match(Set dst (ConvD2I src));
14792   effect(KILL cr);
  format %{ "convert_d2i $dst, $src" %}
14794   ins_encode %{
14795     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14796   %}
14797   ins_pipe(pipe_slow);
14798 %}
14799 
14800 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14801 %{
14802   predicate(VM_Version::supports_avx10_2());
14803   match(Set dst (ConvD2I src));
14804   format %{ "evcvttsd2sisl $dst, $src" %}
14805   ins_encode %{
14806     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14807   %}
14808   ins_pipe(pipe_slow);
14809 %}
14810 
14811 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14812 %{
14813   predicate(VM_Version::supports_avx10_2());
14814   match(Set dst (ConvD2I (LoadD src)));
14815   format %{ "evcvttsd2sisl $dst, $src" %}
14816   ins_encode %{
14817     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14818   %}
14819   ins_pipe(pipe_slow);
14820 %}
14821 
14822 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14823 %{
14824   predicate(!VM_Version::supports_avx10_2());
14825   match(Set dst (ConvD2L src));
14826   effect(KILL cr);
  format %{ "convert_d2l $dst, $src" %}
14828   ins_encode %{
14829     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14830   %}
14831   ins_pipe(pipe_slow);
14832 %}
14833 
14834 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14835 %{
14836   predicate(VM_Version::supports_avx10_2());
14837   match(Set dst (ConvD2L src));
14838   format %{ "evcvttsd2sisq $dst, $src" %}
14839   ins_encode %{
14840     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14841   %}
14842   ins_pipe(pipe_slow);
14843 %}
14844 
14845 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14846 %{
14847   predicate(VM_Version::supports_avx10_2());
14848   match(Set dst (ConvD2L (LoadD src)));
14849   format %{ "evcvttsd2sisq $dst, $src" %}
14850   ins_encode %{
14851     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14852   %}
14853   ins_pipe(pipe_slow);
14854 %}
14855 
14856 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14857 %{
14858   match(Set dst (RoundD src));
14859   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src\t# using $rtmp and $rcx as TEMP" %}
14861   ins_encode %{
14862     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14863   %}
14864   ins_pipe(pipe_slow);
14865 %}
14866 
14867 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14868 %{
14869   match(Set dst (RoundF src));
14870   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14871   format %{ "round_float $dst,$src" %}
14872   ins_encode %{
14873     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14874   %}
14875   ins_pipe(pipe_slow);
14876 %}
14877 
14878 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14879 %{
14880   predicate(!UseXmmI2F);
14881   match(Set dst (ConvI2F src));
14882 
14883   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14884   ins_encode %{
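    // cvtsi2ssl merges into the low element of $dst, so zero the register first
    // to break the false dependency on its previous contents (the other
    // int/long -> float/double conversions below use the same trick).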
14885     if (UseAVX > 0) {
14886       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14887     }
14888     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14889   %}
14890   ins_pipe(pipe_slow); // XXX
14891 %}
14892 
14893 instruct convI2F_reg_mem(regF dst, memory src)
14894 %{
14895   predicate(UseAVX == 0);
14896   match(Set dst (ConvI2F (LoadI src)));
14897 
14898   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14899   ins_encode %{
14900     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14901   %}
14902   ins_pipe(pipe_slow); // XXX
14903 %}
14904 
14905 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14906 %{
14907   predicate(!UseXmmI2D);
14908   match(Set dst (ConvI2D src));
14909 
14910   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14911   ins_encode %{
14912     if (UseAVX > 0) {
14913       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14914     }
14915     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14916   %}
14917   ins_pipe(pipe_slow); // XXX
14918 %}
14919 
14920 instruct convI2D_reg_mem(regD dst, memory src)
14921 %{
14922   predicate(UseAVX == 0);
14923   match(Set dst (ConvI2D (LoadI src)));
14924 
14925   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14926   ins_encode %{
14927     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14928   %}
14929   ins_pipe(pipe_slow); // XXX
14930 %}
14931 
14932 instruct convXI2F_reg(regF dst, rRegI src)
14933 %{
14934   predicate(UseXmmI2F);
14935   match(Set dst (ConvI2F src));
14936 
  format %{ "movdl $dst, $src\n\t"
            "cvtdq2ps $dst, $dst\t# i2f" %}
14939   ins_encode %{
14940     __ movdl($dst$$XMMRegister, $src$$Register);
14941     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14942   %}
14943   ins_pipe(pipe_slow); // XXX
14944 %}
14945 
14946 instruct convXI2D_reg(regD dst, rRegI src)
14947 %{
14948   predicate(UseXmmI2D);
14949   match(Set dst (ConvI2D src));
14950 
  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pd $dst, $dst\t# i2d" %}
14953   ins_encode %{
14954     __ movdl($dst$$XMMRegister, $src$$Register);
14955     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14956   %}
14957   ins_pipe(pipe_slow); // XXX
14958 %}
14959 
14960 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14961 %{
14962   match(Set dst (ConvL2F src));
14963 
14964   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14965   ins_encode %{
14966     if (UseAVX > 0) {
14967       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14968     }
14969     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14970   %}
14971   ins_pipe(pipe_slow); // XXX
14972 %}
14973 
14974 instruct convL2F_reg_mem(regF dst, memory src)
14975 %{
14976   predicate(UseAVX == 0);
14977   match(Set dst (ConvL2F (LoadL src)));
14978 
14979   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14980   ins_encode %{
14981     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14982   %}
14983   ins_pipe(pipe_slow); // XXX
14984 %}
14985 
14986 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14987 %{
14988   match(Set dst (ConvL2D src));
14989 
14990   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14991   ins_encode %{
14992     if (UseAVX > 0) {
14993       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14994     }
14995     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14996   %}
14997   ins_pipe(pipe_slow); // XXX
14998 %}
14999 
15000 instruct convL2D_reg_mem(regD dst, memory src)
15001 %{
15002   predicate(UseAVX == 0);
15003   match(Set dst (ConvL2D (LoadL src)));
15004 
15005   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15006   ins_encode %{
15007     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
15008   %}
15009   ins_pipe(pipe_slow); // XXX
15010 %}
15011 
15012 instruct convI2L_reg_reg(rRegL dst, rRegI src)
15013 %{
15014   match(Set dst (ConvI2L src));
15015 
15016   ins_cost(125);
15017   format %{ "movslq  $dst, $src\t# i2l" %}
15018   ins_encode %{
15019     __ movslq($dst$$Register, $src$$Register);
15020   %}
15021   ins_pipe(ialu_reg_reg);
15022 %}
15023 
15024 // Zero-extend convert int to long
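// On x86-64 a write to a 32-bit register implicitly zeroes bits 63:32, so
// (long)i & 0xFFFFFFFFL needs at most a 32-bit move -- and no instruction at
// all when dst and src already share a register.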
15025 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
15026 %{
15027   match(Set dst (AndL (ConvI2L src) mask));
15028 
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
15030   ins_encode %{
15031     if ($dst$$reg != $src$$reg) {
15032       __ movl($dst$$Register, $src$$Register);
15033     }
15034   %}
15035   ins_pipe(ialu_reg_reg);
15036 %}
15037 
15038 // Zero-extend convert int to long
15039 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15040 %{
15041   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15042 
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
15044   ins_encode %{
15045     __ movl($dst$$Register, $src$$Address);
15046   %}
15047   ins_pipe(ialu_reg_mem);
15048 %}
15049 
15050 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15051 %{
15052   match(Set dst (AndL src mask));
15053 
15054   format %{ "movl    $dst, $src\t# zero-extend long" %}
15055   ins_encode %{
15056     __ movl($dst$$Register, $src$$Register);
15057   %}
15058   ins_pipe(ialu_reg_reg);
15059 %}
15060 
15061 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15062 %{
15063   match(Set dst (ConvL2I src));
15064 
15065   format %{ "movl    $dst, $src\t# l2i" %}
15066   ins_encode %{
15067     __ movl($dst$$Register, $src$$Register);
15068   %}
15069   ins_pipe(ialu_reg_reg);
15070 %}
15071 
15072 
15073 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15074   match(Set dst (MoveF2I src));
15075   effect(DEF dst, USE src);
15076 
15077   ins_cost(125);
15078   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
15079   ins_encode %{
15080     __ movl($dst$$Register, Address(rsp, $src$$disp));
15081   %}
15082   ins_pipe(ialu_reg_mem);
15083 %}
15084 
15085 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15086   match(Set dst (MoveI2F src));
15087   effect(DEF dst, USE src);
15088 
15089   ins_cost(125);
15090   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
15091   ins_encode %{
15092     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15093   %}
15094   ins_pipe(pipe_slow);
15095 %}
15096 
15097 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15098   match(Set dst (MoveD2L src));
15099   effect(DEF dst, USE src);
15100 
15101   ins_cost(125);
15102   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
15103   ins_encode %{
15104     __ movq($dst$$Register, Address(rsp, $src$$disp));
15105   %}
15106   ins_pipe(ialu_reg_mem);
15107 %}
15108 
15109 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15110   predicate(!UseXmmLoadAndClearUpper);
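  // movlpd merges into the low half of $dst and leaves the upper half untouched,
  // which keeps a dependency on the register's old value; the movsd variant
  // below clears the upper half instead.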
15111   match(Set dst (MoveL2D src));
15112   effect(DEF dst, USE src);
15113 
15114   ins_cost(125);
15115   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
15116   ins_encode %{
15117     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15118   %}
15119   ins_pipe(pipe_slow);
15120 %}
15121 
15122 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15123   predicate(UseXmmLoadAndClearUpper);
15124   match(Set dst (MoveL2D src));
15125   effect(DEF dst, USE src);
15126 
15127   ins_cost(125);
15128   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
15129   ins_encode %{
15130     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15131   %}
15132   ins_pipe(pipe_slow);
15133 %}
15134 
15135 
15136 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15137   match(Set dst (MoveF2I src));
15138   effect(DEF dst, USE src);
15139 
15140   ins_cost(95); // XXX
15141   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
15142   ins_encode %{
15143     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15144   %}
15145   ins_pipe(pipe_slow);
15146 %}
15147 
15148 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15149   match(Set dst (MoveI2F src));
15150   effect(DEF dst, USE src);
15151 
15152   ins_cost(100);
15153   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
15154   ins_encode %{
15155     __ movl(Address(rsp, $dst$$disp), $src$$Register);
15156   %}
15157   ins_pipe( ialu_mem_reg );
15158 %}
15159 
15160 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15161   match(Set dst (MoveD2L src));
15162   effect(DEF dst, USE src);
15163 
15164   ins_cost(95); // XXX
  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
15166   ins_encode %{
15167     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15168   %}
15169   ins_pipe(pipe_slow);
15170 %}
15171 
15172 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15173   match(Set dst (MoveL2D src));
15174   effect(DEF dst, USE src);
15175 
15176   ins_cost(100);
15177   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
15178   ins_encode %{
15179     __ movq(Address(rsp, $dst$$disp), $src$$Register);
15180   %}
15181   ins_pipe(ialu_mem_reg);
15182 %}
15183 
15184 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15185   match(Set dst (MoveF2I src));
15186   effect(DEF dst, USE src);
15187   ins_cost(85);
15188   format %{ "movd    $dst,$src\t# MoveF2I" %}
15189   ins_encode %{
15190     __ movdl($dst$$Register, $src$$XMMRegister);
15191   %}
15192   ins_pipe( pipe_slow );
15193 %}
15194 
15195 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15196   match(Set dst (MoveD2L src));
15197   effect(DEF dst, USE src);
15198   ins_cost(85);
15199   format %{ "movd    $dst,$src\t# MoveD2L" %}
15200   ins_encode %{
15201     __ movdq($dst$$Register, $src$$XMMRegister);
15202   %}
15203   ins_pipe( pipe_slow );
15204 %}
15205 
15206 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15207   match(Set dst (MoveI2F src));
15208   effect(DEF dst, USE src);
15209   ins_cost(100);
15210   format %{ "movd    $dst,$src\t# MoveI2F" %}
15211   ins_encode %{
15212     __ movdl($dst$$XMMRegister, $src$$Register);
15213   %}
15214   ins_pipe( pipe_slow );
15215 %}
15216 
15217 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15218   match(Set dst (MoveL2D src));
15219   effect(DEF dst, USE src);
15220   ins_cost(100);
15221   format %{ "movd    $dst,$src\t# MoveL2D" %}
15222   ins_encode %{
15223      __ movdq($dst$$XMMRegister, $src$$Register);
15224   %}
15225   ins_pipe( pipe_slow );
15226 %}
15227 
15228 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15230 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15231                   Universe dummy, rFlagsReg cr)
15232 %{
15233   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15234   match(Set dummy (ClearArray cnt base));
15235   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15236 
15237   format %{ $$template
15238     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15239     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15240     $$emit$$"jg      LARGE\n\t"
15241     $$emit$$"dec     rcx\n\t"
15242     $$emit$$"js      DONE\t# Zero length\n\t"
15243     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15244     $$emit$$"dec     rcx\n\t"
15245     $$emit$$"jge     LOOP\n\t"
15246     $$emit$$"jmp     DONE\n\t"
15247     $$emit$$"# LARGE:\n\t"
15248     if (UseFastStosb) {
15249        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15250        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15251     } else if (UseXMMForObjInit) {
15252        $$emit$$"mov     rdi,rax\n\t"
15253        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15254        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15255        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15256        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15257        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15258        $$emit$$"add     0x40,rax\n\t"
15259        $$emit$$"# L_zero_64_bytes:\n\t"
15260        $$emit$$"sub     0x8,rcx\n\t"
15261        $$emit$$"jge     L_loop\n\t"
15262        $$emit$$"add     0x4,rcx\n\t"
15263        $$emit$$"jl      L_tail\n\t"
15264        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15265        $$emit$$"add     0x20,rax\n\t"
15266        $$emit$$"sub     0x4,rcx\n\t"
15267        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15268        $$emit$$"add     0x4,rcx\n\t"
15269        $$emit$$"jle     L_end\n\t"
15270        $$emit$$"dec     rcx\n\t"
15271        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15272        $$emit$$"vmovq   xmm0,(rax)\n\t"
15273        $$emit$$"add     0x8,rax\n\t"
15274        $$emit$$"dec     rcx\n\t"
15275        $$emit$$"jge     L_sloop\n\t"
15276        $$emit$$"# L_end:\n\t"
15277     } else {
15278        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15279     }
15280     $$emit$$"# DONE"
15281   %}
15282   ins_encode %{
15283     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15284                  $tmp$$XMMRegister, false, knoreg);
15285   %}
15286   ins_pipe(pipe_slow);
15287 %}
15288 
15289 // Small non-constant length ClearArray for AVX512 targets.
15290 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15291                        Universe dummy, rFlagsReg cr)
15292 %{
15293   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15294   match(Set dummy (ClearArray cnt base));
15295   ins_cost(125);
15296   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15297 
15298   format %{ $$template
15299     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15300     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15301     $$emit$$"jg      LARGE\n\t"
15302     $$emit$$"dec     rcx\n\t"
15303     $$emit$$"js      DONE\t# Zero length\n\t"
15304     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15305     $$emit$$"dec     rcx\n\t"
15306     $$emit$$"jge     LOOP\n\t"
15307     $$emit$$"jmp     DONE\n\t"
15308     $$emit$$"# LARGE:\n\t"
15309     if (UseFastStosb) {
15310        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15311        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15312     } else if (UseXMMForObjInit) {
15313        $$emit$$"mov     rdi,rax\n\t"
15314        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15315        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15316        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15317        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15318        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15319        $$emit$$"add     0x40,rax\n\t"
15320        $$emit$$"# L_zero_64_bytes:\n\t"
15321        $$emit$$"sub     0x8,rcx\n\t"
15322        $$emit$$"jge     L_loop\n\t"
15323        $$emit$$"add     0x4,rcx\n\t"
15324        $$emit$$"jl      L_tail\n\t"
15325        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15326        $$emit$$"add     0x20,rax\n\t"
15327        $$emit$$"sub     0x4,rcx\n\t"
15328        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15329        $$emit$$"add     0x4,rcx\n\t"
15330        $$emit$$"jle     L_end\n\t"
15331        $$emit$$"dec     rcx\n\t"
15332        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15333        $$emit$$"vmovq   xmm0,(rax)\n\t"
15334        $$emit$$"add     0x8,rax\n\t"
15335        $$emit$$"dec     rcx\n\t"
15336        $$emit$$"jge     L_sloop\n\t"
15337        $$emit$$"# L_end:\n\t"
15338     } else {
15339        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15340     }
15341     $$emit$$"# DONE"
15342   %}
15343   ins_encode %{
15344     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15345                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15346   %}
15347   ins_pipe(pipe_slow);
15348 %}
15349 
15350 // Large non-constant length ClearArray for non-AVX512 targets.
15351 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15352                         Universe dummy, rFlagsReg cr)
15353 %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15355   match(Set dummy (ClearArray cnt base));
15356   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15357 
15358   format %{ $$template
15359     if (UseFastStosb) {
15360        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15361        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15362        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15363     } else if (UseXMMForObjInit) {
15364        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15365        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15366        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15367        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15368        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15369        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15370        $$emit$$"add     0x40,rax\n\t"
15371        $$emit$$"# L_zero_64_bytes:\n\t"
15372        $$emit$$"sub     0x8,rcx\n\t"
15373        $$emit$$"jge     L_loop\n\t"
15374        $$emit$$"add     0x4,rcx\n\t"
15375        $$emit$$"jl      L_tail\n\t"
15376        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15377        $$emit$$"add     0x20,rax\n\t"
15378        $$emit$$"sub     0x4,rcx\n\t"
15379        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15380        $$emit$$"add     0x4,rcx\n\t"
15381        $$emit$$"jle     L_end\n\t"
15382        $$emit$$"dec     rcx\n\t"
15383        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15384        $$emit$$"vmovq   xmm0,(rax)\n\t"
15385        $$emit$$"add     0x8,rax\n\t"
15386        $$emit$$"dec     rcx\n\t"
15387        $$emit$$"jge     L_sloop\n\t"
15388        $$emit$$"# L_end:\n\t"
15389     } else {
15390        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15391        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15392     }
15393   %}
15394   ins_encode %{
15395     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15396                  $tmp$$XMMRegister, true, knoreg);
15397   %}
15398   ins_pipe(pipe_slow);
15399 %}
15400 
15401 // Large non-constant length ClearArray for AVX512 targets.
15402 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15403                              Universe dummy, rFlagsReg cr)
15404 %{
15405   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15406   match(Set dummy (ClearArray cnt base));
15407   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15408 
15409   format %{ $$template
15410     if (UseFastStosb) {
15411        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15412        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15413        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15414     } else if (UseXMMForObjInit) {
15415        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15416        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15417        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15418        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15419        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15420        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15421        $$emit$$"add     0x40,rax\n\t"
15422        $$emit$$"# L_zero_64_bytes:\n\t"
15423        $$emit$$"sub     0x8,rcx\n\t"
15424        $$emit$$"jge     L_loop\n\t"
15425        $$emit$$"add     0x4,rcx\n\t"
15426        $$emit$$"jl      L_tail\n\t"
15427        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15428        $$emit$$"add     0x20,rax\n\t"
15429        $$emit$$"sub     0x4,rcx\n\t"
15430        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15431        $$emit$$"add     0x4,rcx\n\t"
15432        $$emit$$"jle     L_end\n\t"
15433        $$emit$$"dec     rcx\n\t"
15434        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15435        $$emit$$"vmovq   xmm0,(rax)\n\t"
15436        $$emit$$"add     0x8,rax\n\t"
15437        $$emit$$"dec     rcx\n\t"
15438        $$emit$$"jge     L_sloop\n\t"
15439        $$emit$$"# L_end:\n\t"
15440     } else {
15441        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15442        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15443     }
15444   %}
15445   ins_encode %{
15446     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15447                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15448   %}
15449   ins_pipe(pipe_slow);
15450 %}
15451 
15452 // Small constant length ClearArray for AVX512 targets.
15453 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15454 %{
15455   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15456   match(Set dummy (ClearArray cnt base));
15457   ins_cost(100);
15458   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15459   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15460   ins_encode %{
15461    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15462   %}
15463   ins_pipe(pipe_slow);
15464 %}
15465 
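// String compare intrinsics. The L/U suffixes name the encodings of the two
// inputs (L = Latin-1 byte[], U = UTF-16 char[]). The _evex variants require
// AVX-512 VL+BW and take an opmask register (kReg) as an extra temporary;
// the non-evex variants pass knoreg instead.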
15466 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15467                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15468 %{
15469   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15470   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15471   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15472 
15473   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15474   ins_encode %{
15475     __ string_compare($str1$$Register, $str2$$Register,
15476                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15477                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15478   %}
15479   ins_pipe( pipe_slow );
15480 %}
15481 
15482 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15483                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15484 %{
15485   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15486   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15487   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15488 
15489   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15490   ins_encode %{
15491     __ string_compare($str1$$Register, $str2$$Register,
15492                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15493                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15494   %}
15495   ins_pipe( pipe_slow );
15496 %}
15497 
15498 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15499                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15500 %{
15501   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15502   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15503   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15504 
15505   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15506   ins_encode %{
15507     __ string_compare($str1$$Register, $str2$$Register,
15508                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15509                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15510   %}
15511   ins_pipe( pipe_slow );
15512 %}
15513 
15514 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15515                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15516 %{
15517   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15518   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15519   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15520 
15521   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15522   ins_encode %{
15523     __ string_compare($str1$$Register, $str2$$Register,
15524                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15525                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15526   %}
15527   ins_pipe( pipe_slow );
15528 %}
15529 
15530 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15531                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15532 %{
15533   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15534   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15535   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15536 
15537   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15538   ins_encode %{
15539     __ string_compare($str1$$Register, $str2$$Register,
15540                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15541                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15542   %}
15543   ins_pipe( pipe_slow );
15544 %}
15545 
15546 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15547                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15548 %{
15549   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15550   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15551   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15552 
15553   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15554   ins_encode %{
15555     __ string_compare($str1$$Register, $str2$$Register,
15556                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15557                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15558   %}
15559   ins_pipe( pipe_slow );
15560 %}
15561 
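// Note: the UL flavors below call string_compare() with the operand pairs
// swapped, so the routine receives the Latin-1 operand first, mirroring the
// LU case above.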
15562 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15563                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15564 %{
15565   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15566   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15567   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15568 
15569   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15570   ins_encode %{
15571     __ string_compare($str2$$Register, $str1$$Register,
15572                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15573                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15574   %}
15575   ins_pipe( pipe_slow );
15576 %}
15577 
15578 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15579                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15580 %{
15581   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15582   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15583   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15584 
15585   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15586   ins_encode %{
15587     __ string_compare($str2$$Register, $str1$$Register,
15588                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15589                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15590   %}
15591   ins_pipe( pipe_slow );
15592 %}
15593 
15594 // fast search of substring with known size.
15595 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15596                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15597 %{
15598   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15599   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15600   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15601 
15602   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15603   ins_encode %{
15604     int icnt2 = (int)$int_cnt2$$constant;
15605     if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements,
      // which don't need to be loaded through the stack.
15608       __ string_indexofC8($str1$$Register, $str2$$Register,
15609                           $cnt1$$Register, $cnt2$$Register,
15610                           icnt2, $result$$Register,
15611                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15612     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15614       __ string_indexof($str1$$Register, $str2$$Register,
15615                         $cnt1$$Register, $cnt2$$Register,
15616                         icnt2, $result$$Register,
15617                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15618     }
15619   %}
15620   ins_pipe( pipe_slow );
15621 %}
15622 
15623 // fast search of substring with known size.
15624 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15625                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15626 %{
15627   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15628   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15629   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15630 
15631   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15632   ins_encode %{
15633     int icnt2 = (int)$int_cnt2$$constant;
15634     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements,
      // which don't need to be loaded through the stack.
15637       __ string_indexofC8($str1$$Register, $str2$$Register,
15638                           $cnt1$$Register, $cnt2$$Register,
15639                           icnt2, $result$$Register,
15640                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15641     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15643       __ string_indexof($str1$$Register, $str2$$Register,
15644                         $cnt1$$Register, $cnt2$$Register,
15645                         icnt2, $result$$Register,
15646                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15647     }
15648   %}
15649   ins_pipe( pipe_slow );
15650 %}
15651 
15652 // fast search of substring with known size.
15653 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15654                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15655 %{
15656   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15657   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15658   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15659 
15660   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15661   ins_encode %{
15662     int icnt2 = (int)$int_cnt2$$constant;
15663     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements,
      // which don't need to be loaded through the stack.
15666       __ string_indexofC8($str1$$Register, $str2$$Register,
15667                           $cnt1$$Register, $cnt2$$Register,
15668                           icnt2, $result$$Register,
15669                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15670     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15672       __ string_indexof($str1$$Register, $str2$$Register,
15673                         $cnt1$$Register, $cnt2$$Register,
15674                         icnt2, $result$$Register,
15675                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15676     }
15677   %}
15678   ins_pipe( pipe_slow );
15679 %}
15680 
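// Substring search where the substring length is not a compile-time
// constant; a constant count of -1 tells string_indexof() to take the
// length from $cnt2 at runtime.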
15681 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15682                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15683 %{
15684   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15685   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15686   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15687 
15688   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15689   ins_encode %{
15690     __ string_indexof($str1$$Register, $str2$$Register,
15691                       $cnt1$$Register, $cnt2$$Register,
15692                       (-1), $result$$Register,
15693                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15694   %}
15695   ins_pipe( pipe_slow );
15696 %}
15697 
15698 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15699                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15700 %{
15701   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15702   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15703   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15704 
15705   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15706   ins_encode %{
15707     __ string_indexof($str1$$Register, $str2$$Register,
15708                       $cnt1$$Register, $cnt2$$Register,
15709                       (-1), $result$$Register,
15710                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15711   %}
15712   ins_pipe( pipe_slow );
15713 %}
15714 
15715 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15716                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15717 %{
15718   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15719   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15720   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15721 
15722   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15723   ins_encode %{
15724     __ string_indexof($str1$$Register, $str2$$Register,
15725                       $cnt1$$Register, $cnt2$$Register,
15726                       (-1), $result$$Register,
15727                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15728   %}
15729   ins_pipe( pipe_slow );
15730 %}
15731 
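// fast search of a single character in a UTF-16 string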
15732 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15733                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15734 %{
15735   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15736   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15737   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15738   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15739   ins_encode %{
15740     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15741                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15742   %}
15743   ins_pipe( pipe_slow );
15744 %}
15745 
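// fast search of a single character in a Latin-1 string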
15746 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15747                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15748 %{
15749   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15750   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15751   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15752   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15753   ins_encode %{
15754     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15755                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15756   %}
15757   ins_pipe( pipe_slow );
15758 %}
15759 
15760 // fast string equals
15761 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15762                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15763 %{
15764   predicate(!VM_Version::supports_avx512vlbw());
15765   match(Set result (StrEquals (Binary str1 str2) cnt));
15766   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15767 
15768   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15769   ins_encode %{
15770     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15771                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15772                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15773   %}
15774   ins_pipe( pipe_slow );
15775 %}
15776 
15777 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15778                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15779 %{
15780   predicate(VM_Version::supports_avx512vlbw());
15781   match(Set result (StrEquals (Binary str1 str2) cnt));
15782   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15783 
15784   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15785   ins_encode %{
15786     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15787                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15788                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15789   %}
15790   ins_pipe( pipe_slow );
15791 %}
15792 
15793 // fast array equals
15794 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15795                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15796 %{
15797   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15798   match(Set result (AryEq ary1 ary2));
15799   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15800 
15801   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15802   ins_encode %{
15803     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15804                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15805                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15806   %}
15807   ins_pipe( pipe_slow );
15808 %}
15809 
15810 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15811                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15812 %{
15813   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15814   match(Set result (AryEq ary1 ary2));
15815   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15816 
15817   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15818   ins_encode %{
15819     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15820                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15821                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15822   %}
15823   ins_pipe( pipe_slow );
15824 %}
15825 
15826 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15827                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15828 %{
15829   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15830   match(Set result (AryEq ary1 ary2));
15831   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15832 
15833   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15834   ins_encode %{
15835     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15836                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15837                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15838   %}
15839   ins_pipe( pipe_slow );
15840 %}
15841 
15842 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15843                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15844 %{
15845   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15846   match(Set result (AryEq ary1 ary2));
15847   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15848 
15849   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15850   ins_encode %{
15851     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15852                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15853                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15854   %}
15855   ins_pipe( pipe_slow );
15856 %}
15857 
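// Vectorized hash code over a primitive array (AVX2 and above); basic_type
// is a compile-time constant selecting the element kind.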
15858 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15859                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15860                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15861                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15862                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15863 %{
15864   predicate(UseAVX >= 2);
15865   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15866   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15867          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15868          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15869          USE basic_type, KILL cr);
15870 
15871   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
15872   ins_encode %{
15873     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15874                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15875                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15876                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15877                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15878                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15879                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15880   %}
15881   ins_pipe( pipe_slow );
15882 %}
15883 
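// Count the leading positive (sign-bit-clear) bytes of a byte array. The
// evex variant additionally requires BMI2 and two opmask temporaries.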
15884 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15886 %{
15887   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15888   match(Set result (CountPositives ary1 len));
15889   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15890 
15891   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15892   ins_encode %{
15893     __ count_positives($ary1$$Register, $len$$Register,
15894                        $result$$Register, $tmp3$$Register,
15895                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15896   %}
15897   ins_pipe( pipe_slow );
15898 %}
15899 
15900 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15902 %{
15903   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15904   match(Set result (CountPositives ary1 len));
15905   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15906 
15907   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15908   ins_encode %{
15909     __ count_positives($ary1$$Register, $len$$Register,
15910                        $result$$Register, $tmp3$$Register,
15911                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15912   %}
15913   ins_pipe( pipe_slow );
15914 %}
15915 
15916 // fast char[] to byte[] compression
15917 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15918                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15919   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15920   match(Set result (StrCompressedCopy src (Binary dst len)));
15921   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15922          USE_KILL len, KILL tmp5, KILL cr);
15923 
15924   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15925   ins_encode %{
15926     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15927                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15928                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15929                            knoreg, knoreg);
15930   %}
15931   ins_pipe( pipe_slow );
15932 %}
15933 
15934 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15935                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15936   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15937   match(Set result (StrCompressedCopy src (Binary dst len)));
15938   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15939          USE_KILL len, KILL tmp5, KILL cr);
15940 
15941   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15942   ins_encode %{
15943     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15944                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15945                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15946                            $ktmp1$$KRegister, $ktmp2$$KRegister);
15947   %}
15948   ins_pipe( pipe_slow );
15949 %}

// fast byte[] to char[] inflation
15951 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15952                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15953   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15954   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15955   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15956 
15957   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15958   ins_encode %{
15959     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15960                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15961   %}
15962   ins_pipe( pipe_slow );
15963 %}
15964 
15965 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15966                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15967   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15968   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15969   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15970 
15971   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15972   ins_encode %{
15973     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15974                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15975   %}
15976   ins_pipe( pipe_slow );
15977 %}
15978 
15979 // encode char[] to byte[] in ISO_8859_1
15980 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15981                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15982                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15983   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15984   match(Set result (EncodeISOArray src (Binary dst len)));
15985   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15986 
15987   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15988   ins_encode %{
15989     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15990                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15991                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15992   %}
15993   ins_pipe( pipe_slow );
15994 %}
15995 
15996 // encode char[] to byte[] in ASCII
15997 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15998                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15999                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16000   predicate(((EncodeISOArrayNode*)n)->is_ascii());
16001   match(Set result (EncodeISOArray src (Binary dst len)));
16002   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16003 
16004   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16005   ins_encode %{
16006     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16007                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16008                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16009   %}
16010   ins_pipe( pipe_slow );
16011 %}
16012 
16013 //----------Overflow Math Instructions-----------------------------------------
16014 
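// These rules match the Overflow* ideal nodes (generated for the
// Math.*Exact intrinsics) and are executed only for their effect on the
// condition codes; the register holding the arithmetic result is either
// USE_KILLed or a TEMP.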
16015 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16016 %{
16017   match(Set cr (OverflowAddI op1 op2));
16018   effect(DEF cr, USE_KILL op1, USE op2);
16019 
16020   format %{ "addl    $op1, $op2\t# overflow check int" %}
16021 
16022   ins_encode %{
16023     __ addl($op1$$Register, $op2$$Register);
16024   %}
16025   ins_pipe(ialu_reg_reg);
16026 %}
16027 
16028 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16029 %{
16030   match(Set cr (OverflowAddI op1 op2));
16031   effect(DEF cr, USE_KILL op1, USE op2);
16032 
16033   format %{ "addl    $op1, $op2\t# overflow check int" %}
16034 
16035   ins_encode %{
16036     __ addl($op1$$Register, $op2$$constant);
16037   %}
16038   ins_pipe(ialu_reg_reg);
16039 %}
16040 
16041 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16042 %{
16043   match(Set cr (OverflowAddL op1 op2));
16044   effect(DEF cr, USE_KILL op1, USE op2);
16045 
16046   format %{ "addq    $op1, $op2\t# overflow check long" %}
16047   ins_encode %{
16048     __ addq($op1$$Register, $op2$$Register);
16049   %}
16050   ins_pipe(ialu_reg_reg);
16051 %}
16052 
16053 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16054 %{
16055   match(Set cr (OverflowAddL op1 op2));
16056   effect(DEF cr, USE_KILL op1, USE op2);
16057 
16058   format %{ "addq    $op1, $op2\t# overflow check long" %}
16059   ins_encode %{
16060     __ addq($op1$$Register, $op2$$constant);
16061   %}
16062   ins_pipe(ialu_reg_reg);
16063 %}
16064 
16065 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16066 %{
16067   match(Set cr (OverflowSubI op1 op2));
16068 
16069   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16070   ins_encode %{
16071     __ cmpl($op1$$Register, $op2$$Register);
16072   %}
16073   ins_pipe(ialu_reg_reg);
16074 %}
16075 
16076 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16077 %{
16078   match(Set cr (OverflowSubI op1 op2));
16079 
16080   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16081   ins_encode %{
16082     __ cmpl($op1$$Register, $op2$$constant);
16083   %}
16084   ins_pipe(ialu_reg_reg);
16085 %}
16086 
16087 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16088 %{
16089   match(Set cr (OverflowSubL op1 op2));
16090 
16091   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16092   ins_encode %{
16093     __ cmpq($op1$$Register, $op2$$Register);
16094   %}
16095   ins_pipe(ialu_reg_reg);
16096 %}
16097 
16098 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16099 %{
16100   match(Set cr (OverflowSubL op1 op2));
16101 
16102   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16103   ins_encode %{
16104     __ cmpq($op1$$Register, $op2$$constant);
16105   %}
16106   ins_pipe(ialu_reg_reg);
16107 %}
16108 
16109 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16110 %{
16111   match(Set cr (OverflowSubI zero op2));
16112   effect(DEF cr, USE_KILL op2);
16113 
16114   format %{ "negl    $op2\t# overflow check int" %}
16115   ins_encode %{
16116     __ negl($op2$$Register);
16117   %}
16118   ins_pipe(ialu_reg_reg);
16119 %}
16120 
16121 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16122 %{
16123   match(Set cr (OverflowSubL zero op2));
16124   effect(DEF cr, USE_KILL op2);
16125 
16126   format %{ "negq    $op2\t# overflow check long" %}
16127   ins_encode %{
16128     __ negq($op2$$Register);
16129   %}
16130   ins_pipe(ialu_reg_reg);
16131 %}
16132 
16133 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16134 %{
16135   match(Set cr (OverflowMulI op1 op2));
16136   effect(DEF cr, USE_KILL op1, USE op2);
16137 
16138   format %{ "imull    $op1, $op2\t# overflow check int" %}
16139   ins_encode %{
16140     __ imull($op1$$Register, $op2$$Register);
16141   %}
16142   ins_pipe(ialu_reg_reg_alu0);
16143 %}
16144 
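// The immediate forms multiply into a TEMP register so both inputs are
// preserved; only the resulting flags are consumed.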
16145 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16146 %{
16147   match(Set cr (OverflowMulI op1 op2));
16148   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16149 
16150   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
16151   ins_encode %{
16152     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16153   %}
16154   ins_pipe(ialu_reg_reg_alu0);
16155 %}
16156 
16157 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16158 %{
16159   match(Set cr (OverflowMulL op1 op2));
16160   effect(DEF cr, USE_KILL op1, USE op2);
16161 
16162   format %{ "imulq    $op1, $op2\t# overflow check long" %}
16163   ins_encode %{
16164     __ imulq($op1$$Register, $op2$$Register);
16165   %}
16166   ins_pipe(ialu_reg_reg_alu0);
16167 %}
16168 
16169 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16170 %{
16171   match(Set cr (OverflowMulL op1 op2));
16172   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16173 
16174   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
16175   ins_encode %{
16176     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16177   %}
16178   ins_pipe(ialu_reg_reg_alu0);
16179 %}
16180 
16181 
16182 //----------Control Flow Instructions------------------------------------------
16183 // Signed compare Instructions
16184 
16185 // XXX more variants!!
16186 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16187 %{
16188   match(Set cr (CmpI op1 op2));
16189   effect(DEF cr, USE op1, USE op2);
16190 
16191   format %{ "cmpl    $op1, $op2" %}
16192   ins_encode %{
16193     __ cmpl($op1$$Register, $op2$$Register);
16194   %}
16195   ins_pipe(ialu_cr_reg_reg);
16196 %}
16197 
16198 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16199 %{
16200   match(Set cr (CmpI op1 op2));
16201 
16202   format %{ "cmpl    $op1, $op2" %}
16203   ins_encode %{
16204     __ cmpl($op1$$Register, $op2$$constant);
16205   %}
16206   ins_pipe(ialu_cr_reg_imm);
16207 %}
16208 
16209 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16210 %{
16211   match(Set cr (CmpI op1 (LoadI op2)));
16212 
16213   ins_cost(500); // XXX
16214   format %{ "cmpl    $op1, $op2" %}
16215   ins_encode %{
16216     __ cmpl($op1$$Register, $op2$$Address);
16217   %}
16218   ins_pipe(ialu_cr_reg_mem);
16219 %}
16220 
16221 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16222 %{
16223   match(Set cr (CmpI src zero));
16224 
16225   format %{ "testl   $src, $src" %}
16226   ins_encode %{
16227     __ testl($src$$Register, $src$$Register);
16228   %}
16229   ins_pipe(ialu_cr_reg_imm);
16230 %}
16231 
16232 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16233 %{
16234   match(Set cr (CmpI (AndI src con) zero));
16235 
16236   format %{ "testl   $src, $con" %}
16237   ins_encode %{
16238     __ testl($src$$Register, $con$$constant);
16239   %}
16240   ins_pipe(ialu_cr_reg_imm);
16241 %}
16242 
16243 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16244 %{
16245   match(Set cr (CmpI (AndI src1 src2) zero));
16246 
16247   format %{ "testl   $src1, $src2" %}
16248   ins_encode %{
16249     __ testl($src1$$Register, $src2$$Register);
16250   %}
16251   ins_pipe(ialu_cr_reg_imm);
16252 %}
16253 
16254 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16255 %{
16256   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16257 
16258   format %{ "testl   $src, $mem" %}
16259   ins_encode %{
16260     __ testl($src$$Register, $mem$$Address);
16261   %}
16262   ins_pipe(ialu_cr_reg_mem);
16263 %}
16264 
16265 // Unsigned compare Instructions; really, same as signed except they
16266 // produce an rFlagsRegU instead of rFlagsReg.
16267 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16268 %{
16269   match(Set cr (CmpU op1 op2));
16270 
16271   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16272   ins_encode %{
16273     __ cmpl($op1$$Register, $op2$$Register);
16274   %}
16275   ins_pipe(ialu_cr_reg_reg);
16276 %}
16277 
16278 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16279 %{
16280   match(Set cr (CmpU op1 op2));
16281 
16282   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16283   ins_encode %{
16284     __ cmpl($op1$$Register, $op2$$constant);
16285   %}
16286   ins_pipe(ialu_cr_reg_imm);
16287 %}
16288 
16289 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16290 %{
16291   match(Set cr (CmpU op1 (LoadI op2)));
16292 
16293   ins_cost(500); // XXX
16294   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16295   ins_encode %{
16296     __ cmpl($op1$$Register, $op2$$Address);
16297   %}
16298   ins_pipe(ialu_cr_reg_mem);
16299 %}
16300 
16301 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16302 %{
16303   match(Set cr (CmpU src zero));
16304 
16305   format %{ "testl   $src, $src\t# unsigned" %}
16306   ins_encode %{
16307     __ testl($src$$Register, $src$$Register);
16308   %}
16309   ins_pipe(ialu_cr_reg_imm);
16310 %}
16311 
16312 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16313 %{
16314   match(Set cr (CmpP op1 op2));
16315 
16316   format %{ "cmpq    $op1, $op2\t# ptr" %}
16317   ins_encode %{
16318     __ cmpq($op1$$Register, $op2$$Register);
16319   %}
16320   ins_pipe(ialu_cr_reg_reg);
16321 %}
16322 
16323 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16324 %{
16325   match(Set cr (CmpP op1 (LoadP op2)));
16326   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16327 
16328   ins_cost(500); // XXX
16329   format %{ "cmpq    $op1, $op2\t# ptr" %}
16330   ins_encode %{
16331     __ cmpq($op1$$Register, $op2$$Address);
16332   %}
16333   ins_pipe(ialu_cr_reg_mem);
16334 %}
16335 
16336 // XXX this is generalized by compP_rReg_mem???
16337 // Compare raw pointer (used in out-of-heap check).
16338 // Only works because non-oop pointers must be raw pointers
16339 // and raw pointers have no anti-dependencies.
16340 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16341 %{
16342   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16343             n->in(2)->as_Load()->barrier_data() == 0);
16344   match(Set cr (CmpP op1 (LoadP op2)));
16345 
16346   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16347   ins_encode %{
16348     __ cmpq($op1$$Register, $op2$$Address);
16349   %}
16350   ins_pipe(ialu_cr_reg_mem);
16351 %}
16352 
16353 // This will generate a signed flags result. This should be OK since
// any compare against zero should be eq/neq.
16355 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16356 %{
16357   match(Set cr (CmpP src zero));
16358 
16359   format %{ "testq   $src, $src\t# ptr" %}
16360   ins_encode %{
16361     __ testq($src$$Register, $src$$Register);
16362   %}
16363   ins_pipe(ialu_cr_reg_imm);
16364 %}
16365 
16366 // This will generate a signed flags result. This should be OK since
// any compare against zero should be eq/neq.
16368 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16369 %{
16370   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16371             n->in(1)->as_Load()->barrier_data() == 0);
16372   match(Set cr (CmpP (LoadP op) zero));
16373 
16374   ins_cost(500); // XXX
16375   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16376   ins_encode %{
16377     __ testq($op$$Address, 0xFFFFFFFF);
16378   %}
16379   ins_pipe(ialu_cr_reg_imm);
16380 %}
16381 
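// With zero-based compressed oops, R12 (the heap-base register) is known to
// contain zero, so it can stand in for the zero operand and avoid an
// immediate in the encoding.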
16382 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16383 %{
16384   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16385             n->in(1)->as_Load()->barrier_data() == 0);
16386   match(Set cr (CmpP (LoadP mem) zero));
16387 
16388   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16389   ins_encode %{
16390     __ cmpq(r12, $mem$$Address);
16391   %}
16392   ins_pipe(ialu_cr_reg_mem);
16393 %}
16394 
16395 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16396 %{
16397   match(Set cr (CmpN op1 op2));
16398 
16399   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16400   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16401   ins_pipe(ialu_cr_reg_reg);
16402 %}
16403 
16404 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16405 %{
16406   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16407   match(Set cr (CmpN src (LoadN mem)));
16408 
16409   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16410   ins_encode %{
16411     __ cmpl($src$$Register, $mem$$Address);
16412   %}
16413   ins_pipe(ialu_cr_reg_mem);
16414 %}
16415 
16416 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16417   match(Set cr (CmpN op1 op2));
16418 
16419   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16420   ins_encode %{
16421     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16422   %}
16423   ins_pipe(ialu_cr_reg_imm);
16424 %}
16425 
16426 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16427 %{
16428   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16429   match(Set cr (CmpN src (LoadN mem)));
16430 
16431   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16432   ins_encode %{
16433     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16434   %}
16435   ins_pipe(ialu_cr_reg_mem);
16436 %}
16437 
16438 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16439   match(Set cr (CmpN op1 op2));
16440 
16441   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16442   ins_encode %{
16443     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16444   %}
16445   ins_pipe(ialu_cr_reg_imm);
16446 %}
16447 
16448 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16449 %{
16450   predicate(!UseCompactObjectHeaders);
16451   match(Set cr (CmpN src (LoadNKlass mem)));
16452 
16453   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16454   ins_encode %{
16455     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16456   %}
16457   ins_pipe(ialu_cr_reg_mem);
16458 %}
16459 
16460 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16461   match(Set cr (CmpN src zero));
16462 
16463   format %{ "testl   $src, $src\t# compressed ptr" %}
16464   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16465   ins_pipe(ialu_cr_reg_imm);
16466 %}
16467 
16468 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16469 %{
16470   predicate(CompressedOops::base() != nullptr &&
16471             n->in(1)->as_Load()->barrier_data() == 0);
16472   match(Set cr (CmpN (LoadN mem) zero));
16473 
16474   ins_cost(500); // XXX
16475   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16476   ins_encode %{
16477     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16478   %}
16479   ins_pipe(ialu_cr_reg_mem);
16480 %}
16481 
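// Same R12-is-zero trick as testP_mem_reg0 above, applied to compressed ptrs.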
16482 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16483 %{
16484   predicate(CompressedOops::base() == nullptr &&
16485             n->in(1)->as_Load()->barrier_data() == 0);
16486   match(Set cr (CmpN (LoadN mem) zero));
16487 
16488   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16489   ins_encode %{
16490     __ cmpl(r12, $mem$$Address);
16491   %}
16492   ins_pipe(ialu_cr_reg_mem);
16493 %}
16494 
16495 // Yanked all unsigned pointer compare operations.
16496 // Pointer compares are done with CmpP which is already unsigned.
16497 
16498 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16499 %{
16500   match(Set cr (CmpL op1 op2));
16501 
16502   format %{ "cmpq    $op1, $op2" %}
16503   ins_encode %{
16504     __ cmpq($op1$$Register, $op2$$Register);
16505   %}
16506   ins_pipe(ialu_cr_reg_reg);
16507 %}
16508 
16509 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16510 %{
16511   match(Set cr (CmpL op1 op2));
16512 
16513   format %{ "cmpq    $op1, $op2" %}
16514   ins_encode %{
16515     __ cmpq($op1$$Register, $op2$$constant);
16516   %}
16517   ins_pipe(ialu_cr_reg_imm);
16518 %}
16519 
16520 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16521 %{
16522   match(Set cr (CmpL op1 (LoadL op2)));
16523 
16524   format %{ "cmpq    $op1, $op2" %}
16525   ins_encode %{
16526     __ cmpq($op1$$Register, $op2$$Address);
16527   %}
16528   ins_pipe(ialu_cr_reg_mem);
16529 %}
16530 
16531 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16532 %{
16533   match(Set cr (CmpL src zero));
16534 
16535   format %{ "testq   $src, $src" %}
16536   ins_encode %{
16537     __ testq($src$$Register, $src$$Register);
16538   %}
16539   ins_pipe(ialu_cr_reg_imm);
16540 %}
16541 
16542 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16543 %{
16544   match(Set cr (CmpL (AndL src con) zero));
16545 
16546   format %{ "testq   $src, $con\t# long" %}
16547   ins_encode %{
16548     __ testq($src$$Register, $con$$constant);
16549   %}
16550   ins_pipe(ialu_cr_reg_imm);
16551 %}
16552 
16553 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16554 %{
16555   match(Set cr (CmpL (AndL src1 src2) zero));
16556 
16557   format %{ "testq   $src1, $src2\t# long" %}
16558   ins_encode %{
16559     __ testq($src1$$Register, $src2$$Register);
16560   %}
16561   ins_pipe(ialu_cr_reg_imm);
16562 %}
16563 
16564 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16565 %{
16566   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16567 
16568   format %{ "testq   $src, $mem" %}
16569   ins_encode %{
16570     __ testq($src$$Register, $mem$$Address);
16571   %}
16572   ins_pipe(ialu_cr_reg_mem);
16573 %}
16574 
16575 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16576 %{
16577   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16578 
16579   format %{ "testq   $src, $mem" %}
16580   ins_encode %{
16581     __ testq($src$$Register, $mem$$Address);
16582   %}
16583   ins_pipe(ialu_cr_reg_mem);
16584 %}
16585 
16586 // Manifest a CmpU result in an integer register.  Very painful.
16587 // This is the test to avoid.
16588 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16589 %{
16590   match(Set dst (CmpU3 src1 src2));
16591   effect(KILL flags);
16592 
16593   ins_cost(275); // XXX
16594   format %{ "cmpl    $src1, $src2\t# CmpL3\n\t"
16595             "movl    $dst, -1\n\t"
16596             "jb,u    done\n\t"
16597             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16598     "done:" %}
16599   ins_encode %{
16600     Label done;
16601     __ cmpl($src1$$Register, $src2$$Register);
16602     __ movl($dst$$Register, -1);
16603     __ jccb(Assembler::below, done);
16604     __ setcc(Assembler::notZero, $dst$$Register);
16605     __ bind(done);
16606   %}
16607   ins_pipe(pipe_slow);
16608 %}
16609 
16610 // Manifest a CmpL result in an integer register.  Very painful.
16611 // This is the test to avoid.
16612 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16613 %{
16614   match(Set dst (CmpL3 src1 src2));
16615   effect(KILL flags);
16616 
16617   ins_cost(275); // XXX
16618   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16619             "movl    $dst, -1\n\t"
16620             "jl,s    done\n\t"
16621             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16622     "done:" %}
16623   ins_encode %{
16624     Label done;
16625     __ cmpq($src1$$Register, $src2$$Register);
16626     __ movl($dst$$Register, -1);
16627     __ jccb(Assembler::less, done);
16628     __ setcc(Assembler::notZero, $dst$$Register);
16629     __ bind(done);
16630   %}
16631   ins_pipe(pipe_slow);
16632 %}
16633 
16634 // Manifest a CmpUL result in an integer register.  Very painful.
16635 // This is the test to avoid.
16636 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16637 %{
16638   match(Set dst (CmpUL3 src1 src2));
16639   effect(KILL flags);
16640 
16641   ins_cost(275); // XXX
16642   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16643             "movl    $dst, -1\n\t"
16644             "jb,u    done\n\t"
16645             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16646     "done:" %}
16647   ins_encode %{
16648     Label done;
16649     __ cmpq($src1$$Register, $src2$$Register);
16650     __ movl($dst$$Register, -1);
16651     __ jccb(Assembler::below, done);
16652     __ setcc(Assembler::notZero, $dst$$Register);
16653     __ bind(done);
16654   %}
16655   ins_pipe(pipe_slow);
16656 %}
16657 
16658 // Unsigned long compare Instructions; really, same as signed long except they
16659 // produce an rFlagsRegU instead of rFlagsReg.
16660 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16661 %{
16662   match(Set cr (CmpUL op1 op2));
16663 
16664   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16665   ins_encode %{
16666     __ cmpq($op1$$Register, $op2$$Register);
16667   %}
16668   ins_pipe(ialu_cr_reg_reg);
16669 %}
16670 
16671 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16672 %{
16673   match(Set cr (CmpUL op1 op2));
16674 
16675   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16676   ins_encode %{
16677     __ cmpq($op1$$Register, $op2$$constant);
16678   %}
16679   ins_pipe(ialu_cr_reg_imm);
16680 %}
16681 
16682 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16683 %{
16684   match(Set cr (CmpUL op1 (LoadL op2)));
16685 
16686   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16687   ins_encode %{
16688     __ cmpq($op1$$Register, $op2$$Address);
16689   %}
16690   ins_pipe(ialu_cr_reg_mem);
16691 %}
16692 
16693 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16694 %{
16695   match(Set cr (CmpUL src zero));
16696 
16697   format %{ "testq   $src, $src\t# unsigned" %}
16698   ins_encode %{
16699     __ testq($src$$Register, $src$$Register);
16700   %}
16701   ins_pipe(ialu_cr_reg_imm);
16702 %}
16703 
16704 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16705 %{
16706   match(Set cr (CmpI (LoadB mem) imm));
16707 
16708   ins_cost(125);
16709   format %{ "cmpb    $mem, $imm" %}
16710   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16711   ins_pipe(ialu_cr_reg_mem);
16712 %}
16713 
16714 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16715 %{
16716   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16717 
16718   ins_cost(125);
16719   format %{ "testb   $mem, $imm\t# ubyte" %}
16720   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16721   ins_pipe(ialu_cr_reg_mem);
16722 %}
16723 
16724 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16725 %{
16726   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16727 
16728   ins_cost(125);
16729   format %{ "testb   $mem, $imm\t# byte" %}
16730   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16731   ins_pipe(ialu_cr_reg_mem);
16732 %}
16733 
16734 //----------Max and Min--------------------------------------------------------
16735 // Min Instructions
16736 
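// The cmov rules below have no match rule of their own; they exist only as
// building blocks for the min/max expand rules that follow. The _ndd forms
// use the APX new-data-destination (three-operand) encoding.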
16737 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16738 %{
16739   predicate(!UseAPX);
16740   effect(USE_DEF dst, USE src, USE cr);
16741 
16742   format %{ "cmovlgt $dst, $src\t# min" %}
16743   ins_encode %{
16744     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16745   %}
16746   ins_pipe(pipe_cmov_reg);
16747 %}
16748 
16749 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16750 %{
16751   predicate(UseAPX);
16752   effect(DEF dst, USE src1, USE src2, USE cr);
16753 
16754   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16755   ins_encode %{
16756     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16757   %}
16758   ins_pipe(pipe_cmov_reg);
16759 %}
16760 
16761 instruct minI_rReg(rRegI dst, rRegI src)
16762 %{
16763   predicate(!UseAPX);
16764   match(Set dst (MinI dst src));
16765 
16766   ins_cost(200);
16767   expand %{
16768     rFlagsReg cr;
16769     compI_rReg(cr, dst, src);
16770     cmovI_reg_g(dst, src, cr);
16771   %}
16772 %}
16773 
16774 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16775 %{
16776   predicate(UseAPX);
16777   match(Set dst (MinI src1 src2));
16778   effect(DEF dst, USE src1, USE src2);
16779   flag(PD::Flag_ndd_demotable_opr1);
16780 
16781   ins_cost(200);
16782   expand %{
16783     rFlagsReg cr;
16784     compI_rReg(cr, src1, src2);
16785     cmovI_reg_g_ndd(dst, src1, src2, cr);
16786   %}
16787 %}
16788 
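// Max Instructions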
16789 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16790 %{
16791   predicate(!UseAPX);
16792   effect(USE_DEF dst, USE src, USE cr);
16793 
16794   format %{ "cmovllt $dst, $src\t# max" %}
16795   ins_encode %{
16796     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16797   %}
16798   ins_pipe(pipe_cmov_reg);
16799 %}
16800 
16801 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16802 %{
16803   predicate(UseAPX);
16804   effect(DEF dst, USE src1, USE src2, USE cr);
16805 
16806   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16807   ins_encode %{
16808     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16809   %}
16810   ins_pipe(pipe_cmov_reg);
16811 %}
16812 
16813 instruct maxI_rReg(rRegI dst, rRegI src)
16814 %{
16815   predicate(!UseAPX);
16816   match(Set dst (MaxI dst src));
16817 
16818   ins_cost(200);
16819   expand %{
16820     rFlagsReg cr;
16821     compI_rReg(cr, dst, src);
16822     cmovI_reg_l(dst, src, cr);
16823   %}
16824 %}
16825 
16826 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16827 %{
16828   predicate(UseAPX);
16829   match(Set dst (MaxI src1 src2));
16830   effect(DEF dst, USE src1, USE src2);
16831   flag(PD::Flag_ndd_demotable_opr1);
16832 
16833   ins_cost(200);
16834   expand %{
16835     rFlagsReg cr;
16836     compI_rReg(cr, src1, src2);
16837     cmovI_reg_l_ndd(dst, src1, src2, cr);
16838   %}
16839 %}
16840 
16841 // ============================================================================
16842 // Branch Instructions
16843 
16844 // Jump Direct - Label defines a relative address from JMP+1
16845 instruct jmpDir(label labl)
16846 %{
16847   match(Goto);
16848   effect(USE labl);
16849 
16850   ins_cost(300);
16851   format %{ "jmp     $labl" %}
16852   size(5);
16853   ins_encode %{
16854     Label* L = $labl$$label;
16855     __ jmp(*L, false); // Always long jump
16856   %}
16857   ins_pipe(pipe_jmp);
16858 %}
16859 
16860 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16861 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16862 %{
16863   match(If cop cr);
16864   effect(USE labl);
16865 
16866   ins_cost(300);
16867   format %{ "j$cop     $labl" %}
16868   size(6);
16869   ins_encode %{
16870     Label* L = $labl$$label;
16871     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16872   %}
16873   ins_pipe(pipe_jcc);
16874 %}
16875 
16876 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16877 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16878 %{
16879   match(CountedLoopEnd cop cr);
16880   effect(USE labl);
16881 
16882   ins_cost(300);
16883   format %{ "j$cop     $labl\t# loop end" %}
16884   size(6);
16885   ins_encode %{
16886     Label* L = $labl$$label;
16887     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16888   %}
16889   ins_pipe(pipe_jcc);
16890 %}
16891 
16892 // Jump Direct Conditional - using unsigned comparison
16893 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16894   match(If cop cmp);
16895   effect(USE labl);
16896 
16897   ins_cost(300);
16898   format %{ "j$cop,u   $labl" %}
16899   size(6);
16900   ins_encode %{
16901     Label* L = $labl$$label;
16902     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16903   %}
16904   ins_pipe(pipe_jcc);
16905 %}
16906 
16907 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16908   match(If cop cmp);
16909   effect(USE labl);
16910 
16911   ins_cost(200);
16912   format %{ "j$cop,u   $labl" %}
16913   size(6);
16914   ins_encode %{
16915     Label* L = $labl$$label;
16916     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16917   %}
16918   ins_pipe(pipe_jcc);
16919 %}
16920 
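// Descriptive note: after a ucomiss/ucomisd, an unordered result (a NaN
// operand) sets ZF, PF and CF. For a != comparison the branch must also be
// taken when the operands are unordered, so both jp and jne jump to $labl;
// for == the branch must not be taken on NaN, so a short jp first skips
// over the je. The rule below emits one of those two shapes based on $cop.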
16921 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16922   match(If cop cmp);
16923   effect(USE labl);
16924 
16925   ins_cost(200);
16926   format %{ $$template
16927     if ($cop$$cmpcode == Assembler::notEqual) {
16928       $$emit$$"jp,u    $labl\n\t"
16929       $$emit$$"j$cop,u   $labl"
16930     } else {
16931       $$emit$$"jp,u    done\n\t"
16932       $$emit$$"j$cop,u   $labl\n\t"
16933       $$emit$$"done:"
16934     }
16935   %}
16936   ins_encode %{
16937     Label* l = $labl$$label;
16938     if ($cop$$cmpcode == Assembler::notEqual) {
16939       __ jcc(Assembler::parity, *l, false);
16940       __ jcc(Assembler::notEqual, *l, false);
16941     } else if ($cop$$cmpcode == Assembler::equal) {
16942       Label done;
16943       __ jccb(Assembler::parity, done);
16944       __ jcc(Assembler::equal, *l, false);
16945       __ bind(done);
16946     } else {
16947        ShouldNotReachHere();
16948     }
16949   %}
16950   ins_pipe(pipe_jcc);
16951 %}
16952 
16953 // Jump Direct Conditional - using signed and unsigned comparison
16954 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
16955   match(If cop cmp);
16956   effect(USE labl);
16957 
16958   ins_cost(200);
16959   format %{ "j$cop,su   $labl" %}
16960   size(6);
16961   ins_encode %{
16962     Label* L = $labl$$label;
16963     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16964   %}
16965   ins_pipe(pipe_jcc);
16966 %}
16967 
16968 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superclass array for an instance of the superklass.  Set a hidden
// internal cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
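//
// Illustrative sketch (hypothetical Java input): the slow path is reached
// when the superclass is not among the receiver's primary supers, e.g. a
// cast to an interface type:
//
//   static Runnable asRunnable(Object o) {
//     return (Runnable) o;  // checkcast may scan the secondary supers
//   }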
16974 
16975 instruct partialSubtypeCheck(rdi_RegP result,
16976                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16977                              rFlagsReg cr)
16978 %{
16979   match(Set result (PartialSubtypeCheck sub super));
16980   predicate(!UseSecondarySupersTable);
16981   effect(KILL rcx, KILL cr);
16982 
16983   ins_cost(1100);  // slightly larger than the next version
16984   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16985             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16986             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16987             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16988             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
16989             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
16991     "miss:\t" %}
16992 
16993   ins_encode %{
16994     Label miss;
16995     // NB: Callers may assume that, when $result is a valid register,
16996     // check_klass_subtype_slow_path_linear sets it to a nonzero
16997     // value.
16998     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16999                                             $rcx$$Register, $result$$Register,
17000                                             nullptr, &miss,
17001                                             /*set_cond_codes:*/ true);
17002     __ xorptr($result$$Register, $result$$Register);
17003     __ bind(miss);
17004   %}
17005 
17006   ins_pipe(pipe_slow);
17007 %}
17008 
17009 // ============================================================================
// Two versions of the hashtable-based partialSubtypeCheck, both used when
// we need to search for a superclass in the secondary supers array.
// The first is used when we don't know _a priori_ the class being
// searched for. The second, far more common, is used when we do know:
// this covers instanceof, checkcast, and any other case where C2 can
// determine the superclass by constant propagation.
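//
// Illustrative sketch of the two shapes (hypothetical Java input):
//
//   static boolean known(Object o) {
//     return o instanceof Runnable;   // superklass is a compile-time constant
//   }
//   static boolean unknown(Class<?> c, Object o) {
//     return c.isInstance(o);         // superklass only known at runtime
//   }
//
// The first can use partialSubtypeCheckConstSuper (the hash slot is computed
// at compile time); the second needs partialSubtypeCheckVarSuper.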
17016 
17017 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17018                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17019                                        rFlagsReg cr)
17020 %{
17021   match(Set result (PartialSubtypeCheck sub super));
17022   predicate(UseSecondarySupersTable);
17023   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17024 
17025   ins_cost(1000);
17026   format %{ "partialSubtypeCheck $result, $sub, $super" %}
17027 
17028   ins_encode %{
17029     __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
17031   %}
17032 
17033   ins_pipe(pipe_slow);
17034 %}
17035 
17036 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17037                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17038                                        rFlagsReg cr)
17039 %{
17040   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17041   predicate(UseSecondarySupersTable);
17042   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17043 
  ins_cost(700);  // smaller than the linear-scan version above
17045   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17046 
17047   ins_encode %{
17048     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17049     if (InlineSecondarySupersTest) {
17050       __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register, $result$$Register,
                                             super_klass_slot);
17053     } else {
17054       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17055     }
17056   %}
17057 
17058   ins_pipe(pipe_slow);
17059 %}
17060 
17061 // ============================================================================
17062 // Branch Instructions -- short offset versions
17063 //
17064 // These instructions are used to replace jumps of a long offset (the default
17065 // match) with jumps of a shorter offset.  These instructions are all tagged
17066 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17067 // match rules in general matching.  Instead, the ADLC generates a conversion
17068 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler decides whether the
// short variant can be used via the is_short_branch_offset() predicate in
// the machine-specific code section of this file.
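//
// Sizing sketch (standard x86 encodings, for reference): the long forms emit
// jmp rel32 (E9, 5 bytes) and jcc rel32 (0F 8x, 6 bytes), while the short
// forms emit jmp rel8 (EB, 2 bytes) and jcc rel8 (7x, 2 bytes), matching
// the size() attributes declared on the rules in this section.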
17072 
17073 // Jump Direct - Label defines a relative address from JMP+1
17074 instruct jmpDir_short(label labl) %{
17075   match(Goto);
17076   effect(USE labl);
17077 
17078   ins_cost(300);
17079   format %{ "jmp,s   $labl" %}
17080   size(2);
17081   ins_encode %{
17082     Label* L = $labl$$label;
17083     __ jmpb(*L);
17084   %}
17085   ins_pipe(pipe_jmp);
17086   ins_short_branch(1);
17087 %}
17088 
17089 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17090 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17091   match(If cop cr);
17092   effect(USE labl);
17093 
17094   ins_cost(300);
17095   format %{ "j$cop,s   $labl" %}
17096   size(2);
17097   ins_encode %{
17098     Label* L = $labl$$label;
17099     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17100   %}
17101   ins_pipe(pipe_jcc);
17102   ins_short_branch(1);
17103 %}
17104 
17105 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17106 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17107   match(CountedLoopEnd cop cr);
17108   effect(USE labl);
17109 
17110   ins_cost(300);
17111   format %{ "j$cop,s   $labl\t# loop end" %}
17112   size(2);
17113   ins_encode %{
17114     Label* L = $labl$$label;
17115     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17116   %}
17117   ins_pipe(pipe_jcc);
17118   ins_short_branch(1);
17119 %}
17120 
17121 // Jump Direct Conditional - using unsigned comparison
17122 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17123   match(If cop cmp);
17124   effect(USE labl);
17125 
17126   ins_cost(300);
17127   format %{ "j$cop,us  $labl" %}
17128   size(2);
17129   ins_encode %{
17130     Label* L = $labl$$label;
17131     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17132   %}
17133   ins_pipe(pipe_jcc);
17134   ins_short_branch(1);
17135 %}
17136 
17137 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17138   match(If cop cmp);
17139   effect(USE labl);
17140 
17141   ins_cost(300);
17142   format %{ "j$cop,us  $labl" %}
17143   size(2);
17144   ins_encode %{
17145     Label* L = $labl$$label;
17146     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17147   %}
17148   ins_pipe(pipe_jcc);
17149   ins_short_branch(1);
17150 %}
17151 
17152 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17153   match(If cop cmp);
17154   effect(USE labl);
17155 
17156   ins_cost(300);
17157   format %{ $$template
17158     if ($cop$$cmpcode == Assembler::notEqual) {
17159       $$emit$$"jp,u,s  $labl\n\t"
17160       $$emit$$"j$cop,u,s  $labl"
17161     } else {
17162       $$emit$$"jp,u,s  done\n\t"
17163       $$emit$$"j$cop,u,s  $labl\n\t"
17164       $$emit$$"done:"
17165     }
17166   %}
17167   size(4);
17168   ins_encode %{
17169     Label* l = $labl$$label;
17170     if ($cop$$cmpcode == Assembler::notEqual) {
17171       __ jccb(Assembler::parity, *l);
17172       __ jccb(Assembler::notEqual, *l);
17173     } else if ($cop$$cmpcode == Assembler::equal) {
17174       Label done;
17175       __ jccb(Assembler::parity, done);
17176       __ jccb(Assembler::equal, *l);
17177       __ bind(done);
17178     } else {
17179        ShouldNotReachHere();
17180     }
17181   %}
17182   ins_pipe(pipe_jcc);
17183   ins_short_branch(1);
17184 %}
17185 
17186 // Jump Direct Conditional - using signed and unsigned comparison
17187 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17188   match(If cop cmp);
17189   effect(USE labl);
17190 
17191   ins_cost(300);
17192   format %{ "j$cop,sus  $labl" %}
17193   size(2);
17194   ins_encode %{
17195     Label* L = $labl$$label;
17196     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17197   %}
17198   ins_pipe(pipe_jcc);
17199   ins_short_branch(1);
17200 %}
17201 
17202 // ============================================================================
17203 // inlined locking and unlocking
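//
// Illustrative sketch (hypothetical Java input): these rules implement the
// fast path of monitor enter/exit for synchronized code, e.g.
//
//   void bump() {
//     synchronized (this) {   // FastLock on entry, FastUnlock on exit
//       count++;
//     }
//   }
//
// If the inline fast path fails, the emitted code falls back to the runtime
// slow path.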
17204 
17205 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17206   match(Set cr (FastLock object box));
17207   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17208   ins_cost(300);
17209   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17210   ins_encode %{
17211     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17212   %}
17213   ins_pipe(pipe_slow);
17214 %}
17215 
17216 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17217   match(Set cr (FastUnlock object rax_reg));
17218   effect(TEMP tmp, USE_KILL rax_reg);
17219   ins_cost(300);
17220   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17221   ins_encode %{
17222     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17223   %}
17224   ins_pipe(pipe_slow);
17225 %}
17226 
17227 
17228 // ============================================================================
17229 // Safepoint Instructions
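// Descriptive note: $poll holds the address of the thread's polling page.
// The testl load is harmless while the page is readable; to bring the thread
// to a safepoint, the VM protects the page so that the same load faults, and
// the signal handler then parks the thread in the safepoint machinery. The
// relocInfo::poll_type relocation below lets the VM recognize the poll
// instruction at the faulting pc.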
17230 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17231 %{
17232   match(SafePoint poll);
17233   effect(KILL cr, USE poll);
17234 
17235   format %{ "testl   rax, [$poll]\t"
17236             "# Safepoint: poll for GC" %}
17237   ins_cost(125);
17238   ins_encode %{
17239     __ relocate(relocInfo::poll_type);
17240     address pre_pc = __ pc();
17241     __ testl(rax, Address($poll$$Register, 0));
17242     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17243   %}
17244   ins_pipe(ialu_reg_mem);
17245 %}
17246 
17247 instruct mask_all_evexL(kReg dst, rRegL src) %{
17248   match(Set dst (MaskAll src));
17249   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17250   ins_encode %{
17251     int mask_len = Matcher::vector_length(this);
17252     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17253   %}
17254   ins_pipe( pipe_slow );
17255 %}
17256 
17257 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17258   predicate(Matcher::vector_length(n) > 32);
17259   match(Set dst (MaskAll src));
17260   effect(TEMP tmp);
17261   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17262   ins_encode %{
17263     int mask_len = Matcher::vector_length(this);
17264     __ movslq($tmp$$Register, $src$$Register);
17265     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17266   %}
17267   ins_pipe( pipe_slow );
17268 %}
17269 
17270 // ============================================================================
17271 // Procedure Call/Return Instructions
17272 // Call Java Static Instruction
17273 // Note: If this code changes, the corresponding ret_addr_offset() and
17274 //       compute_padding() functions will have to be adjusted.
17275 instruct CallStaticJavaDirect(method meth) %{
17276   match(CallStaticJava);
17277   effect(USE meth);
17278 
17279   ins_cost(300);
17280   format %{ "call,static " %}
17281   opcode(0xE8); /* E8 cd */
17282   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17283   ins_pipe(pipe_slow);
17284   ins_alignment(4);
17285 %}
17286 
17287 // Call Java Dynamic Instruction
17288 // Note: If this code changes, the corresponding ret_addr_offset() and
17289 //       compute_padding() functions will have to be adjusted.
17290 instruct CallDynamicJavaDirect(method meth)
17291 %{
17292   match(CallDynamicJava);
17293   effect(USE meth);
17294 
17295   ins_cost(300);
17296   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17297             "call,dynamic " %}
17298   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17299   ins_pipe(pipe_slow);
17300   ins_alignment(4);
17301 %}
17302 
17303 // Call Runtime Instruction
17304 instruct CallRuntimeDirect(method meth)
17305 %{
17306   match(CallRuntime);
17307   effect(USE meth);
17308 
17309   ins_cost(300);
17310   format %{ "call,runtime " %}
17311   ins_encode(clear_avx, Java_To_Runtime(meth));
17312   ins_pipe(pipe_slow);
17313 %}
17314 
17315 // Call runtime without safepoint
17316 instruct CallLeafDirect(method meth)
17317 %{
17318   match(CallLeaf);
17319   effect(USE meth);
17320 
17321   ins_cost(300);
17322   format %{ "call_leaf,runtime " %}
17323   ins_encode(clear_avx, Java_To_Runtime(meth));
17324   ins_pipe(pipe_slow);
17325 %}
17326 
17327 // Call runtime without safepoint and with vector arguments
17328 instruct CallLeafDirectVector(method meth)
17329 %{
17330   match(CallLeafVector);
17331   effect(USE meth);
17332 
17333   ins_cost(300);
17334   format %{ "call_leaf,vector " %}
17335   ins_encode(Java_To_Runtime(meth));
17336   ins_pipe(pipe_slow);
17337 %}
17338 
17339 // Call runtime without safepoint
17340 instruct CallLeafNoFPDirect(method meth)
17341 %{
17342   match(CallLeafNoFP);
17343   effect(USE meth);
17344 
17345   ins_cost(300);
17346   format %{ "call_leaf_nofp,runtime " %}
17347   ins_encode(clear_avx, Java_To_Runtime(meth));
17348   ins_pipe(pipe_slow);
17349 %}
17350 
17351 // Return Instruction
17352 // Remove the return address & jump to it.
17353 // Notice: We always emit a nop after a ret to make sure there is room
17354 // for safepoint patching
17355 instruct Ret()
17356 %{
17357   match(Return);
17358 
17359   format %{ "ret" %}
17360   ins_encode %{
17361     __ ret(0);
17362   %}
17363   ins_pipe(pipe_jmp);
17364 %}
17365 
17366 // Tail Call; Jump from runtime stub to Java code.
17367 // Also known as an 'interprocedural jump'.
17368 // Target of jump will eventually return to caller.
17369 // TailJump below removes the return address.
17370 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17371 // emitted just above the TailCall which has reset rbp to the caller state.
17372 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17373 %{
17374   match(TailCall jump_target method_ptr);
17375 
17376   ins_cost(300);
17377   format %{ "jmp     $jump_target\t# rbx holds method" %}
17378   ins_encode %{
17379     __ jmp($jump_target$$Register);
17380   %}
17381   ins_pipe(pipe_jmp);
17382 %}
17383 
17384 // Tail Jump; remove the return address; jump to target.
17385 // TailCall above leaves the return address around.
17386 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17387 %{
17388   match(TailJump jump_target ex_oop);
17389 
17390   ins_cost(300);
17391   format %{ "popq    rdx\t# pop return address\n\t"
17392             "jmp     $jump_target" %}
17393   ins_encode %{
17394     __ popq(as_Register(RDX_enc));
17395     __ jmp($jump_target$$Register);
17396   %}
17397   ins_pipe(pipe_jmp);
17398 %}
17399 
17400 // Forward exception.
17401 instruct ForwardExceptionjmp()
17402 %{
17403   match(ForwardException);
17404 
17405   format %{ "jmp     forward_exception_stub" %}
17406   ins_encode %{
17407     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17408   %}
17409   ins_pipe(pipe_jmp);
17410 %}
17411 
17412 // Create exception oop: created by stack-crawling runtime code.
17413 // Created exception is now available to this handler, and is setup
17414 // just prior to jumping to this handler.  No code emitted.
17415 instruct CreateException(rax_RegP ex_oop)
17416 %{
17417   match(Set ex_oop (CreateEx));
17418 
17419   size(0);
17420   // use the following format syntax
17421   format %{ "# exception oop is in rax; no code emitted" %}
17422   ins_encode();
17423   ins_pipe(empty);
17424 %}
17425 
17426 // Rethrow exception:
17427 // The exception oop will come in the first argument position.
17428 // Then JUMP (not call) to the rethrow stub code.
17429 instruct RethrowException()
17430 %{
17431   match(Rethrow);
17432 
17433   // use the following format syntax
17434   format %{ "jmp     rethrow_stub" %}
17435   ins_encode %{
17436     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17437   %}
17438   ins_pipe(pipe_jmp);
17439 %}
17440 
17441 // ============================================================================
17442 // This name is KNOWN by the ADLC and cannot be changed.
17443 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17444 // for this guy.
17445 instruct tlsLoadP(r15_RegP dst) %{
17446   match(Set dst (ThreadLocal));
17447   effect(DEF dst);
17448 
17449   size(0);
17450   format %{ "# TLS is in R15" %}
17451   ins_encode( /*empty encoding*/ );
17452   ins_pipe(ialu_reg_reg);
17453 %}
17454 
17455 instruct addF_reg(regF dst, regF src) %{
17456   predicate(UseAVX == 0);
17457   match(Set dst (AddF dst src));
17458 
17459   format %{ "addss   $dst, $src" %}
17460   ins_cost(150);
17461   ins_encode %{
17462     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17463   %}
17464   ins_pipe(pipe_slow);
17465 %}
17466 
17467 instruct addF_mem(regF dst, memory src) %{
17468   predicate(UseAVX == 0);
17469   match(Set dst (AddF dst (LoadF src)));
17470 
17471   format %{ "addss   $dst, $src" %}
17472   ins_cost(150);
17473   ins_encode %{
17474     __ addss($dst$$XMMRegister, $src$$Address);
17475   %}
17476   ins_pipe(pipe_slow);
17477 %}
17478 
17479 instruct addF_imm(regF dst, immF con) %{
17480   predicate(UseAVX == 0);
17481   match(Set dst (AddF dst con));
17482   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17483   ins_cost(150);
17484   ins_encode %{
17485     __ addss($dst$$XMMRegister, $constantaddress($con));
17486   %}
17487   ins_pipe(pipe_slow);
17488 %}
17489 
17490 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17491   predicate(UseAVX > 0);
17492   match(Set dst (AddF src1 src2));
17493 
17494   format %{ "vaddss  $dst, $src1, $src2" %}
17495   ins_cost(150);
17496   ins_encode %{
17497     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17498   %}
17499   ins_pipe(pipe_slow);
17500 %}
17501 
17502 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17503   predicate(UseAVX > 0);
17504   match(Set dst (AddF src1 (LoadF src2)));
17505 
17506   format %{ "vaddss  $dst, $src1, $src2" %}
17507   ins_cost(150);
17508   ins_encode %{
17509     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17510   %}
17511   ins_pipe(pipe_slow);
17512 %}
17513 
17514 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17515   predicate(UseAVX > 0);
17516   match(Set dst (AddF src con));
17517 
17518   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17519   ins_cost(150);
17520   ins_encode %{
17521     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17522   %}
17523   ins_pipe(pipe_slow);
17524 %}
17525 
17526 instruct addD_reg(regD dst, regD src) %{
17527   predicate(UseAVX == 0);
17528   match(Set dst (AddD dst src));
17529 
17530   format %{ "addsd   $dst, $src" %}
17531   ins_cost(150);
17532   ins_encode %{
17533     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17534   %}
17535   ins_pipe(pipe_slow);
17536 %}
17537 
17538 instruct addD_mem(regD dst, memory src) %{
17539   predicate(UseAVX == 0);
17540   match(Set dst (AddD dst (LoadD src)));
17541 
17542   format %{ "addsd   $dst, $src" %}
17543   ins_cost(150);
17544   ins_encode %{
17545     __ addsd($dst$$XMMRegister, $src$$Address);
17546   %}
17547   ins_pipe(pipe_slow);
17548 %}
17549 
17550 instruct addD_imm(regD dst, immD con) %{
17551   predicate(UseAVX == 0);
17552   match(Set dst (AddD dst con));
17553   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17554   ins_cost(150);
17555   ins_encode %{
17556     __ addsd($dst$$XMMRegister, $constantaddress($con));
17557   %}
17558   ins_pipe(pipe_slow);
17559 %}
17560 
17561 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17562   predicate(UseAVX > 0);
17563   match(Set dst (AddD src1 src2));
17564 
17565   format %{ "vaddsd  $dst, $src1, $src2" %}
17566   ins_cost(150);
17567   ins_encode %{
17568     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17569   %}
17570   ins_pipe(pipe_slow);
17571 %}
17572 
17573 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17574   predicate(UseAVX > 0);
17575   match(Set dst (AddD src1 (LoadD src2)));
17576 
17577   format %{ "vaddsd  $dst, $src1, $src2" %}
17578   ins_cost(150);
17579   ins_encode %{
17580     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17581   %}
17582   ins_pipe(pipe_slow);
17583 %}
17584 
17585 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17586   predicate(UseAVX > 0);
17587   match(Set dst (AddD src con));
17588 
17589   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17590   ins_cost(150);
17591   ins_encode %{
17592     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17593   %}
17594   ins_pipe(pipe_slow);
17595 %}
17596 
17597 instruct subF_reg(regF dst, regF src) %{
17598   predicate(UseAVX == 0);
17599   match(Set dst (SubF dst src));
17600 
17601   format %{ "subss   $dst, $src" %}
17602   ins_cost(150);
17603   ins_encode %{
17604     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17605   %}
17606   ins_pipe(pipe_slow);
17607 %}
17608 
17609 instruct subF_mem(regF dst, memory src) %{
17610   predicate(UseAVX == 0);
17611   match(Set dst (SubF dst (LoadF src)));
17612 
17613   format %{ "subss   $dst, $src" %}
17614   ins_cost(150);
17615   ins_encode %{
17616     __ subss($dst$$XMMRegister, $src$$Address);
17617   %}
17618   ins_pipe(pipe_slow);
17619 %}
17620 
17621 instruct subF_imm(regF dst, immF con) %{
17622   predicate(UseAVX == 0);
17623   match(Set dst (SubF dst con));
17624   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17625   ins_cost(150);
17626   ins_encode %{
17627     __ subss($dst$$XMMRegister, $constantaddress($con));
17628   %}
17629   ins_pipe(pipe_slow);
17630 %}
17631 
17632 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17633   predicate(UseAVX > 0);
17634   match(Set dst (SubF src1 src2));
17635 
17636   format %{ "vsubss  $dst, $src1, $src2" %}
17637   ins_cost(150);
17638   ins_encode %{
17639     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17640   %}
17641   ins_pipe(pipe_slow);
17642 %}
17643 
17644 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17645   predicate(UseAVX > 0);
17646   match(Set dst (SubF src1 (LoadF src2)));
17647 
17648   format %{ "vsubss  $dst, $src1, $src2" %}
17649   ins_cost(150);
17650   ins_encode %{
17651     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17652   %}
17653   ins_pipe(pipe_slow);
17654 %}
17655 
17656 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17657   predicate(UseAVX > 0);
17658   match(Set dst (SubF src con));
17659 
17660   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17661   ins_cost(150);
17662   ins_encode %{
17663     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17664   %}
17665   ins_pipe(pipe_slow);
17666 %}
17667 
17668 instruct subD_reg(regD dst, regD src) %{
17669   predicate(UseAVX == 0);
17670   match(Set dst (SubD dst src));
17671 
17672   format %{ "subsd   $dst, $src" %}
17673   ins_cost(150);
17674   ins_encode %{
17675     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17676   %}
17677   ins_pipe(pipe_slow);
17678 %}
17679 
17680 instruct subD_mem(regD dst, memory src) %{
17681   predicate(UseAVX == 0);
17682   match(Set dst (SubD dst (LoadD src)));
17683 
17684   format %{ "subsd   $dst, $src" %}
17685   ins_cost(150);
17686   ins_encode %{
17687     __ subsd($dst$$XMMRegister, $src$$Address);
17688   %}
17689   ins_pipe(pipe_slow);
17690 %}
17691 
17692 instruct subD_imm(regD dst, immD con) %{
17693   predicate(UseAVX == 0);
17694   match(Set dst (SubD dst con));
17695   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17696   ins_cost(150);
17697   ins_encode %{
17698     __ subsd($dst$$XMMRegister, $constantaddress($con));
17699   %}
17700   ins_pipe(pipe_slow);
17701 %}
17702 
17703 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17704   predicate(UseAVX > 0);
17705   match(Set dst (SubD src1 src2));
17706 
17707   format %{ "vsubsd  $dst, $src1, $src2" %}
17708   ins_cost(150);
17709   ins_encode %{
17710     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17711   %}
17712   ins_pipe(pipe_slow);
17713 %}
17714 
17715 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17716   predicate(UseAVX > 0);
17717   match(Set dst (SubD src1 (LoadD src2)));
17718 
17719   format %{ "vsubsd  $dst, $src1, $src2" %}
17720   ins_cost(150);
17721   ins_encode %{
17722     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17723   %}
17724   ins_pipe(pipe_slow);
17725 %}
17726 
17727 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17728   predicate(UseAVX > 0);
17729   match(Set dst (SubD src con));
17730 
17731   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17732   ins_cost(150);
17733   ins_encode %{
17734     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17735   %}
17736   ins_pipe(pipe_slow);
17737 %}
17738 
17739 instruct mulF_reg(regF dst, regF src) %{
17740   predicate(UseAVX == 0);
17741   match(Set dst (MulF dst src));
17742 
17743   format %{ "mulss   $dst, $src" %}
17744   ins_cost(150);
17745   ins_encode %{
17746     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17747   %}
17748   ins_pipe(pipe_slow);
17749 %}
17750 
17751 instruct mulF_mem(regF dst, memory src) %{
17752   predicate(UseAVX == 0);
17753   match(Set dst (MulF dst (LoadF src)));
17754 
17755   format %{ "mulss   $dst, $src" %}
17756   ins_cost(150);
17757   ins_encode %{
17758     __ mulss($dst$$XMMRegister, $src$$Address);
17759   %}
17760   ins_pipe(pipe_slow);
17761 %}
17762 
17763 instruct mulF_imm(regF dst, immF con) %{
17764   predicate(UseAVX == 0);
17765   match(Set dst (MulF dst con));
17766   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17767   ins_cost(150);
17768   ins_encode %{
17769     __ mulss($dst$$XMMRegister, $constantaddress($con));
17770   %}
17771   ins_pipe(pipe_slow);
17772 %}
17773 
17774 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17775   predicate(UseAVX > 0);
17776   match(Set dst (MulF src1 src2));
17777 
17778   format %{ "vmulss  $dst, $src1, $src2" %}
17779   ins_cost(150);
17780   ins_encode %{
17781     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17782   %}
17783   ins_pipe(pipe_slow);
17784 %}
17785 
17786 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17787   predicate(UseAVX > 0);
17788   match(Set dst (MulF src1 (LoadF src2)));
17789 
17790   format %{ "vmulss  $dst, $src1, $src2" %}
17791   ins_cost(150);
17792   ins_encode %{
17793     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17794   %}
17795   ins_pipe(pipe_slow);
17796 %}
17797 
17798 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17799   predicate(UseAVX > 0);
17800   match(Set dst (MulF src con));
17801 
17802   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17803   ins_cost(150);
17804   ins_encode %{
17805     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17806   %}
17807   ins_pipe(pipe_slow);
17808 %}
17809 
17810 instruct mulD_reg(regD dst, regD src) %{
17811   predicate(UseAVX == 0);
17812   match(Set dst (MulD dst src));
17813 
17814   format %{ "mulsd   $dst, $src" %}
17815   ins_cost(150);
17816   ins_encode %{
17817     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17818   %}
17819   ins_pipe(pipe_slow);
17820 %}
17821 
17822 instruct mulD_mem(regD dst, memory src) %{
17823   predicate(UseAVX == 0);
17824   match(Set dst (MulD dst (LoadD src)));
17825 
17826   format %{ "mulsd   $dst, $src" %}
17827   ins_cost(150);
17828   ins_encode %{
17829     __ mulsd($dst$$XMMRegister, $src$$Address);
17830   %}
17831   ins_pipe(pipe_slow);
17832 %}
17833 
17834 instruct mulD_imm(regD dst, immD con) %{
17835   predicate(UseAVX == 0);
17836   match(Set dst (MulD dst con));
17837   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17838   ins_cost(150);
17839   ins_encode %{
17840     __ mulsd($dst$$XMMRegister, $constantaddress($con));
17841   %}
17842   ins_pipe(pipe_slow);
17843 %}
17844 
17845 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17846   predicate(UseAVX > 0);
17847   match(Set dst (MulD src1 src2));
17848 
17849   format %{ "vmulsd  $dst, $src1, $src2" %}
17850   ins_cost(150);
17851   ins_encode %{
17852     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17853   %}
17854   ins_pipe(pipe_slow);
17855 %}
17856 
17857 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17858   predicate(UseAVX > 0);
17859   match(Set dst (MulD src1 (LoadD src2)));
17860 
17861   format %{ "vmulsd  $dst, $src1, $src2" %}
17862   ins_cost(150);
17863   ins_encode %{
17864     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17865   %}
17866   ins_pipe(pipe_slow);
17867 %}
17868 
17869 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17870   predicate(UseAVX > 0);
17871   match(Set dst (MulD src con));
17872 
17873   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17874   ins_cost(150);
17875   ins_encode %{
17876     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17877   %}
17878   ins_pipe(pipe_slow);
17879 %}
17880 
17881 instruct divF_reg(regF dst, regF src) %{
17882   predicate(UseAVX == 0);
17883   match(Set dst (DivF dst src));
17884 
17885   format %{ "divss   $dst, $src" %}
17886   ins_cost(150);
17887   ins_encode %{
17888     __ divss($dst$$XMMRegister, $src$$XMMRegister);
17889   %}
17890   ins_pipe(pipe_slow);
17891 %}
17892 
17893 instruct divF_mem(regF dst, memory src) %{
17894   predicate(UseAVX == 0);
17895   match(Set dst (DivF dst (LoadF src)));
17896 
17897   format %{ "divss   $dst, $src" %}
17898   ins_cost(150);
17899   ins_encode %{
17900     __ divss($dst$$XMMRegister, $src$$Address);
17901   %}
17902   ins_pipe(pipe_slow);
17903 %}
17904 
17905 instruct divF_imm(regF dst, immF con) %{
17906   predicate(UseAVX == 0);
17907   match(Set dst (DivF dst con));
17908   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17909   ins_cost(150);
17910   ins_encode %{
17911     __ divss($dst$$XMMRegister, $constantaddress($con));
17912   %}
17913   ins_pipe(pipe_slow);
17914 %}
17915 
17916 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17917   predicate(UseAVX > 0);
17918   match(Set dst (DivF src1 src2));
17919 
17920   format %{ "vdivss  $dst, $src1, $src2" %}
17921   ins_cost(150);
17922   ins_encode %{
17923     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17924   %}
17925   ins_pipe(pipe_slow);
17926 %}
17927 
17928 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17929   predicate(UseAVX > 0);
17930   match(Set dst (DivF src1 (LoadF src2)));
17931 
17932   format %{ "vdivss  $dst, $src1, $src2" %}
17933   ins_cost(150);
17934   ins_encode %{
17935     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17936   %}
17937   ins_pipe(pipe_slow);
17938 %}
17939 
17940 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17941   predicate(UseAVX > 0);
17942   match(Set dst (DivF src con));
17943 
17944   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17945   ins_cost(150);
17946   ins_encode %{
17947     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17948   %}
17949   ins_pipe(pipe_slow);
17950 %}
17951 
17952 instruct divD_reg(regD dst, regD src) %{
17953   predicate(UseAVX == 0);
17954   match(Set dst (DivD dst src));
17955 
17956   format %{ "divsd   $dst, $src" %}
17957   ins_cost(150);
17958   ins_encode %{
17959     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17960   %}
17961   ins_pipe(pipe_slow);
17962 %}
17963 
17964 instruct divD_mem(regD dst, memory src) %{
17965   predicate(UseAVX == 0);
17966   match(Set dst (DivD dst (LoadD src)));
17967 
17968   format %{ "divsd   $dst, $src" %}
17969   ins_cost(150);
17970   ins_encode %{
17971     __ divsd($dst$$XMMRegister, $src$$Address);
17972   %}
17973   ins_pipe(pipe_slow);
17974 %}
17975 
17976 instruct divD_imm(regD dst, immD con) %{
17977   predicate(UseAVX == 0);
17978   match(Set dst (DivD dst con));
17979   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17980   ins_cost(150);
17981   ins_encode %{
17982     __ divsd($dst$$XMMRegister, $constantaddress($con));
17983   %}
17984   ins_pipe(pipe_slow);
17985 %}
17986 
17987 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17988   predicate(UseAVX > 0);
17989   match(Set dst (DivD src1 src2));
17990 
17991   format %{ "vdivsd  $dst, $src1, $src2" %}
17992   ins_cost(150);
17993   ins_encode %{
17994     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17995   %}
17996   ins_pipe(pipe_slow);
17997 %}
17998 
17999 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
18000   predicate(UseAVX > 0);
18001   match(Set dst (DivD src1 (LoadD src2)));
18002 
18003   format %{ "vdivsd  $dst, $src1, $src2" %}
18004   ins_cost(150);
18005   ins_encode %{
18006     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18007   %}
18008   ins_pipe(pipe_slow);
18009 %}
18010 
18011 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18012   predicate(UseAVX > 0);
18013   match(Set dst (DivD src con));
18014 
18015   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18016   ins_cost(150);
18017   ins_encode %{
18018     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18019   %}
18020   ins_pipe(pipe_slow);
18021 %}
18022 
18023 instruct absF_reg(regF dst) %{
18024   predicate(UseAVX == 0);
18025   match(Set dst (AbsF dst));
18026   ins_cost(150);
18027   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
18028   ins_encode %{
18029     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18030   %}
18031   ins_pipe(pipe_slow);
18032 %}
18033 
18034 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18035   predicate(UseAVX > 0);
18036   match(Set dst (AbsF src));
18037   ins_cost(150);
18038   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18039   ins_encode %{
18040     int vlen_enc = Assembler::AVX_128bit;
18041     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18042               ExternalAddress(float_signmask()), vlen_enc);
18043   %}
18044   ins_pipe(pipe_slow);
18045 %}
18046 
18047 instruct absD_reg(regD dst) %{
18048   predicate(UseAVX == 0);
18049   match(Set dst (AbsD dst));
18050   ins_cost(150);
18051   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
18052             "# abs double by sign masking" %}
18053   ins_encode %{
18054     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18055   %}
18056   ins_pipe(pipe_slow);
18057 %}
18058 
18059 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18060   predicate(UseAVX > 0);
18061   match(Set dst (AbsD src));
18062   ins_cost(150);
18063   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
18064             "# abs double by sign masking" %}
18065   ins_encode %{
18066     int vlen_enc = Assembler::AVX_128bit;
18067     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18068               ExternalAddress(double_signmask()), vlen_enc);
18069   %}
18070   ins_pipe(pipe_slow);
18071 %}
18072 
18073 instruct negF_reg(regF dst) %{
18074   predicate(UseAVX == 0);
18075   match(Set dst (NegF dst));
18076   ins_cost(150);
18077   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
18078   ins_encode %{
18079     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18080   %}
18081   ins_pipe(pipe_slow);
18082 %}
18083 
18084 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18085   predicate(UseAVX > 0);
18086   match(Set dst (NegF src));
18087   ins_cost(150);
18088   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18089   ins_encode %{
18090     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18091                  ExternalAddress(float_signflip()));
18092   %}
18093   ins_pipe(pipe_slow);
18094 %}
18095 
18096 instruct negD_reg(regD dst) %{
18097   predicate(UseAVX == 0);
18098   match(Set dst (NegD dst));
18099   ins_cost(150);
18100   format %{ "xorpd   $dst, [0x8000000000000000]\t"
18101             "# neg double by sign flipping" %}
18102   ins_encode %{
18103     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18104   %}
18105   ins_pipe(pipe_slow);
18106 %}
18107 
18108 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18109   predicate(UseAVX > 0);
18110   match(Set dst (NegD src));
18111   ins_cost(150);
18112   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
18113             "# neg double by sign flipping" %}
18114   ins_encode %{
18115     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18116                  ExternalAddress(double_signflip()));
18117   %}
18118   ins_pipe(pipe_slow);
18119 %}
18120 
// The sqrtss instruction needs its destination register pre-initialized for
// best performance: it writes only the low lanes of dst, so a stale dst
// would add a false dependency. Therefore only the rule where the input is
// pre-loaded into the dst register is defined below.
18123 instruct sqrtF_reg(regF dst) %{
18124   match(Set dst (SqrtF dst));
18125   format %{ "sqrtss  $dst, $dst" %}
18126   ins_encode %{
18127     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18128   %}
18129   ins_pipe(pipe_slow);
18130 %}
18131 
// The sqrtsd instruction needs its destination register pre-initialized for
// best performance, for the same partial-update reason as sqrtss above.
// Therefore only the rule where the input is pre-loaded into the dst
// register is defined below.
18134 instruct sqrtD_reg(regD dst) %{
18135   match(Set dst (SqrtD dst));
18136   format %{ "sqrtsd  $dst, $dst" %}
18137   ins_encode %{
18138     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18139   %}
18140   ins_pipe(pipe_slow);
18141 %}
18142 
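// Float16 conversions. Illustrative sketch (not part of the build): the
// ConvF2HF and ConvHF2F nodes come from the Float.floatToFloat16 and
// Float.float16ToFloat intrinsics, e.g.
//
//   static short toHalf(float f)   { return Float.floatToFloat16(f); }
//   static float fromHalf(short h) { return Float.float16ToFloat(h); }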
18143 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18144   effect(TEMP tmp);
18145   match(Set dst (ConvF2HF src));
18146   ins_cost(125);
  format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP" %}
18148   ins_encode %{
18149     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18150   %}
18151   ins_pipe( pipe_slow );
18152 %}
18153 
18154 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18155   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18156   effect(TEMP ktmp, TEMP rtmp);
18157   match(Set mem (StoreC mem (ConvF2HF src)));
18158   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18159   ins_encode %{
18160     __ movl($rtmp$$Register, 0x1);
18161     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18162     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18163   %}
18164   ins_pipe( pipe_slow );
18165 %}
18166 
18167 instruct vconvF2HF(vec dst, vec src) %{
18168   match(Set dst (VectorCastF2HF src));
18169   format %{ "vector_conv_F2HF $dst $src" %}
18170   ins_encode %{
18171     int vlen_enc = vector_length_encoding(this, $src);
18172     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18173   %}
18174   ins_pipe( pipe_slow );
18175 %}
18176 
18177 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18178   predicate(n->as_StoreVector()->memory_size() >= 16);
18179   match(Set mem (StoreVector mem (VectorCastF2HF src)));
18180   format %{ "vcvtps2ph $mem,$src" %}
18181   ins_encode %{
18182     int vlen_enc = vector_length_encoding(this, $src);
18183     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18184   %}
18185   ins_pipe( pipe_slow );
18186 %}
18187 
18188 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18189   match(Set dst (ConvHF2F src));
18190   format %{ "vcvtph2ps $dst,$src" %}
18191   ins_encode %{
18192     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18193   %}
18194   ins_pipe( pipe_slow );
18195 %}
18196 
18197 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18198   match(Set dst (VectorCastHF2F (LoadVector mem)));
18199   format %{ "vcvtph2ps $dst,$mem" %}
18200   ins_encode %{
18201     int vlen_enc = vector_length_encoding(this);
18202     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18203   %}
18204   ins_pipe( pipe_slow );
18205 %}
18206 
18207 instruct vconvHF2F(vec dst, vec src) %{
18208   match(Set dst (VectorCastHF2F src));
18209   ins_cost(125);
18210   format %{ "vector_conv_HF2F $dst,$src" %}
18211   ins_encode %{
18212     int vlen_enc = vector_length_encoding(this);
18213     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18214   %}
18215   ins_pipe( pipe_slow );
18216 %}
18217 
18218 // ---------------------------------------- VectorReinterpret ------------------------------------
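// A reinterpret retypes the same bits; it never changes them, so source and
// destination must have the same size in bytes. Descriptive sketch for the
// opmask rules below: a mask over 32 shorts (32 bits of a k-register,
// covering a 64-byte vector) reinterpreted as a mask over 64 bytes doubles
// each mask bit. Since there is no direct mask-to-mask conversion, the W2B
// rule materializes the mask into a vector (evpmovm2w) and re-extracts it at
// byte granularity (evpmovb2m); the D2B and Q2B rules work the same way.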
18219 instruct reinterpret_mask(kReg dst) %{
18220   predicate(n->bottom_type()->isa_vectmask() &&
18221             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18222   match(Set dst (VectorReinterpret dst));
18223   ins_cost(125);
18224   format %{ "vector_reinterpret $dst\t!" %}
18225   ins_encode %{
18226     // empty
18227   %}
18228   ins_pipe( pipe_slow );
18229 %}
18230 
18231 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18232   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18233             n->bottom_type()->isa_vectmask() &&
18234             n->in(1)->bottom_type()->isa_vectmask() &&
18235             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src are equal in size (bytes)
18237   match(Set dst (VectorReinterpret src));
18238   effect(TEMP xtmp);
18239   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18240   ins_encode %{
18241      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18242      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18243      assert(src_sz == dst_sz , "src and dst size mismatch");
18244      int vlen_enc = vector_length_encoding(src_sz);
18245      __  evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18246      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18247   %}
18248   ins_pipe( pipe_slow );
18249 %}
18250 
18251 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18252   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18253             n->bottom_type()->isa_vectmask() &&
18254             n->in(1)->bottom_type()->isa_vectmask() &&
18255             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18256              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src are equal in size (bytes)
18258   match(Set dst (VectorReinterpret src));
18259   effect(TEMP xtmp);
18260   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18261   ins_encode %{
18262      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18263      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18264      assert(src_sz == dst_sz , "src and dst size mismatch");
18265      int vlen_enc = vector_length_encoding(src_sz);
18266      __  evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18267      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18268   %}
18269   ins_pipe( pipe_slow );
18270 %}
18271 
18272 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18273   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18274             n->bottom_type()->isa_vectmask() &&
18275             n->in(1)->bottom_type()->isa_vectmask() &&
18276             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18277              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src are equal in size (bytes)
18279   match(Set dst (VectorReinterpret src));
18280   effect(TEMP xtmp);
18281   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18282   ins_encode %{
18283      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18284      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18285      assert(src_sz == dst_sz , "src and dst size mismatch");
18286      int vlen_enc = vector_length_encoding(src_sz);
18287      __  evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18288      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18289   %}
18290   ins_pipe( pipe_slow );
18291 %}
18292 
18293 instruct reinterpret(vec dst) %{
18294   predicate(!n->bottom_type()->isa_vectmask() &&
18295             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18296   match(Set dst (VectorReinterpret dst));
18297   ins_cost(125);
18298   format %{ "vector_reinterpret $dst\t!" %}
18299   ins_encode %{
18300     // empty
18301   %}
18302   ins_pipe( pipe_slow );
18303 %}
18304 
18305 instruct reinterpret_expand(vec dst, vec src) %{
18306   predicate(UseAVX == 0 &&
18307             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18308   match(Set dst (VectorReinterpret src));
18309   ins_cost(125);
18310   effect(TEMP dst);
18311   format %{ "vector_reinterpret_expand $dst,$src" %}
18312   ins_encode %{
18313     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18314     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18315 
18316     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18317     if (src_vlen_in_bytes == 4) {
18318       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18319     } else {
18320       assert(src_vlen_in_bytes == 8, "");
18321       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18322     }
18323     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18324   %}
18325   ins_pipe( pipe_slow );
18326 %}
18327 
18328 instruct vreinterpret_expand4(legVec dst, vec src) %{
18329   predicate(UseAVX > 0 &&
18330             !n->bottom_type()->isa_vectmask() &&
18331             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18332             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18333   match(Set dst (VectorReinterpret src));
18334   ins_cost(125);
18335   format %{ "vector_reinterpret_expand $dst,$src" %}
18336   ins_encode %{
18337     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18338   %}
18339   ins_pipe( pipe_slow );
18340 %}
18341 
18342 
18343 instruct vreinterpret_expand(legVec dst, vec src) %{
18344   predicate(UseAVX > 0 &&
18345             !n->bottom_type()->isa_vectmask() &&
18346             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18347             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18348   match(Set dst (VectorReinterpret src));
18349   ins_cost(125);
18350   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18351   ins_encode %{
18352     switch (Matcher::vector_length_in_bytes(this, $src)) {
18353       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18354       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18355       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18356       default: ShouldNotReachHere();
18357     }
18358   %}
18359   ins_pipe( pipe_slow );
18360 %}
18361 
18362 instruct reinterpret_shrink(vec dst, legVec src) %{
18363   predicate(!n->bottom_type()->isa_vectmask() &&
18364             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18365   match(Set dst (VectorReinterpret src));
18366   ins_cost(125);
18367   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18368   ins_encode %{
18369     switch (Matcher::vector_length_in_bytes(this)) {
18370       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18371       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18372       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18373       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18374       default: ShouldNotReachHere();
18375     }
18376   %}
18377   ins_pipe( pipe_slow );
18378 %}
18379 
18380 // ----------------------------------------------------------------------------------------------------
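
// Descriptive note: the $rmode constant is passed straight through as the
// roundsd/vroundpd/vrndscalepd immediate, whose low two bits select the
// rounding mode (0 = to nearest even, 1 = down, 2 = up, 3 = truncate). C2
// uses this for the Math.rint/floor/ceil intrinsics. Illustrative Java
// inputs:
//
//   double r = Math.rint(x);    // rmode 0
//   double f = Math.floor(x);   // rmode 1
//   double c = Math.ceil(x);    // rmode 2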
18381 
18382 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18383   match(Set dst (RoundDoubleMode src rmode));
18384   format %{ "roundsd $dst,$src" %}
18385   ins_cost(150);
18386   ins_encode %{
18387     assert(UseSSE >= 4, "required");
18388     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18389       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18390     }
18391     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18392   %}
18393   ins_pipe(pipe_slow);
18394 %}
18395 
18396 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18397   match(Set dst (RoundDoubleMode con rmode));
18398   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18399   ins_cost(150);
18400   ins_encode %{
18401     assert(UseSSE >= 4, "required");
18402     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18403   %}
18404   ins_pipe(pipe_slow);
18405 %}
18406 
18407 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18408   predicate(Matcher::vector_length(n) < 8);
18409   match(Set dst (RoundDoubleModeV src rmode));
18410   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18411   ins_encode %{
18412     assert(UseAVX > 0, "required");
18413     int vlen_enc = vector_length_encoding(this);
18414     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18415   %}
18416   ins_pipe( pipe_slow );
18417 %}
18418 
18419 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18420   predicate(Matcher::vector_length(n) == 8);
18421   match(Set dst (RoundDoubleModeV src rmode));
18422   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18423   ins_encode %{
18424     assert(UseAVX > 2, "required");
18425     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18426   %}
18427   ins_pipe( pipe_slow );
18428 %}
18429 
18430 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18431   predicate(Matcher::vector_length(n) < 8);
18432   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18433   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18434   ins_encode %{
18435     assert(UseAVX > 0, "required");
18436     int vlen_enc = vector_length_encoding(this);
18437     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18438   %}
18439   ins_pipe( pipe_slow );
18440 %}
18441 
18442 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18443   predicate(Matcher::vector_length(n) == 8);
18444   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18445   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18446   ins_encode %{
18447     assert(UseAVX > 2, "required");
18448     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18449   %}
18450   ins_pipe( pipe_slow );
18451 %}
18452 
18453 instruct onspinwait() %{
18454   match(OnSpinWait);
18455   ins_cost(200);
18456 
18457   format %{
18458     $$template
18459     $$emit$$"pause\t! membar_onspinwait"
18460   %}
18461   ins_encode %{
18462     __ pause();
18463   %}
18464   ins_pipe(pipe_slow);
18465 %}
18466 
18467 // a * b + c
18468 instruct fmaD_reg(regD a, regD b, regD c) %{
18469   match(Set c (FmaD  c (Binary a b)));
18470   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18471   ins_cost(150);
18472   ins_encode %{
18473     assert(UseFMA, "Needs FMA instructions support.");
18474     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18475   %}
18476   ins_pipe( pipe_slow );
18477 %}
18478 
18479 // a * b + c
18480 instruct fmaF_reg(regF a, regF b, regF c) %{
18481   match(Set c (FmaF  c (Binary a b)));
18482   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18483   ins_cost(150);
18484   ins_encode %{
18485     assert(UseFMA, "Needs FMA instructions support.");
18486     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18487   %}
18488   ins_pipe( pipe_slow );
18489 %}
18490 
18491 // ====================VECTOR INSTRUCTIONS=====================================
18492 
18493 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18494 instruct MoveVec2Leg(legVec dst, vec src) %{
18495   match(Set dst src);
18496   format %{ "" %}
18497   ins_encode %{
18498     ShouldNotReachHere();
18499   %}
18500   ins_pipe( fpu_reg_reg );
18501 %}
18502 
18503 instruct MoveLeg2Vec(vec dst, legVec src) %{
18504   match(Set dst src);
18505   format %{ "" %}
18506   ins_encode %{
18507     ShouldNotReachHere();
18508   %}
18509   ins_pipe( fpu_reg_reg );
18510 %}
18511 
18512 // ============================================================================
18513 
// Generic load-vector operand pattern
18515 instruct loadV(vec dst, memory mem) %{
18516   match(Set dst (LoadVector mem));
18517   ins_cost(125);
18518   format %{ "load_vector $dst,$mem" %}
18519   ins_encode %{
18520     BasicType bt = Matcher::vector_element_basic_type(this);
18521     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18522   %}
18523   ins_pipe( pipe_slow );
18524 %}
18525 
// Generic store-vector operand pattern.
18527 instruct storeV(memory mem, vec src) %{
18528   match(Set mem (StoreVector mem src));
18529   ins_cost(145);
18530   format %{ "store_vector $mem,$src\n\t" %}
18531   ins_encode %{
18532     switch (Matcher::vector_length_in_bytes(this, $src)) {
18533       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18534       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18535       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18536       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18537       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18538       default: ShouldNotReachHere();
18539     }
18540   %}
18541   ins_pipe( pipe_slow );
18542 %}
18543 
18544 // ---------------------------------------- Gather ------------------------------------
18545 
18546 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18547 
18548 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18549   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18550             Matcher::vector_length_in_bytes(n) <= 32);
18551   match(Set dst (LoadVectorGather mem idx));
18552   effect(TEMP dst, TEMP tmp, TEMP mask);
18553   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18554   ins_encode %{
18555     int vlen_enc = vector_length_encoding(this);
18556     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18557     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
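    // vpcmpeqd of a register with itself yields all-ones, enabling every lane.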
18558     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18559     __ lea($tmp$$Register, $mem$$Address);
18560     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18561   %}
18562   ins_pipe( pipe_slow );
18563 %}
18564 
18565 
18566 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18567   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18568             !is_subword_type(Matcher::vector_element_basic_type(n)));
18569   match(Set dst (LoadVectorGather mem idx));
18570   effect(TEMP dst, TEMP tmp, TEMP ktmp);
18571   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
18572   ins_encode %{
18573     int vlen_enc = vector_length_encoding(this);
18574     BasicType elem_bt = Matcher::vector_element_basic_type(this);
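    // kxnor of a register with itself sets every opmask bit, enabling all lanes.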
18575     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18576     __ lea($tmp$$Register, $mem$$Address);
18577     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18578   %}
18579   ins_pipe( pipe_slow );
18580 %}
18581 
18582 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18583   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18584             !is_subword_type(Matcher::vector_element_basic_type(n)));
18585   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18586   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
18587   format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
18588   ins_encode %{
18589     assert(UseAVX > 2, "sanity");
18590     int vlen_enc = vector_length_encoding(this);
18591     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18592     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: the gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is first copied to a temporary.
18595     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18596     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18597     __ lea($tmp$$Register, $mem$$Address);
18598     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18599   %}
18600   ins_pipe( pipe_slow );
18601 %}
18602 
18603 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18604   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18605   match(Set dst (LoadVectorGather mem idx_base));
18606   effect(TEMP tmp, TEMP rtmp);
18607   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18608   ins_encode %{
18609     int vlen_enc = vector_length_encoding(this);
18610     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18611     __ lea($tmp$$Register, $mem$$Address);
18612     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18613   %}
18614   ins_pipe( pipe_slow );
18615 %}
18616 
18617 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18618                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18619   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18620   match(Set dst (LoadVectorGather mem idx_base));
18621   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18622   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18623   ins_encode %{
18624     int vlen_enc = vector_length_encoding(this);
18625     int vector_len = Matcher::vector_length(this);
18626     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18627     __ lea($tmp$$Register, $mem$$Address);
18628     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18629     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18630                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18631   %}
18632   ins_pipe( pipe_slow );
18633 %}
18634 
18635 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18636   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18637   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18638   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18639   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18640   ins_encode %{
18641     int vlen_enc = vector_length_encoding(this);
18642     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18643     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18644     __ lea($tmp$$Register, $mem$$Address);
18645     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18646     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18647   %}
18648   ins_pipe( pipe_slow );
18649 %}
18650 
18651 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18652                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18653   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18654   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18655   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18656   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18657   ins_encode %{
18658     int vlen_enc = vector_length_encoding(this);
18659     int vector_len = Matcher::vector_length(this);
18660     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18661     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18662     __ lea($tmp$$Register, $mem$$Address);
18663     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18664     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18665     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18666                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18667   %}
18668   ins_pipe( pipe_slow );
18669 %}
18670 
18671 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18672   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18673   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18674   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18675   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18676   ins_encode %{
18677     int vlen_enc = vector_length_encoding(this);
18678     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18679     __ lea($tmp$$Register, $mem$$Address);
18680     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
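    // vpmovmskb yields one mask bit per byte; for 16-bit elements, pext with
    // the 0x55555555 pattern keeps the even-position bits, one per short.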
18681     if (elem_bt == T_SHORT) {
18682       __ movl($mask_idx$$Register, 0x55555555);
18683       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18684     }
18685     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18686     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18687   %}
18688   ins_pipe( pipe_slow );
18689 %}
18690 
18691 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18692                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18693   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18694   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18695   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18696   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18697   ins_encode %{
18698     int vlen_enc = vector_length_encoding(this);
18699     int vector_len = Matcher::vector_length(this);
18700     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18701     __ lea($tmp$$Register, $mem$$Address);
18702     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18703     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
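    // vpmovmskb yields one mask bit per byte; for 16-bit elements, pext with
    // the 0x55555555 pattern keeps the even-position bits, one per short.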
18704     if (elem_bt == T_SHORT) {
18705       __ movl($mask_idx$$Register, 0x55555555);
18706       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18707     }
18708     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18709     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18710                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18711   %}
18712   ins_pipe( pipe_slow );
18713 %}
18714 
18715 // ====================Scatter=======================================
18716 
18717 // Scatter INT, LONG, FLOAT, DOUBLE
18718 
18719 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18720   predicate(UseAVX > 2);
18721   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18722   effect(TEMP tmp, TEMP ktmp);
18723   format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
18724   ins_encode %{
18725     int vlen_enc = vector_length_encoding(this, $src);
18726     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18727 
18728     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18729     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18730 
18731     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18732     __ lea($tmp$$Register, $mem$$Address);
18733     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18734   %}
18735   ins_pipe( pipe_slow );
18736 %}
18737 
18738 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18739   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18740   effect(TEMP tmp, TEMP ktmp);
18741   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18742   ins_encode %{
18743     int vlen_enc = vector_length_encoding(this, $src);
18744     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18745     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18746     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: the scatter instruction partially updates the opmask register used
    // for predication, hence the mask operand is first copied to a temporary.
18749     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18750     __ lea($tmp$$Register, $mem$$Address);
18751     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18752   %}
18753   ins_pipe( pipe_slow );
18754 %}
18755 
18756 // ====================REPLICATE=======================================
18757 
// Replicate a byte scalar into a vector
18759 instruct vReplB_reg(vec dst, rRegI src) %{
18760   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18761   match(Set dst (Replicate src));
18762   format %{ "replicateB $dst,$src" %}
18763   ins_encode %{
18764     uint vlen = Matcher::vector_length(this);
18765     if (UseAVX >= 2) {
18766       int vlen_enc = vector_length_encoding(this);
18767       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18768         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18769         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18770       } else {
18771         __ movdl($dst$$XMMRegister, $src$$Register);
18772         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18773       }
18774     } else {
      assert(UseAVX < 2, "");
18776       __ movdl($dst$$XMMRegister, $src$$Register);
18777       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18778       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18779       if (vlen >= 16) {
18780         assert(vlen == 16, "");
18781         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18782       }
18783     }
18784   %}
18785   ins_pipe( pipe_slow );
18786 %}
18787 
18788 instruct ReplB_mem(vec dst, memory mem) %{
18789   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18790   match(Set dst (Replicate (LoadB mem)));
18791   format %{ "replicateB $dst,$mem" %}
18792   ins_encode %{
18793     int vlen_enc = vector_length_encoding(this);
18794     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18795   %}
18796   ins_pipe( pipe_slow );
18797 %}
18798 
18799 // ====================ReplicateS=======================================
18800 
18801 instruct vReplS_reg(vec dst, rRegI src) %{
18802   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18803   match(Set dst (Replicate src));
18804   format %{ "replicateS $dst,$src" %}
18805   ins_encode %{
18806     uint vlen = Matcher::vector_length(this);
18807     int vlen_enc = vector_length_encoding(this);
18808     if (UseAVX >= 2) {
18809       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18810         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18811         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18812       } else {
18813         __ movdl($dst$$XMMRegister, $src$$Register);
18814         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18815       }
18816     } else {
18817       assert(UseAVX < 2, "");
18818       __ movdl($dst$$XMMRegister, $src$$Register);
18819       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18820       if (vlen >= 8) {
18821         assert(vlen == 8, "");
18822         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18823       }
18824     }
18825   %}
18826   ins_pipe( pipe_slow );
18827 %}
18828 
18829 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18830   match(Set dst (Replicate con));
18831   effect(TEMP rtmp);
18832   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18833   ins_encode %{
18834     int vlen_enc = vector_length_encoding(this);
18835     BasicType bt = Matcher::vector_element_basic_type(this);
18836     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18837     __ movl($rtmp$$Register, $con$$constant);
18838     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18839   %}
18840   ins_pipe( pipe_slow );
18841 %}
18842 
18843 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18844   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18845   match(Set dst (Replicate src));
18846   effect(TEMP rtmp);
18847   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18848   ins_encode %{
18849     int vlen_enc = vector_length_encoding(this);
18850     __ vmovw($rtmp$$Register, $src$$XMMRegister);
18851     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18852   %}
18853   ins_pipe( pipe_slow );
18854 %}
18855 
18856 instruct ReplS_mem(vec dst, memory mem) %{
18857   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18858   match(Set dst (Replicate (LoadS mem)));
18859   format %{ "replicateS $dst,$mem" %}
18860   ins_encode %{
18861     int vlen_enc = vector_length_encoding(this);
18862     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18863   %}
18864   ins_pipe( pipe_slow );
18865 %}
18866 
18867 // ====================ReplicateI=======================================
18868 
18869 instruct ReplI_reg(vec dst, rRegI src) %{
18870   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18871   match(Set dst (Replicate src));
18872   format %{ "replicateI $dst,$src" %}
18873   ins_encode %{
18874     uint vlen = Matcher::vector_length(this);
18875     int vlen_enc = vector_length_encoding(this);
18876     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18877       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18878     } else if (VM_Version::supports_avx2()) {
18879       __ movdl($dst$$XMMRegister, $src$$Register);
18880       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18881     } else {
18882       __ movdl($dst$$XMMRegister, $src$$Register);
18883       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18884     }
18885   %}
18886   ins_pipe( pipe_slow );
18887 %}
18888 
18889 instruct ReplI_mem(vec dst, memory mem) %{
18890   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18891   match(Set dst (Replicate (LoadI mem)));
18892   format %{ "replicateI $dst,$mem" %}
18893   ins_encode %{
18894     int vlen_enc = vector_length_encoding(this);
18895     if (VM_Version::supports_avx2()) {
18896       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18897     } else if (VM_Version::supports_avx()) {
18898       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18899     } else {
18900       __ movdl($dst$$XMMRegister, $mem$$Address);
18901       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18902     }
18903   %}
18904   ins_pipe( pipe_slow );
18905 %}
18906 
18907 instruct ReplI_imm(vec dst, immI con) %{
18908   predicate(Matcher::is_non_long_integral_vector(n));
18909   match(Set dst (Replicate con));
18910   format %{ "replicateI $dst,$con" %}
18911   ins_encode %{
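    // The constant table entry holds just enough copies of the immediate to
    // fill the smallest unit a broadcast load can widen from: 4 bytes with
    // AVX, 8 bytes with SSE3, otherwise a full 16-byte pattern.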
18912     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18913                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18914                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
18915     BasicType bt = Matcher::vector_element_basic_type(this);
18916     int vlen = Matcher::vector_length_in_bytes(this);
18917     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18918   %}
18919   ins_pipe( pipe_slow );
18920 %}
18921 
// Replicate scalar zero into a vector
18923 instruct ReplI_zero(vec dst, immI_0 zero) %{
18924   predicate(Matcher::is_non_long_integral_vector(n));
18925   match(Set dst (Replicate zero));
18926   format %{ "replicateI $dst,$zero" %}
18927   ins_encode %{
18928     int vlen_enc = vector_length_encoding(this);
18929     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18930       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18931     } else {
18932       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18933     }
18934   %}
18935   ins_pipe( fpu_reg_reg );
18936 %}
18937 
18938 instruct ReplI_M1(vec dst, immI_M1 con) %{
18939   predicate(Matcher::is_non_long_integral_vector(n));
18940   match(Set dst (Replicate con));
18941   format %{ "vallones $dst" %}
18942   ins_encode %{
18943     int vector_len = vector_length_encoding(this);
18944     __ vallones($dst$$XMMRegister, vector_len);
18945   %}
18946   ins_pipe( pipe_slow );
18947 %}
18948 
18949 // ====================ReplicateL=======================================
18950 
// Replicate a long (8-byte) scalar into a vector
18952 instruct ReplL_reg(vec dst, rRegL src) %{
18953   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18954   match(Set dst (Replicate src));
18955   format %{ "replicateL $dst,$src" %}
18956   ins_encode %{
18957     int vlen = Matcher::vector_length(this);
18958     int vlen_enc = vector_length_encoding(this);
18959     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18960       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18961     } else if (VM_Version::supports_avx2()) {
18962       __ movdq($dst$$XMMRegister, $src$$Register);
18963       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18964     } else {
18965       __ movdq($dst$$XMMRegister, $src$$Register);
18966       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18967     }
18968   %}
18969   ins_pipe( pipe_slow );
18970 %}
18971 
18972 instruct ReplL_mem(vec dst, memory mem) %{
18973   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18974   match(Set dst (Replicate (LoadL mem)));
18975   format %{ "replicateL $dst,$mem" %}
18976   ins_encode %{
18977     int vlen_enc = vector_length_encoding(this);
18978     if (VM_Version::supports_avx2()) {
18979       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18980     } else if (VM_Version::supports_sse3()) {
18981       __ movddup($dst$$XMMRegister, $mem$$Address);
18982     } else {
18983       __ movq($dst$$XMMRegister, $mem$$Address);
18984       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18985     }
18986   %}
18987   ins_pipe( pipe_slow );
18988 %}
18989 
// Replicate a long (8-byte) scalar immediate into a vector by loading it from the constant table.
18991 instruct ReplL_imm(vec dst, immL con) %{
18992   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18993   match(Set dst (Replicate con));
18994   format %{ "replicateL $dst,$con" %}
18995   ins_encode %{
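    // A single 8-byte copy suffices when SSE3 movddup is available; otherwise
    // two copies form a full 16-byte pattern.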
18996     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18997     int vlen = Matcher::vector_length_in_bytes(this);
18998     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18999   %}
19000   ins_pipe( pipe_slow );
19001 %}
19002 
19003 instruct ReplL_zero(vec dst, immL0 zero) %{
19004   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19005   match(Set dst (Replicate zero));
19006   format %{ "replicateL $dst,$zero" %}
19007   ins_encode %{
19008     int vlen_enc = vector_length_encoding(this);
19009     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19010       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19011     } else {
19012       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19013     }
19014   %}
19015   ins_pipe( fpu_reg_reg );
19016 %}
19017 
19018 instruct ReplL_M1(vec dst, immL_M1 con) %{
19019   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19020   match(Set dst (Replicate con));
19021   format %{ "vallones $dst" %}
19022   ins_encode %{
19023     int vector_len = vector_length_encoding(this);
19024     __ vallones($dst$$XMMRegister, vector_len);
19025   %}
19026   ins_pipe( pipe_slow );
19027 %}
19028 
19029 // ====================ReplicateF=======================================
19030 
19031 instruct vReplF_reg(vec dst, vlRegF src) %{
19032   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19033   match(Set dst (Replicate src));
19034   format %{ "replicateF $dst,$src" %}
19035   ins_encode %{
19036     uint vlen = Matcher::vector_length(this);
19037     int vlen_enc = vector_length_encoding(this);
19038     if (vlen <= 4) {
19039       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19040     } else if (VM_Version::supports_avx2()) {
19041       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19042     } else {
19043       assert(vlen == 8, "sanity");
19044       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19045       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19046     }
19047   %}
19048   ins_pipe( pipe_slow );
19049 %}
19050 
19051 instruct ReplF_reg(vec dst, vlRegF src) %{
19052   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19053   match(Set dst (Replicate src));
19054   format %{ "replicateF $dst,$src" %}
19055   ins_encode %{
19056     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19057   %}
19058   ins_pipe( pipe_slow );
19059 %}
19060 
19061 instruct ReplF_mem(vec dst, memory mem) %{
19062   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19063   match(Set dst (Replicate (LoadF mem)));
19064   format %{ "replicateF $dst,$mem" %}
19065   ins_encode %{
19066     int vlen_enc = vector_length_encoding(this);
19067     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19068   %}
19069   ins_pipe( pipe_slow );
19070 %}
19071 
// Replicate a float scalar immediate into a vector by loading it from the constant table.
19073 instruct ReplF_imm(vec dst, immF con) %{
19074   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19075   match(Set dst (Replicate con));
19076   format %{ "replicateF $dst,$con" %}
19077   ins_encode %{
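    // One 4-byte copy suffices with AVX, two (8 bytes) with SSE3, otherwise
    // four copies form a full 16-byte pattern.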
19078     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19079                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19080     int vlen = Matcher::vector_length_in_bytes(this);
19081     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19082   %}
19083   ins_pipe( pipe_slow );
19084 %}
19085 
19086 instruct ReplF_zero(vec dst, immF0 zero) %{
19087   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19088   match(Set dst (Replicate zero));
19089   format %{ "replicateF $dst,$zero" %}
19090   ins_encode %{
19091     int vlen_enc = vector_length_encoding(this);
19092     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19093       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19094     } else {
19095       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19096     }
19097   %}
19098   ins_pipe( fpu_reg_reg );
19099 %}
19100 
19101 // ====================ReplicateD=======================================
19102 
// Replicate a double (8-byte) scalar into a vector
19104 instruct vReplD_reg(vec dst, vlRegD src) %{
19105   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19106   match(Set dst (Replicate src));
19107   format %{ "replicateD $dst,$src" %}
19108   ins_encode %{
19109     uint vlen = Matcher::vector_length(this);
19110     int vlen_enc = vector_length_encoding(this);
19111     if (vlen <= 2) {
19112       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19113     } else if (VM_Version::supports_avx2()) {
19114       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19115     } else {
19116       assert(vlen == 4, "sanity");
19117       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19118       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19119     }
19120   %}
19121   ins_pipe( pipe_slow );
19122 %}
19123 
19124 instruct ReplD_reg(vec dst, vlRegD src) %{
19125   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19126   match(Set dst (Replicate src));
19127   format %{ "replicateD $dst,$src" %}
19128   ins_encode %{
19129     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19130   %}
19131   ins_pipe( pipe_slow );
19132 %}
19133 
19134 instruct ReplD_mem(vec dst, memory mem) %{
19135   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19136   match(Set dst (Replicate (LoadD mem)));
19137   format %{ "replicateD $dst,$mem" %}
19138   ins_encode %{
19139     if (Matcher::vector_length(this) >= 4) {
19140       int vlen_enc = vector_length_encoding(this);
19141       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19142     } else {
19143       __ movddup($dst$$XMMRegister, $mem$$Address);
19144     }
19145   %}
19146   ins_pipe( pipe_slow );
19147 %}
19148 
// Replicate a double (8-byte) scalar immediate into a vector by loading it from the constant table.
19150 instruct ReplD_imm(vec dst, immD con) %{
19151   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19152   match(Set dst (Replicate con));
19153   format %{ "replicateD $dst,$con" %}
19154   ins_encode %{
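    // A single 8-byte copy suffices when SSE3 movddup is available; otherwise
    // two copies form a full 16-byte pattern.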
19155     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19156     int vlen = Matcher::vector_length_in_bytes(this);
19157     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19158   %}
19159   ins_pipe( pipe_slow );
19160 %}
19161 
19162 instruct ReplD_zero(vec dst, immD0 zero) %{
19163   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19164   match(Set dst (Replicate zero));
19165   format %{ "replicateD $dst,$zero" %}
19166   ins_encode %{
19167     int vlen_enc = vector_length_encoding(this);
19168     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19169       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19170     } else {
19171       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19172     }
19173   %}
19174   ins_pipe( fpu_reg_reg );
19175 %}
19176 
19177 // ====================VECTOR INSERT=======================================
19178 
19179 instruct insert(vec dst, rRegI val, immU8 idx) %{
19180   predicate(Matcher::vector_length_in_bytes(n) < 32);
19181   match(Set dst (VectorInsert (Binary dst val) idx));
19182   format %{ "vector_insert $dst,$val,$idx" %}
19183   ins_encode %{
19184     assert(UseSSE >= 4, "required");
19185     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19186 
19187     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19188 
19189     assert(is_integral_type(elem_bt), "");
19190     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19191 
19192     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19193   %}
19194   ins_pipe( pipe_slow );
19195 %}
19196 
19197 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19198   predicate(Matcher::vector_length_in_bytes(n) == 32);
19199   match(Set dst (VectorInsert (Binary src val) idx));
19200   effect(TEMP vtmp);
19201   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19202   ins_encode %{
19204     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19205     int elem_per_lane = 16/type2aelembytes(elem_bt);
19206     int log2epr = log2(elem_per_lane);
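    // A 256-bit vector is two 128-bit lanes: y_idx selects the lane, x_idx the
    // element within it; the lane is extracted, updated and reinserted below.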
19207 
19208     assert(is_integral_type(elem_bt), "sanity");
19209     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19210 
19211     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19212     uint y_idx = ($idx$$constant >> log2epr) & 1;
19213     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19214     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19215     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19216   %}
19217   ins_pipe( pipe_slow );
19218 %}
19219 
19220 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19221   predicate(Matcher::vector_length_in_bytes(n) == 64);
19222   match(Set dst (VectorInsert (Binary src val) idx));
19223   effect(TEMP vtmp);
19224   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19225   ins_encode %{
19226     assert(UseAVX > 2, "sanity");
19227 
19228     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19229     int elem_per_lane = 16/type2aelembytes(elem_bt);
19230     int log2epr = log2(elem_per_lane);
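    // A 512-bit vector is four 128-bit lanes: y_idx selects the lane, x_idx the
    // element within it; the lane is extracted, updated and reinserted below.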
19231 
19232     assert(is_integral_type(elem_bt), "");
19233     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19234 
19235     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19236     uint y_idx = ($idx$$constant >> log2epr) & 3;
19237     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19238     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19239     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19240   %}
19241   ins_pipe( pipe_slow );
19242 %}
19243 
19244 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19245   predicate(Matcher::vector_length(n) == 2);
19246   match(Set dst (VectorInsert (Binary dst val) idx));
19247   format %{ "vector_insert $dst,$val,$idx" %}
19248   ins_encode %{
19249     assert(UseSSE >= 4, "required");
19250     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19251     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19252 
19253     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19254   %}
19255   ins_pipe( pipe_slow );
19256 %}
19257 
19258 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19259   predicate(Matcher::vector_length(n) == 4);
19260   match(Set dst (VectorInsert (Binary src val) idx));
19261   effect(TEMP vtmp);
19262   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19263   ins_encode %{
19264     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19265     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19266 
19267     uint x_idx = $idx$$constant & right_n_bits(1);
19268     uint y_idx = ($idx$$constant >> 1) & 1;
19270     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19271     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19272     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19273   %}
19274   ins_pipe( pipe_slow );
19275 %}
19276 
19277 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19278   predicate(Matcher::vector_length(n) == 8);
19279   match(Set dst (VectorInsert (Binary src val) idx));
19280   effect(TEMP vtmp);
19281   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19282   ins_encode %{
19283     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19284     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19285 
19286     uint x_idx = $idx$$constant & right_n_bits(1);
19287     uint y_idx = ($idx$$constant >> 1) & 3;
19288     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19289     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19290     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19291   %}
19292   ins_pipe( pipe_slow );
19293 %}
19294 
19295 instruct insertF(vec dst, regF val, immU8 idx) %{
19296   predicate(Matcher::vector_length(n) < 8);
19297   match(Set dst (VectorInsert (Binary dst val) idx));
19298   format %{ "vector_insert $dst,$val,$idx" %}
19299   ins_encode %{
19300     assert(UseSSE >= 4, "sanity");
19301 
19302     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19303     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19304 
19305     uint x_idx = $idx$$constant & right_n_bits(2);
19306     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19307   %}
19308   ins_pipe( pipe_slow );
19309 %}
19310 
19311 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19312   predicate(Matcher::vector_length(n) >= 8);
19313   match(Set dst (VectorInsert (Binary src val) idx));
19314   effect(TEMP vtmp);
19315   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19316   ins_encode %{
19317     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19318     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19319 
19320     int vlen = Matcher::vector_length(this);
19321     uint x_idx = $idx$$constant & right_n_bits(2);
19322     if (vlen == 8) {
19323       uint y_idx = ($idx$$constant >> 2) & 1;
19325       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19326       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19327       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19328     } else {
19329       assert(vlen == 16, "sanity");
19330       uint y_idx = ($idx$$constant >> 2) & 3;
19331       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19332       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19333       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19334     }
19335   %}
19336   ins_pipe( pipe_slow );
19337 %}
19338 
19339 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19340   predicate(Matcher::vector_length(n) == 2);
19341   match(Set dst (VectorInsert (Binary dst val) idx));
19342   effect(TEMP tmp);
19343   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19344   ins_encode %{
19345     assert(UseSSE >= 4, "sanity");
19346     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19347     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19348 
19349     __ movq($tmp$$Register, $val$$XMMRegister);
19350     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19351   %}
19352   ins_pipe( pipe_slow );
19353 %}
19354 
19355 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19356   predicate(Matcher::vector_length(n) == 4);
19357   match(Set dst (VectorInsert (Binary src val) idx));
19358   effect(TEMP vtmp, TEMP tmp);
19359   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19360   ins_encode %{
19361     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19362     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19363 
19364     uint x_idx = $idx$$constant & right_n_bits(1);
19365     uint y_idx = ($idx$$constant >> 1) & 1;
19367     __ movq($tmp$$Register, $val$$XMMRegister);
19368     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19369     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19370     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19371   %}
19372   ins_pipe( pipe_slow );
19373 %}
19374 
19375 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19376   predicate(Matcher::vector_length(n) == 8);
19377   match(Set dst (VectorInsert (Binary src val) idx));
19378   effect(TEMP tmp, TEMP vtmp);
19379   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19380   ins_encode %{
19381     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19382     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19383 
19384     uint x_idx = $idx$$constant & right_n_bits(1);
19385     uint y_idx = ($idx$$constant >> 1) & 3;
19386     __ movq($tmp$$Register, $val$$XMMRegister);
19387     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19388     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19389     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19390   %}
19391   ins_pipe( pipe_slow );
19392 %}
19393 
19394 // ====================REDUCTION ARITHMETIC=======================================
19395 
19396 // =======================Int Reduction==========================================
19397 
19398 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19399   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19400   match(Set dst (AddReductionVI src1 src2));
19401   match(Set dst (MulReductionVI src1 src2));
19402   match(Set dst (AndReductionV  src1 src2));
19403   match(Set dst ( OrReductionV  src1 src2));
19404   match(Set dst (XorReductionV  src1 src2));
19405   match(Set dst (MinReductionV  src1 src2));
19406   match(Set dst (MaxReductionV  src1 src2));
19407   match(Set dst (UMinReductionV  src1 src2));
19408   match(Set dst (UMaxReductionV  src1 src2));
19409   effect(TEMP vtmp1, TEMP vtmp2);
19410   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19411   ins_encode %{
19412     int opcode = this->ideal_Opcode();
19413     int vlen = Matcher::vector_length(this, $src2);
19414     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19415   %}
19416   ins_pipe( pipe_slow );
19417 %}
19418 
19419 // =======================Long Reduction==========================================
19420 
19421 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19422   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19423   match(Set dst (AddReductionVL src1 src2));
19424   match(Set dst (MulReductionVL src1 src2));
19425   match(Set dst (AndReductionV  src1 src2));
19426   match(Set dst ( OrReductionV  src1 src2));
19427   match(Set dst (XorReductionV  src1 src2));
19428   match(Set dst (MinReductionV  src1 src2));
19429   match(Set dst (MaxReductionV  src1 src2));
19430   match(Set dst (UMinReductionV  src1 src2));
19431   match(Set dst (UMaxReductionV  src1 src2));
19432   effect(TEMP vtmp1, TEMP vtmp2);
19433   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19434   ins_encode %{
19435     int opcode = this->ideal_Opcode();
19436     int vlen = Matcher::vector_length(this, $src2);
19437     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19438   %}
19439   ins_pipe( pipe_slow );
19440 %}
19441 
19442 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19443   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19444   match(Set dst (AddReductionVL src1 src2));
19445   match(Set dst (MulReductionVL src1 src2));
19446   match(Set dst (AndReductionV  src1 src2));
19447   match(Set dst ( OrReductionV  src1 src2));
19448   match(Set dst (XorReductionV  src1 src2));
19449   match(Set dst (MinReductionV  src1 src2));
19450   match(Set dst (MaxReductionV  src1 src2));
19451   match(Set dst (UMinReductionV  src1 src2));
19452   match(Set dst (UMaxReductionV  src1 src2));
19453   effect(TEMP vtmp1, TEMP vtmp2);
19454   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19455   ins_encode %{
19456     int opcode = this->ideal_Opcode();
19457     int vlen = Matcher::vector_length(this, $src2);
19458     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19459   %}
19460   ins_pipe( pipe_slow );
19461 %}
19462 
19463 // =======================Float Reduction==========================================
19464 
19465 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19466   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19467   match(Set dst (AddReductionVF dst src));
19468   match(Set dst (MulReductionVF dst src));
19469   effect(TEMP dst, TEMP vtmp);
19470   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19471   ins_encode %{
19472     int opcode = this->ideal_Opcode();
19473     int vlen = Matcher::vector_length(this, $src);
19474     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19475   %}
19476   ins_pipe( pipe_slow );
19477 %}
19478 
19479 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19480   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19481   match(Set dst (AddReductionVF dst src));
19482   match(Set dst (MulReductionVF dst src));
19483   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19484   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19485   ins_encode %{
19486     int opcode = this->ideal_Opcode();
19487     int vlen = Matcher::vector_length(this, $src);
19488     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19489   %}
19490   ins_pipe( pipe_slow );
19491 %}
19492 
19493 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19494   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19495   match(Set dst (AddReductionVF dst src));
19496   match(Set dst (MulReductionVF dst src));
19497   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19498   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19499   ins_encode %{
19500     int opcode = this->ideal_Opcode();
19501     int vlen = Matcher::vector_length(this, $src);
19502     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19503   %}
19504   ins_pipe( pipe_slow );
19505 %}
19506 
19507 
19508 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19509   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19510   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19511   // src1 contains reduction identity
19512   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19513   match(Set dst (AddReductionVF src1 src2));
19514   match(Set dst (MulReductionVF src1 src2));
19515   effect(TEMP dst);
19516   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19517   ins_encode %{
19518     int opcode = this->ideal_Opcode();
19519     int vlen = Matcher::vector_length(this, $src2);
19520     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19521   %}
19522   ins_pipe( pipe_slow );
19523 %}
19524 
19525 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19526   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19527   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19528   // src1 contains reduction identity
19529   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19530   match(Set dst (AddReductionVF src1 src2));
19531   match(Set dst (MulReductionVF src1 src2));
19532   effect(TEMP dst, TEMP vtmp);
19533   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19534   ins_encode %{
19535     int opcode = this->ideal_Opcode();
19536     int vlen = Matcher::vector_length(this, $src2);
19537     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19538   %}
19539   ins_pipe( pipe_slow );
19540 %}
19541 
19542 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19543   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19544   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19545   // src1 contains reduction identity
19546   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19547   match(Set dst (AddReductionVF src1 src2));
19548   match(Set dst (MulReductionVF src1 src2));
19549   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19550   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19551   ins_encode %{
19552     int opcode = this->ideal_Opcode();
19553     int vlen = Matcher::vector_length(this, $src2);
19554     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19555   %}
19556   ins_pipe( pipe_slow );
19557 %}
19558 
19559 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19560   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19561   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19562   // src1 contains reduction identity
19563   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19564   match(Set dst (AddReductionVF src1 src2));
19565   match(Set dst (MulReductionVF src1 src2));
19566   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19567   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19568   ins_encode %{
19569     int opcode = this->ideal_Opcode();
19570     int vlen = Matcher::vector_length(this, $src2);
19571     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19572   %}
19573   ins_pipe( pipe_slow );
19574 %}
19575 
19576 // =======================Double Reduction==========================================
19577 
19578 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19579   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19580   match(Set dst (AddReductionVD dst src));
19581   match(Set dst (MulReductionVD dst src));
19582   effect(TEMP dst, TEMP vtmp);
19583   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19584   ins_encode %{
19585     int opcode = this->ideal_Opcode();
19586     int vlen = Matcher::vector_length(this, $src);
19587     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19588 %}
19589   ins_pipe( pipe_slow );
19590 %}
19591 
19592 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19593   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19594   match(Set dst (AddReductionVD dst src));
19595   match(Set dst (MulReductionVD dst src));
19596   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19597   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19598   ins_encode %{
19599     int opcode = this->ideal_Opcode();
19600     int vlen = Matcher::vector_length(this, $src);
19601     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19602   %}
19603   ins_pipe( pipe_slow );
19604 %}
19605 
19606 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19607   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19608   match(Set dst (AddReductionVD dst src));
19609   match(Set dst (MulReductionVD dst src));
19610   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19611   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19612   ins_encode %{
19613     int opcode = this->ideal_Opcode();
19614     int vlen = Matcher::vector_length(this, $src);
19615     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19616   %}
19617   ins_pipe( pipe_slow );
19618 %}
19619 
19620 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19621   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19622   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19623   // src1 contains reduction identity
19624   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19625   match(Set dst (AddReductionVD src1 src2));
19626   match(Set dst (MulReductionVD src1 src2));
19627   effect(TEMP dst);
19628   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19629   ins_encode %{
19630     int opcode = this->ideal_Opcode();
19631     int vlen = Matcher::vector_length(this, $src2);
19632     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19633 %}
19634   ins_pipe( pipe_slow );
19635 %}
19636 
19637 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19638   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19639   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19640   // src1 contains reduction identity
19641   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19642   match(Set dst (AddReductionVD src1 src2));
19643   match(Set dst (MulReductionVD src1 src2));
19644   effect(TEMP dst, TEMP vtmp);
19645   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19646   ins_encode %{
19647     int opcode = this->ideal_Opcode();
19648     int vlen = Matcher::vector_length(this, $src2);
19649     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19650   %}
19651   ins_pipe( pipe_slow );
19652 %}
19653 
19654 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19655   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19656   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19657   // src1 contains reduction identity
19658   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19659   match(Set dst (AddReductionVD src1 src2));
19660   match(Set dst (MulReductionVD src1 src2));
19661   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19662   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19663   ins_encode %{
19664     int opcode = this->ideal_Opcode();
19665     int vlen = Matcher::vector_length(this, $src2);
19666     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19667   %}
19668   ins_pipe( pipe_slow );
19669 %}
19670 
19671 // =======================Byte Reduction==========================================
19672 
19673 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19674   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19675   match(Set dst (AddReductionVI src1 src2));
19676   match(Set dst (AndReductionV  src1 src2));
19677   match(Set dst ( OrReductionV  src1 src2));
19678   match(Set dst (XorReductionV  src1 src2));
19679   match(Set dst (MinReductionV  src1 src2));
19680   match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
19683   effect(TEMP vtmp1, TEMP vtmp2);
19684   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19685   ins_encode %{
19686     int opcode = this->ideal_Opcode();
19687     int vlen = Matcher::vector_length(this, $src2);
19688     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19689   %}
19690   ins_pipe( pipe_slow );
19691 %}
19692 
19693 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19694   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19695   match(Set dst (AddReductionVI src1 src2));
19696   match(Set dst (AndReductionV  src1 src2));
19697   match(Set dst ( OrReductionV  src1 src2));
19698   match(Set dst (XorReductionV  src1 src2));
19699   match(Set dst (MinReductionV  src1 src2));
19700   match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
19703   effect(TEMP vtmp1, TEMP vtmp2);
19704   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19705   ins_encode %{
19706     int opcode = this->ideal_Opcode();
19707     int vlen = Matcher::vector_length(this, $src2);
19708     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19709   %}
19710   ins_pipe( pipe_slow );
19711 %}
19712 
19713 // =======================Short Reduction==========================================
19714 
19715 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19716   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19717   match(Set dst (AddReductionVI src1 src2));
19718   match(Set dst (MulReductionVI src1 src2));
19719   match(Set dst (AndReductionV  src1 src2));
19720   match(Set dst ( OrReductionV  src1 src2));
19721   match(Set dst (XorReductionV  src1 src2));
19722   match(Set dst (MinReductionV  src1 src2));
19723   match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
19726   effect(TEMP vtmp1, TEMP vtmp2);
19727   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19728   ins_encode %{
19729     int opcode = this->ideal_Opcode();
19730     int vlen = Matcher::vector_length(this, $src2);
19731     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19732   %}
19733   ins_pipe( pipe_slow );
19734 %}
19735 
19736 // =======================Mul Reduction==========================================
19737 
19738 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19739   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19740             Matcher::vector_length(n->in(2)) <= 32); // src2
19741   match(Set dst (MulReductionVI src1 src2));
19742   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19743   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19744   ins_encode %{
19745     int opcode = this->ideal_Opcode();
19746     int vlen = Matcher::vector_length(this, $src2);
19747     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19748   %}
19749   ins_pipe( pipe_slow );
19750 %}
19751 
19752 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19753   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19754             Matcher::vector_length(n->in(2)) == 64); // src2
19755   match(Set dst (MulReductionVI src1 src2));
19756   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19757   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19758   ins_encode %{
19759     int opcode = this->ideal_Opcode();
19760     int vlen = Matcher::vector_length(this, $src2);
19761     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19762   %}
19763   ins_pipe( pipe_slow );
19764 %}
19765 
19766 //--------------------Min/Max Float Reduction --------------------
19767 // Float Min Reduction
19768 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19769                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19770   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19771             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19772              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19773             Matcher::vector_length(n->in(2)) == 2);
19774   match(Set dst (MinReductionV src1 src2));
19775   match(Set dst (MaxReductionV src1 src2));
19776   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19777   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19778   ins_encode %{
19779     assert(UseAVX > 0, "sanity");
19780 
19781     int opcode = this->ideal_Opcode();
19782     int vlen = Matcher::vector_length(this, $src2);
19783     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19784                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19785   %}
19786   ins_pipe( pipe_slow );
19787 %}
19788 
19789 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19790                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19791   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19792             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19793              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19794             Matcher::vector_length(n->in(2)) >= 4);
19795   match(Set dst (MinReductionV src1 src2));
19796   match(Set dst (MaxReductionV src1 src2));
19797   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19798   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19799   ins_encode %{
19800     assert(UseAVX > 0, "sanity");
19801 
19802     int opcode = this->ideal_Opcode();
19803     int vlen = Matcher::vector_length(this, $src2);
19804     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19805                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19806   %}
19807   ins_pipe( pipe_slow );
19808 %}
19809 
19810 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19811                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19812   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19813             Matcher::vector_length(n->in(2)) == 2);
19814   match(Set dst (MinReductionV dst src));
19815   match(Set dst (MaxReductionV dst src));
19816   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19817   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19818   ins_encode %{
19819     assert(UseAVX > 0, "sanity");
19820 
19821     int opcode = this->ideal_Opcode();
19822     int vlen = Matcher::vector_length(this, $src);
19823     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19824                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19825   %}
19826   ins_pipe( pipe_slow );
19827 %}
19828 
19829 
19830 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19831                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19832   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19833             Matcher::vector_length(n->in(2)) >= 4);
19834   match(Set dst (MinReductionV dst src));
19835   match(Set dst (MaxReductionV dst src));
19836   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19837   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19838   ins_encode %{
19839     assert(UseAVX > 0, "sanity");
19840 
19841     int opcode = this->ideal_Opcode();
19842     int vlen = Matcher::vector_length(this, $src);
19843     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19844                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19845   %}
19846   ins_pipe( pipe_slow );
19847 %}
19848 
19849 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19850   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19851             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19852              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19853             Matcher::vector_length(n->in(2)) == 2);
19854   match(Set dst (MinReductionV src1 src2));
19855   match(Set dst (MaxReductionV src1 src2));
19856   effect(TEMP dst, TEMP xtmp1);
19857   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19858   ins_encode %{
19859     int opcode = this->ideal_Opcode();
19860     int vlen = Matcher::vector_length(this, $src2);
19861     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19862                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19863   %}
19864   ins_pipe( pipe_slow );
19865 %}
19866 
19867 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19868   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19869             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19870              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19871             Matcher::vector_length(n->in(2)) >= 4);
19872   match(Set dst (MinReductionV src1 src2));
19873   match(Set dst (MaxReductionV src1 src2));
19874   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19875   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19876   ins_encode %{
19877     int opcode = this->ideal_Opcode();
19878     int vlen = Matcher::vector_length(this, $src2);
19879     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19880                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19881   %}
19882   ins_pipe( pipe_slow );
19883 %}
19884 
19885 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
19886   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19887             Matcher::vector_length(n->in(2)) == 2);
19888   match(Set dst (MinReductionV dst src));
19889   match(Set dst (MaxReductionV dst src));
19890   effect(TEMP dst, TEMP xtmp1);
19891   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19892   ins_encode %{
19893     int opcode = this->ideal_Opcode();
19894     int vlen = Matcher::vector_length(this, $src);
19895     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19896                          $xtmp1$$XMMRegister);
19897   %}
19898   ins_pipe( pipe_slow );
19899 %}
19900 
19901 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19902   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19903             Matcher::vector_length(n->in(2)) >= 4);
19904   match(Set dst (MinReductionV dst src));
19905   match(Set dst (MaxReductionV dst src));
19906   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19907   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19908   ins_encode %{
19909     int opcode = this->ideal_Opcode();
19910     int vlen = Matcher::vector_length(this, $src);
19911     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19912                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19913   %}
19914   ins_pipe( pipe_slow );
19915 %}
19916 
19917 //--------------------Min Double Reduction --------------------
19918 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19919                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19920   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19921             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19922              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19923             Matcher::vector_length(n->in(2)) == 2);
19924   match(Set dst (MinReductionV src1 src2));
19925   match(Set dst (MaxReductionV src1 src2));
19926   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19927   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19928   ins_encode %{
19929     assert(UseAVX > 0, "sanity");
19930 
19931     int opcode = this->ideal_Opcode();
19932     int vlen = Matcher::vector_length(this, $src2);
19933     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19934                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19935   %}
19936   ins_pipe( pipe_slow );
19937 %}
19938 
19939 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19940                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19941   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19942             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19943              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19944             Matcher::vector_length(n->in(2)) >= 4);
19945   match(Set dst (MinReductionV src1 src2));
19946   match(Set dst (MaxReductionV src1 src2));
19947   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19948   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19949   ins_encode %{
19950     assert(UseAVX > 0, "sanity");
19951 
19952     int opcode = this->ideal_Opcode();
19953     int vlen = Matcher::vector_length(this, $src2);
19954     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19955                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19956   %}
19957   ins_pipe( pipe_slow );
19958 %}
19959 
19960 
19961 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19962                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19963   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19964             Matcher::vector_length(n->in(2)) == 2);
19965   match(Set dst (MinReductionV dst src));
19966   match(Set dst (MaxReductionV dst src));
19967   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19968   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19969   ins_encode %{
19970     assert(UseAVX > 0, "sanity");
19971 
19972     int opcode = this->ideal_Opcode();
19973     int vlen = Matcher::vector_length(this, $src);
19974     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19975                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19976   %}
19977   ins_pipe( pipe_slow );
19978 %}
19979 
19980 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19981                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19982   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19983             Matcher::vector_length(n->in(2)) >= 4);
19984   match(Set dst (MinReductionV dst src));
19985   match(Set dst (MaxReductionV dst src));
19986   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19987   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19988   ins_encode %{
19989     assert(UseAVX > 0, "sanity");
19990 
19991     int opcode = this->ideal_Opcode();
19992     int vlen = Matcher::vector_length(this, $src);
19993     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19994                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19995   %}
19996   ins_pipe( pipe_slow );
19997 %}
19998 
19999 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
20000   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20001             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20002              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20003             Matcher::vector_length(n->in(2)) == 2);
20004   match(Set dst (MinReductionV src1 src2));
20005   match(Set dst (MaxReductionV src1 src2));
20006   effect(TEMP dst, TEMP xtmp1);
20007   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20008   ins_encode %{
20009     int opcode = this->ideal_Opcode();
20010     int vlen = Matcher::vector_length(this, $src2);
20011     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20012                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
20013   %}
20014   ins_pipe( pipe_slow );
20015 %}
20016 
20017 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20018   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20019             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20020              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20021             Matcher::vector_length(n->in(2)) >= 4);
20022   match(Set dst (MinReductionV src1 src2));
20023   match(Set dst (MaxReductionV src1 src2));
20024   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20025   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20026   ins_encode %{
20027     int opcode = this->ideal_Opcode();
20028     int vlen = Matcher::vector_length(this, $src2);
20029     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20030                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20031   %}
20032   ins_pipe( pipe_slow );
20033 %}
20034 
20035 
20036 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20037   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20038             Matcher::vector_length(n->in(2)) == 2);
20039   match(Set dst (MinReductionV dst src));
20040   match(Set dst (MaxReductionV dst src));
20041   effect(TEMP dst, TEMP xtmp1);
20042   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20043   ins_encode %{
20044     int opcode = this->ideal_Opcode();
20045     int vlen = Matcher::vector_length(this, $src);
20046     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20047                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20048   %}
20049   ins_pipe( pipe_slow );
20050 %}
20051 
20052 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20053   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20054             Matcher::vector_length(n->in(2)) >= 4);
20055   match(Set dst (MinReductionV dst src));
20056   match(Set dst (MaxReductionV dst src));
20057   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20058   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20059   ins_encode %{
20060     int opcode = this->ideal_Opcode();
20061     int vlen = Matcher::vector_length(this, $src);
20062     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20063                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20064   %}
20065   ins_pipe( pipe_slow );
20066 %}
20067 
20068 // ====================VECTOR ARITHMETIC=======================================
20069 
20070 // --------------------------------- ADD --------------------------------------
20071 
20072 // Bytes vector add
20073 instruct vaddB(vec dst, vec src) %{
20074   predicate(UseAVX == 0);
20075   match(Set dst (AddVB dst src));
20076   format %{ "paddb   $dst,$src\t! add packedB" %}
20077   ins_encode %{
20078     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20079   %}
20080   ins_pipe( pipe_slow );
20081 %}
20082 
20083 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20084   predicate(UseAVX > 0);
20085   match(Set dst (AddVB src1 src2));
20086   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
20087   ins_encode %{
20088     int vlen_enc = vector_length_encoding(this);
20089     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20090   %}
20091   ins_pipe( pipe_slow );
20092 %}
20093 
20094 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20095   predicate((UseAVX > 0) &&
20096             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20097   match(Set dst (AddVB src (LoadVector mem)));
20098   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
20099   ins_encode %{
20100     int vlen_enc = vector_length_encoding(this);
20101     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20102   %}
20103   ins_pipe( pipe_slow );
20104 %}
20105 
20106 // Shorts/Chars vector add
20107 instruct vaddS(vec dst, vec src) %{
20108   predicate(UseAVX == 0);
20109   match(Set dst (AddVS dst src));
20110   format %{ "paddw   $dst,$src\t! add packedS" %}
20111   ins_encode %{
20112     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20113   %}
20114   ins_pipe( pipe_slow );
20115 %}
20116 
20117 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20118   predicate(UseAVX > 0);
20119   match(Set dst (AddVS src1 src2));
20120   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
20121   ins_encode %{
20122     int vlen_enc = vector_length_encoding(this);
20123     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20124   %}
20125   ins_pipe( pipe_slow );
20126 %}
20127 
20128 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20129   predicate((UseAVX > 0) &&
20130             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20131   match(Set dst (AddVS src (LoadVector mem)));
20132   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
20133   ins_encode %{
20134     int vlen_enc = vector_length_encoding(this);
20135     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20136   %}
20137   ins_pipe( pipe_slow );
20138 %}
20139 
20140 // Integers vector add
20141 instruct vaddI(vec dst, vec src) %{
20142   predicate(UseAVX == 0);
20143   match(Set dst (AddVI dst src));
20144   format %{ "paddd   $dst,$src\t! add packedI" %}
20145   ins_encode %{
20146     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20147   %}
20148   ins_pipe( pipe_slow );
20149 %}
20150 
20151 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20152   predicate(UseAVX > 0);
20153   match(Set dst (AddVI src1 src2));
20154   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
20155   ins_encode %{
20156     int vlen_enc = vector_length_encoding(this);
20157     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20158   %}
20159   ins_pipe( pipe_slow );
20160 %}
20161 
20162 
20163 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20164   predicate((UseAVX > 0) &&
20165             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20166   match(Set dst (AddVI src (LoadVector mem)));
20167   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
20168   ins_encode %{
20169     int vlen_enc = vector_length_encoding(this);
20170     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20171   %}
20172   ins_pipe( pipe_slow );
20173 %}
20174 
20175 // Longs vector add
20176 instruct vaddL(vec dst, vec src) %{
20177   predicate(UseAVX == 0);
20178   match(Set dst (AddVL dst src));
20179   format %{ "paddq   $dst,$src\t! add packedL" %}
20180   ins_encode %{
20181     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20182   %}
20183   ins_pipe( pipe_slow );
20184 %}
20185 
20186 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20187   predicate(UseAVX > 0);
20188   match(Set dst (AddVL src1 src2));
20189   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
20190   ins_encode %{
20191     int vlen_enc = vector_length_encoding(this);
20192     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20193   %}
20194   ins_pipe( pipe_slow );
20195 %}
20196 
20197 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20198   predicate((UseAVX > 0) &&
20199             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20200   match(Set dst (AddVL src (LoadVector mem)));
20201   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
20202   ins_encode %{
20203     int vlen_enc = vector_length_encoding(this);
20204     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20205   %}
20206   ins_pipe( pipe_slow );
20207 %}
20208 
20209 // Floats vector add
20210 instruct vaddF(vec dst, vec src) %{
20211   predicate(UseAVX == 0);
20212   match(Set dst (AddVF dst src));
20213   format %{ "addps   $dst,$src\t! add packedF" %}
20214   ins_encode %{
20215     __ addps($dst$$XMMRegister, $src$$XMMRegister);
20216   %}
20217   ins_pipe( pipe_slow );
20218 %}
20219 
20220 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20221   predicate(UseAVX > 0);
20222   match(Set dst (AddVF src1 src2));
20223   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
20224   ins_encode %{
20225     int vlen_enc = vector_length_encoding(this);
20226     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20227   %}
20228   ins_pipe( pipe_slow );
20229 %}
20230 
20231 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20232   predicate((UseAVX > 0) &&
20233             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20234   match(Set dst (AddVF src (LoadVector mem)));
20235   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
20236   ins_encode %{
20237     int vlen_enc = vector_length_encoding(this);
20238     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20239   %}
20240   ins_pipe( pipe_slow );
20241 %}
20242 
20243 // Doubles vector add
20244 instruct vaddD(vec dst, vec src) %{
20245   predicate(UseAVX == 0);
20246   match(Set dst (AddVD dst src));
20247   format %{ "addpd   $dst,$src\t! add packedD" %}
20248   ins_encode %{
20249     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20250   %}
20251   ins_pipe( pipe_slow );
20252 %}
20253 
20254 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20255   predicate(UseAVX > 0);
20256   match(Set dst (AddVD src1 src2));
20257   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
20258   ins_encode %{
20259     int vlen_enc = vector_length_encoding(this);
20260     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20261   %}
20262   ins_pipe( pipe_slow );
20263 %}
20264 
20265 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20266   predicate((UseAVX > 0) &&
20267             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20268   match(Set dst (AddVD src (LoadVector mem)));
20269   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
20270   ins_encode %{
20271     int vlen_enc = vector_length_encoding(this);
20272     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20273   %}
20274   ins_pipe( pipe_slow );
20275 %}
20276 
20277 // --------------------------------- SUB --------------------------------------
20278 
20279 // Bytes vector sub
20280 instruct vsubB(vec dst, vec src) %{
20281   predicate(UseAVX == 0);
20282   match(Set dst (SubVB dst src));
20283   format %{ "psubb   $dst,$src\t! sub packedB" %}
20284   ins_encode %{
20285     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20286   %}
20287   ins_pipe( pipe_slow );
20288 %}
20289 
20290 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20291   predicate(UseAVX > 0);
20292   match(Set dst (SubVB src1 src2));
20293   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20294   ins_encode %{
20295     int vlen_enc = vector_length_encoding(this);
20296     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20297   %}
20298   ins_pipe( pipe_slow );
20299 %}
20300 
20301 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20302   predicate((UseAVX > 0) &&
20303             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20304   match(Set dst (SubVB src (LoadVector mem)));
20305   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20306   ins_encode %{
20307     int vlen_enc = vector_length_encoding(this);
20308     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20309   %}
20310   ins_pipe( pipe_slow );
20311 %}
20312 
20313 // Shorts/Chars vector sub
20314 instruct vsubS(vec dst, vec src) %{
20315   predicate(UseAVX == 0);
20316   match(Set dst (SubVS dst src));
20317   format %{ "psubw   $dst,$src\t! sub packedS" %}
20318   ins_encode %{
20319     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20320   %}
20321   ins_pipe( pipe_slow );
20322 %}
20323 
20324 
20325 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20326   predicate(UseAVX > 0);
20327   match(Set dst (SubVS src1 src2));
20328   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20329   ins_encode %{
20330     int vlen_enc = vector_length_encoding(this);
20331     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20332   %}
20333   ins_pipe( pipe_slow );
20334 %}
20335 
20336 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20337   predicate((UseAVX > 0) &&
20338             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20339   match(Set dst (SubVS src (LoadVector mem)));
20340   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20341   ins_encode %{
20342     int vlen_enc = vector_length_encoding(this);
20343     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20344   %}
20345   ins_pipe( pipe_slow );
20346 %}
20347 
20348 // Integers vector sub
20349 instruct vsubI(vec dst, vec src) %{
20350   predicate(UseAVX == 0);
20351   match(Set dst (SubVI dst src));
20352   format %{ "psubd   $dst,$src\t! sub packedI" %}
20353   ins_encode %{
20354     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20355   %}
20356   ins_pipe( pipe_slow );
20357 %}
20358 
20359 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20360   predicate(UseAVX > 0);
20361   match(Set dst (SubVI src1 src2));
20362   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20363   ins_encode %{
20364     int vlen_enc = vector_length_encoding(this);
20365     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20366   %}
20367   ins_pipe( pipe_slow );
20368 %}
20369 
20370 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20371   predicate((UseAVX > 0) &&
20372             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20373   match(Set dst (SubVI src (LoadVector mem)));
20374   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20375   ins_encode %{
20376     int vlen_enc = vector_length_encoding(this);
20377     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20378   %}
20379   ins_pipe( pipe_slow );
20380 %}
20381 
20382 // Longs vector sub
20383 instruct vsubL(vec dst, vec src) %{
20384   predicate(UseAVX == 0);
20385   match(Set dst (SubVL dst src));
20386   format %{ "psubq   $dst,$src\t! sub packedL" %}
20387   ins_encode %{
20388     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20389   %}
20390   ins_pipe( pipe_slow );
20391 %}
20392 
20393 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20394   predicate(UseAVX > 0);
20395   match(Set dst (SubVL src1 src2));
20396   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20397   ins_encode %{
20398     int vlen_enc = vector_length_encoding(this);
20399     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20400   %}
20401   ins_pipe( pipe_slow );
20402 %}
20403 
20404 
20405 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20406   predicate((UseAVX > 0) &&
20407             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20408   match(Set dst (SubVL src (LoadVector mem)));
20409   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20410   ins_encode %{
20411     int vlen_enc = vector_length_encoding(this);
20412     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20413   %}
20414   ins_pipe( pipe_slow );
20415 %}
20416 
20417 // Floats vector sub
20418 instruct vsubF(vec dst, vec src) %{
20419   predicate(UseAVX == 0);
20420   match(Set dst (SubVF dst src));
20421   format %{ "subps   $dst,$src\t! sub packedF" %}
20422   ins_encode %{
20423     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20424   %}
20425   ins_pipe( pipe_slow );
20426 %}
20427 
20428 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20429   predicate(UseAVX > 0);
20430   match(Set dst (SubVF src1 src2));
20431   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20432   ins_encode %{
20433     int vlen_enc = vector_length_encoding(this);
20434     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20435   %}
20436   ins_pipe( pipe_slow );
20437 %}
20438 
20439 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20440   predicate((UseAVX > 0) &&
20441             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20442   match(Set dst (SubVF src (LoadVector mem)));
20443   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20444   ins_encode %{
20445     int vlen_enc = vector_length_encoding(this);
20446     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20447   %}
20448   ins_pipe( pipe_slow );
20449 %}
20450 
20451 // Doubles vector sub
20452 instruct vsubD(vec dst, vec src) %{
20453   predicate(UseAVX == 0);
20454   match(Set dst (SubVD dst src));
20455   format %{ "subpd   $dst,$src\t! sub packedD" %}
20456   ins_encode %{
20457     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20458   %}
20459   ins_pipe( pipe_slow );
20460 %}
20461 
20462 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20463   predicate(UseAVX > 0);
20464   match(Set dst (SubVD src1 src2));
20465   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20466   ins_encode %{
20467     int vlen_enc = vector_length_encoding(this);
20468     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20469   %}
20470   ins_pipe( pipe_slow );
20471 %}
20472 
20473 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20474   predicate((UseAVX > 0) &&
20475             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20476   match(Set dst (SubVD src (LoadVector mem)));
20477   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20478   ins_encode %{
20479     int vlen_enc = vector_length_encoding(this);
20480     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20481   %}
20482   ins_pipe( pipe_slow );
20483 %}
20484 
20485 // --------------------------------- MUL --------------------------------------
20486 
20487 // Byte vector mul
20488 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20489   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20490   match(Set dst (MulVB src1 src2));
20491   effect(TEMP dst, TEMP xtmp);
20492   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20493   ins_encode %{
20494     assert(UseSSE > 3, "required");
20495     __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20496     __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20497     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20498     __ psllw($dst$$XMMRegister, 8);
20499     __ psrlw($dst$$XMMRegister, 8);
20500     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20501   %}
20502   ins_pipe( pipe_slow );
20503 %}
20504 
20505 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20506   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20507   match(Set dst (MulVB src1 src2));
20508   effect(TEMP dst, TEMP xtmp);
20509   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20510   ins_encode %{
20511     assert(UseSSE > 3, "required");
20512     // Odd-index elements
20513     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20514     __ psrlw($dst$$XMMRegister, 8);
20515     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20516     __ psrlw($xtmp$$XMMRegister, 8);
20517     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20518     __ psllw($dst$$XMMRegister, 8);
20519     // Even-index elements
20520     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20521     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20522     __ psllw($xtmp$$XMMRegister, 8);
20523     __ psrlw($xtmp$$XMMRegister, 8);
20524     // Combine
20525     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20526   %}
20527   ins_pipe( pipe_slow );
20528 %}
20529 
20530 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20531   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20532   match(Set dst (MulVB src1 src2));
20533   effect(TEMP xtmp1, TEMP xtmp2);
20534   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20535   ins_encode %{
20536     int vlen_enc = vector_length_encoding(this);
20537     // Odd-index elements
20538     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20539     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20540     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20541     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20542     // Even-index elements
20543     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20544     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20545     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20546     // Combine
20547     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20548   %}
20549   ins_pipe( pipe_slow );
20550 %}
20551 
20552 // Shorts/Chars vector mul
20553 instruct vmulS(vec dst, vec src) %{
20554   predicate(UseAVX == 0);
20555   match(Set dst (MulVS dst src));
20556   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20557   ins_encode %{
20558     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20559   %}
20560   ins_pipe( pipe_slow );
20561 %}
20562 
20563 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20564   predicate(UseAVX > 0);
20565   match(Set dst (MulVS src1 src2));
20566   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20567   ins_encode %{
20568     int vlen_enc = vector_length_encoding(this);
20569     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20570   %}
20571   ins_pipe( pipe_slow );
20572 %}
20573 
20574 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20575   predicate((UseAVX > 0) &&
20576             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20577   match(Set dst (MulVS src (LoadVector mem)));
20578   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20579   ins_encode %{
20580     int vlen_enc = vector_length_encoding(this);
20581     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20582   %}
20583   ins_pipe( pipe_slow );
20584 %}
20585 
20586 // Integers vector mul
20587 instruct vmulI(vec dst, vec src) %{
20588   predicate(UseAVX == 0);
20589   match(Set dst (MulVI dst src));
20590   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20591   ins_encode %{
20592     assert(UseSSE > 3, "required");
20593     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20594   %}
20595   ins_pipe( pipe_slow );
20596 %}
20597 
20598 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20599   predicate(UseAVX > 0);
20600   match(Set dst (MulVI src1 src2));
20601   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20602   ins_encode %{
20603     int vlen_enc = vector_length_encoding(this);
20604     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20605   %}
20606   ins_pipe( pipe_slow );
20607 %}
20608 
20609 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20610   predicate((UseAVX > 0) &&
20611             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20612   match(Set dst (MulVI src (LoadVector mem)));
20613   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20614   ins_encode %{
20615     int vlen_enc = vector_length_encoding(this);
20616     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20617   %}
20618   ins_pipe( pipe_slow );
20619 %}
20620 
20621 // Longs vector mul
20622 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20623   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20624              VM_Version::supports_avx512dq()) ||
20625             VM_Version::supports_avx512vldq());
20626   match(Set dst (MulVL src1 src2));
20627   ins_cost(500);
20628   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20629   ins_encode %{
20630     assert(UseAVX > 2, "required");
20631     int vlen_enc = vector_length_encoding(this);
20632     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20633   %}
20634   ins_pipe( pipe_slow );
20635 %}
20636 
20637 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20638   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20639              VM_Version::supports_avx512dq()) ||
20640             (Matcher::vector_length_in_bytes(n) > 8 &&
20641              VM_Version::supports_avx512vldq()));
20642   match(Set dst (MulVL src (LoadVector mem)));
20643   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20644   ins_cost(500);
20645   ins_encode %{
20646     assert(UseAVX > 2, "required");
20647     int vlen_enc = vector_length_encoding(this);
20648     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20649   %}
20650   ins_pipe( pipe_slow );
20651 %}
20652 
20653 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20654   predicate(UseAVX == 0);
20655   match(Set dst (MulVL src1 src2));
20656   ins_cost(500);
20657   effect(TEMP dst, TEMP xtmp);
20658   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20659   ins_encode %{
20660     assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only their low 32 bits are needed
20662     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20663     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20664     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20665     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20666     __ psllq($dst$$XMMRegister, 32);
20667     // Get the lo-lo products
20668     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20669     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20670     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20671   %}
20672   ins_pipe( pipe_slow );
20673 %}
20674 
20675 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20676   predicate(UseAVX > 0 &&
20677             ((Matcher::vector_length_in_bytes(n) == 64 &&
20678               !VM_Version::supports_avx512dq()) ||
20679              (Matcher::vector_length_in_bytes(n) < 64 &&
20680               !VM_Version::supports_avx512vldq())));
20681   match(Set dst (MulVL src1 src2));
20682   effect(TEMP xtmp1, TEMP xtmp2);
20683   ins_cost(500);
20684   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20685   ins_encode %{
20686     int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only their low 32 bits are needed
20688     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20689     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20690     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20691     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20692     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20693     // Get the lo-lo products
20694     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20695     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20696   %}
20697   ins_pipe( pipe_slow );
20698 %}
20699 
20700 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20701   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20702   match(Set dst (MulVL src1 src2));
20703   ins_cost(100);
20704   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20705   ins_encode %{
20706     int vlen_enc = vector_length_encoding(this);
20707     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20708   %}
20709   ins_pipe( pipe_slow );
20710 %}
20711 
20712 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20713   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20714   match(Set dst (MulVL src1 src2));
20715   ins_cost(100);
20716   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20717   ins_encode %{
20718     int vlen_enc = vector_length_encoding(this);
20719     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20720   %}
20721   ins_pipe( pipe_slow );
20722 %}
20723 
20724 // Floats vector mul
20725 instruct vmulF(vec dst, vec src) %{
20726   predicate(UseAVX == 0);
20727   match(Set dst (MulVF dst src));
20728   format %{ "mulps   $dst,$src\t! mul packedF" %}
20729   ins_encode %{
20730     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20731   %}
20732   ins_pipe( pipe_slow );
20733 %}
20734 
20735 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20736   predicate(UseAVX > 0);
20737   match(Set dst (MulVF src1 src2));
20738   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
20739   ins_encode %{
20740     int vlen_enc = vector_length_encoding(this);
20741     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20742   %}
20743   ins_pipe( pipe_slow );
20744 %}
20745 
20746 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20747   predicate((UseAVX > 0) &&
20748             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20749   match(Set dst (MulVF src (LoadVector mem)));
20750   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
20751   ins_encode %{
20752     int vlen_enc = vector_length_encoding(this);
20753     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20754   %}
20755   ins_pipe( pipe_slow );
20756 %}
20757 
20758 // Doubles vector mul
20759 instruct vmulD(vec dst, vec src) %{
20760   predicate(UseAVX == 0);
20761   match(Set dst (MulVD dst src));
20762   format %{ "mulpd   $dst,$src\t! mul packedD" %}
20763   ins_encode %{
20764     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20765   %}
20766   ins_pipe( pipe_slow );
20767 %}
20768 
20769 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20770   predicate(UseAVX > 0);
20771   match(Set dst (MulVD src1 src2));
20772   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
20773   ins_encode %{
20774     int vlen_enc = vector_length_encoding(this);
20775     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20776   %}
20777   ins_pipe( pipe_slow );
20778 %}
20779 
20780 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20781   predicate((UseAVX > 0) &&
20782             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20783   match(Set dst (MulVD src (LoadVector mem)));
20784   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
20785   ins_encode %{
20786     int vlen_enc = vector_length_encoding(this);
20787     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20788   %}
20789   ins_pipe( pipe_slow );
20790 %}
20791 
20792 // --------------------------------- DIV --------------------------------------
20793 
20794 // Floats vector div
20795 instruct vdivF(vec dst, vec src) %{
20796   predicate(UseAVX == 0);
20797   match(Set dst (DivVF dst src));
20798   format %{ "divps   $dst,$src\t! div packedF" %}
20799   ins_encode %{
20800     __ divps($dst$$XMMRegister, $src$$XMMRegister);
20801   %}
20802   ins_pipe( pipe_slow );
20803 %}
20804 
20805 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20806   predicate(UseAVX > 0);
20807   match(Set dst (DivVF src1 src2));
20808   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
20809   ins_encode %{
20810     int vlen_enc = vector_length_encoding(this);
20811     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20812   %}
20813   ins_pipe( pipe_slow );
20814 %}
20815 
20816 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20817   predicate((UseAVX > 0) &&
20818             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20819   match(Set dst (DivVF src (LoadVector mem)));
20820   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
20821   ins_encode %{
20822     int vlen_enc = vector_length_encoding(this);
20823     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20824   %}
20825   ins_pipe( pipe_slow );
20826 %}
20827 
20828 // Doubles vector div
20829 instruct vdivD(vec dst, vec src) %{
20830   predicate(UseAVX == 0);
20831   match(Set dst (DivVD dst src));
20832   format %{ "divpd   $dst,$src\t! div packedD" %}
20833   ins_encode %{
20834     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20835   %}
20836   ins_pipe( pipe_slow );
20837 %}
20838 
20839 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20840   predicate(UseAVX > 0);
20841   match(Set dst (DivVD src1 src2));
20842   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
20843   ins_encode %{
20844     int vlen_enc = vector_length_encoding(this);
20845     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20846   %}
20847   ins_pipe( pipe_slow );
20848 %}
20849 
20850 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20851   predicate((UseAVX > 0) &&
20852             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20853   match(Set dst (DivVD src (LoadVector mem)));
20854   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
20855   ins_encode %{
20856     int vlen_enc = vector_length_encoding(this);
20857     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20858   %}
20859   ins_pipe( pipe_slow );
20860 %}
20861 
20862 // ------------------------------ MinMax ---------------------------------------
20863 
20864 // Byte, Short, Int vector Min/Max
20865 instruct minmax_reg_sse(vec dst, vec src) %{
20866   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20867             UseAVX == 0);
20868   match(Set dst (MinV dst src));
20869   match(Set dst (MaxV dst src));
20870   format %{ "vector_minmax  $dst,$src\t!  " %}
20871   ins_encode %{
20872     assert(UseSSE >= 4, "required");
20873 
20874     int opcode = this->ideal_Opcode();
20875     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20876     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20877   %}
20878   ins_pipe( pipe_slow );
20879 %}
20880 
20881 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20882   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20883             UseAVX > 0);
20884   match(Set dst (MinV src1 src2));
20885   match(Set dst (MaxV src1 src2));
20886   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
20887   ins_encode %{
20888     int opcode = this->ideal_Opcode();
20889     int vlen_enc = vector_length_encoding(this);
20890     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20891 
20892     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20893   %}
20894   ins_pipe( pipe_slow );
20895 %}
20896 
20897 // Long vector Min/Max
20898 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20899   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20900             UseAVX == 0);
20901   match(Set dst (MinV dst src));
20902   match(Set dst (MaxV src dst));
20903   effect(TEMP dst, TEMP tmp);
20904   format %{ "vector_minmaxL  $dst,$src\t! using $tmp as TEMP" %}
20905   ins_encode %{
20906     assert(UseSSE >= 4, "required");
20907 
20908     int opcode = this->ideal_Opcode();
20909     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20910     assert(elem_bt == T_LONG, "sanity");
20911 
20912     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20913   %}
20914   ins_pipe( pipe_slow );
20915 %}
20916 
20917 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20918   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20919             UseAVX > 0 && !VM_Version::supports_avx512vl());
20920   match(Set dst (MinV src1 src2));
20921   match(Set dst (MaxV src1 src2));
20922   effect(TEMP dst);
20923   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20924   ins_encode %{
20925     int vlen_enc = vector_length_encoding(this);
20926     int opcode = this->ideal_Opcode();
20927     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20928     assert(elem_bt == T_LONG, "sanity");
20929 
20930     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20931   %}
20932   ins_pipe( pipe_slow );
20933 %}
20934 
20935 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20936   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20937             Matcher::vector_element_basic_type(n) == T_LONG);
20938   match(Set dst (MinV src1 src2));
20939   match(Set dst (MaxV src1 src2));
20940   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20941   ins_encode %{
20942     assert(UseAVX > 2, "required");
20943 
20944     int vlen_enc = vector_length_encoding(this);
20945     int opcode = this->ideal_Opcode();
20946     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20947     assert(elem_bt == T_LONG, "sanity");
20948 
20949     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20950   %}
20951   ins_pipe( pipe_slow );
20952 %}
20953 
20954 // Float/Double vector Min/Max
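// Java Math.min/max semantics differ from raw minps/maxps behavior: if
// either input is NaN the result must be NaN, and min(-0.0, +0.0) must be
// -0.0 (symmetrically for max). The AVX10.2 pattern below can use the
// vminmax* instructions directly; older targets need the blend/temp
// sequences that follow.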
20955 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20956   predicate(VM_Version::supports_avx10_2() &&
20957             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20958   match(Set dst (MinV a b));
20959   match(Set dst (MaxV a b));
20960   format %{ "vector_minmaxFP  $dst, $a, $b" %}
20961   ins_encode %{
20962     int vlen_enc = vector_length_encoding(this);
20963     int opcode = this->ideal_Opcode();
20964     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20965     __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20966   %}
20967   ins_pipe( pipe_slow );
20968 %}
20969 
20970 // Float/Double vector Min/Max
20971 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20972   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20973             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20974             UseAVX > 0);
20975   match(Set dst (MinV a b));
20976   match(Set dst (MaxV a b));
20977   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20978   format %{ "vector_minmaxFP  $dst,$a,$b\t! using $tmp, $atmp, $btmp as TEMP" %}
20979   ins_encode %{
20980     assert(UseAVX > 0, "required");
20981 
20982     int opcode = this->ideal_Opcode();
20983     int vlen_enc = vector_length_encoding(this);
20984     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20985 
20986     __ vminmax_fp(opcode, elem_bt,
20987                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20988                   $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20989   %}
20990   ins_pipe( pipe_slow );
20991 %}
20992 
20993 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20994   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20995             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20996   match(Set dst (MinV a b));
20997   match(Set dst (MaxV a b));
20998   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20999   format %{ "vector_minmaxFP  $dst,$a,$b\t! using $atmp, $btmp as TEMP" %}
21000   ins_encode %{
21001     assert(UseAVX > 2, "required");
21002 
21003     int opcode = this->ideal_Opcode();
21004     int vlen_enc = vector_length_encoding(this);
21005     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21006 
21007     __ evminmax_fp(opcode, elem_bt,
21008                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21009                    $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21010   %}
21011   ins_pipe( pipe_slow );
21012 %}
21013 
21014 // ------------------------------ Unsigned vector Min/Max ----------------------
21015 
21016 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21017   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21018   match(Set dst (UMinV a b));
21019   match(Set dst (UMaxV a b));
21020   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21021   ins_encode %{
21022     int opcode = this->ideal_Opcode();
21023     int vlen_enc = vector_length_encoding(this);
21024     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21025     assert(is_integral_type(elem_bt), "");
21026     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21027   %}
21028   ins_pipe( pipe_slow );
21029 %}
21030 
21031 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21032   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21033   match(Set dst (UMinV a (LoadVector b)));
21034   match(Set dst (UMaxV a (LoadVector b)));
21035   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21036   ins_encode %{
21037     int opcode = this->ideal_Opcode();
21038     int vlen_enc = vector_length_encoding(this);
21039     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21040     assert(is_integral_type(elem_bt), "");
21041     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21042   %}
21043   ins_pipe( pipe_slow );
21044 %}
21045 
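// Unsigned long min/max: vpminuq/vpmaxuq are EVEX-encoded, so without
// AVX-512VL the 128/256-bit case below is emulated (see vpuminmaxq) with a
// compare-and-blend sequence that needs two temporaries.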
21046 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21047   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21048   match(Set dst (UMinV a b));
21049   match(Set dst (UMaxV a b));
21050   effect(TEMP xtmp1, TEMP xtmp2);
21051   format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
21052   ins_encode %{
21053     int opcode = this->ideal_Opcode();
21054     int vlen_enc = vector_length_encoding(this);
21055     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21056   %}
21057   ins_pipe( pipe_slow );
21058 %}
21059 
21060 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21061   match(Set dst (UMinV (Binary dst src2) mask));
21062   match(Set dst (UMaxV (Binary dst src2) mask));
21063   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21064   ins_encode %{
21065     int vlen_enc = vector_length_encoding(this);
21066     BasicType bt = Matcher::vector_element_basic_type(this);
21067     int opc = this->ideal_Opcode();
21068     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21069                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21070   %}
21071   ins_pipe( pipe_slow );
21072 %}
21073 
21074 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21075   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21076   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21077   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21078   ins_encode %{
21079     int vlen_enc = vector_length_encoding(this);
21080     BasicType bt = Matcher::vector_element_basic_type(this);
21081     int opc = this->ideal_Opcode();
21082     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21083                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21084   %}
21085   ins_pipe( pipe_slow );
21086 %}
21087 
21088 // --------------------------------- Signum/CopySign ---------------------------
21089 
21090 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21091   match(Set dst (SignumF dst (Binary zero one)));
21092   effect(KILL cr);
21093   format %{ "signumF $dst, $dst" %}
21094   ins_encode %{
21095     int opcode = this->ideal_Opcode();
21096     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21097   %}
21098   ins_pipe( pipe_slow );
21099 %}
21100 
21101 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21102   match(Set dst (SignumD dst (Binary zero one)));
21103   effect(KILL cr);
21104   format %{ "signumD $dst, $dst" %}
21105   ins_encode %{
21106     int opcode = this->ideal_Opcode();
21107     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21108   %}
21109   ins_pipe( pipe_slow );
21110 %}
21111 
21112 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21113   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21114   match(Set dst (SignumVF src (Binary zero one)));
21115   match(Set dst (SignumVD src (Binary zero one)));
21116   effect(TEMP dst, TEMP xtmp1);
21117   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21118   ins_encode %{
21119     int opcode = this->ideal_Opcode();
21120     int vec_enc = vector_length_encoding(this);
21121     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21122                          $xtmp1$$XMMRegister, vec_enc);
21123   %}
21124   ins_pipe( pipe_slow );
21125 %}
21126 
21127 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21128   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21129   match(Set dst (SignumVF src (Binary zero one)));
21130   match(Set dst (SignumVD src (Binary zero one)));
21131   effect(TEMP dst, TEMP ktmp1);
21132   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21133   ins_encode %{
21134     int opcode = this->ideal_Opcode();
21135     int vec_enc = vector_length_encoding(this);
21136     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21137                           $ktmp1$$KRegister, vec_enc);
21138   %}
21139   ins_pipe( pipe_slow );
21140 %}
21141 
21142 // ---------------------------------------
21143 // For copySign, use 0xE4 as the imm8 truth-table selector for vpternlog
21144 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21145 // C (xmm2) is set to 0x7FFFFFFF
21146 // Wherever xmm2 is 0 (the sign bit), we want to pick from B (the sign operand)
21147 // Wherever xmm2 is 1 (magnitude bits), we want to pick from A (the magnitude operand)
21148 //
21149 // A B C Result
21150 // 0 0 0 0
21151 // 0 0 1 0
21152 // 0 1 0 1
21153 // 0 1 1 0
21154 // 1 0 0 0
21155 // 1 0 1 1
21156 // 1 1 0 1
21157 // 1 1 1 1
21158 //
21159 // Reading Result from the high bit (A,B,C = 1,1,1) down to the low bit gives 0b11100100 = 0xE4
21160 // ---------------------------------------
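//
// A minimal C++ sketch (illustrative only, not part of this file) of the
// per-bit operation vpternlog performs with imm8 = 0xE4, i.e. c ? a : b:
//
//   static inline uint64_t ternlog_0xE4(uint64_t a, uint64_t b, uint64_t c) {
//     return (a & c) | (b & ~c);   // bitwise select: c ? a : b
//   }
//
// With c = 0x7FFFFFFF in each lane this keeps the magnitude bits of a
// ($dst) and takes the sign bit from b ($src), matching Java's
// Math.copySign.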
21161 
21162 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21163   match(Set dst (CopySignF dst src));
21164   effect(TEMP tmp1, TEMP tmp2);
21165   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21166   ins_encode %{
21167     __ movl($tmp2$$Register, 0x7FFFFFFF);
21168     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21169     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21170   %}
21171   ins_pipe( pipe_slow );
21172 %}
21173 
21174 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21175   match(Set dst (CopySignD dst (Binary src zero)));
21176   ins_cost(100);
21177   effect(TEMP tmp1, TEMP tmp2);
21178   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21179   ins_encode %{
21180     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21181     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21182     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21183   %}
21184   ins_pipe( pipe_slow );
21185 %}
21186 
21187 //----------------------------- CompressBits/ExpandBits ------------------------
21188 
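// Worked examples (illustrative only) of the BMI2 semantics used below:
//
//   pext (parallel bit extract): gather the src bits selected by mask into
//   the low bits of the result.
//     pext(src = 0b11001010, mask = 0b11110000)  =>  0b00001100
//
//   pdep (parallel bit deposit): scatter the low src bits into the bit
//   positions selected by mask.
//     pdep(src = 0b00001100, mask = 0b11110000)  =>  0b11000000
//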
21189 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21190   predicate(n->bottom_type()->isa_int());
21191   match(Set dst (CompressBits src mask));
21192   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21193   ins_encode %{
21194     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21195   %}
21196   ins_pipe( pipe_slow );
21197 %}
21198 
21199 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21200   predicate(n->bottom_type()->isa_int());
21201   match(Set dst (ExpandBits src mask));
21202   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21203   ins_encode %{
21204     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21205   %}
21206   ins_pipe( pipe_slow );
21207 %}
21208 
21209 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21210   predicate(n->bottom_type()->isa_int());
21211   match(Set dst (CompressBits src (LoadI mask)));
21212   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21213   ins_encode %{
21214     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21215   %}
21216   ins_pipe( pipe_slow );
21217 %}
21218 
21219 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21220   predicate(n->bottom_type()->isa_int());
21221   match(Set dst (ExpandBits src (LoadI mask)));
21222   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21223   ins_encode %{
21224     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21225   %}
21226   ins_pipe( pipe_slow );
21227 %}
21228 
21229 // --------------------------------- Sqrt --------------------------------------
21230 
21231 instruct vsqrtF_reg(vec dst, vec src) %{
21232   match(Set dst (SqrtVF src));
21233   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
21234   ins_encode %{
21235     assert(UseAVX > 0, "required");
21236     int vlen_enc = vector_length_encoding(this);
21237     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21238   %}
21239   ins_pipe( pipe_slow );
21240 %}
21241 
21242 instruct vsqrtF_mem(vec dst, memory mem) %{
21243   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21244   match(Set dst (SqrtVF (LoadVector mem)));
21245   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
21246   ins_encode %{
21247     assert(UseAVX > 0, "required");
21248     int vlen_enc = vector_length_encoding(this);
21249     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21250   %}
21251   ins_pipe( pipe_slow );
21252 %}
21253 
21254 // Doubles vector sqrt
21255 instruct vsqrtD_reg(vec dst, vec src) %{
21256   match(Set dst (SqrtVD src));
21257   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
21258   ins_encode %{
21259     assert(UseAVX > 0, "required");
21260     int vlen_enc = vector_length_encoding(this);
21261     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21262   %}
21263   ins_pipe( pipe_slow );
21264 %}
21265 
21266 instruct vsqrtD_mem(vec dst, memory mem) %{
21267   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21268   match(Set dst (SqrtVD (LoadVector mem)));
21269   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
21270   ins_encode %{
21271     assert(UseAVX > 0, "required");
21272     int vlen_enc = vector_length_encoding(this);
21273     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21274   %}
21275   ins_pipe( pipe_slow );
21276 %}
21277 
21278 // ------------------------------ Shift ---------------------------------------
21279 
21280 // Left and right shift count vectors are the same on x86
21281 // (only lowest bits of xmm reg are used for count).
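// For example (illustrative, assuming SSE2 semantics): psllw/psrlw/psraw
// all read the entire low 64 bits of an xmm count operand as one unsigned
// count, so a single transfer of the scalar count serves both directions:
//
//   __ movdl(xmm1, rcx);   // count in xmm1[31:0], upper bits zeroed
//   __ psllw(xmm0, xmm1);  // shift words left by xmm1[63:0]
//   __ psrlw(xmm0, xmm1);  // shift words right by the same count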
21282 instruct vshiftcnt(vec dst, rRegI cnt) %{
21283   match(Set dst (LShiftCntV cnt));
21284   match(Set dst (RShiftCntV cnt));
21285   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21286   ins_encode %{
21287     __ movdl($dst$$XMMRegister, $cnt$$Register);
21288   %}
21289   ins_pipe( pipe_slow );
21290 %}
21291 
21292 // Byte vector shift
21293 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21294   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21295   match(Set dst ( LShiftVB src shift));
21296   match(Set dst ( RShiftVB src shift));
21297   match(Set dst (URShiftVB src shift));
21298   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21299   format %{ "vector_byte_shift $dst,$src,$shift" %}
21300   ins_encode %{
21301     assert(UseSSE > 3, "required");
21302     int opcode = this->ideal_Opcode();
21303     bool sign = (opcode != Op_URShiftVB);
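    // Extend bytes to words (sign- or zero-extended per the opcode), shift
    // as words, then mask each word back to a byte and pack down.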
21304     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21305     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21306     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21307     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21308     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21309   %}
21310   ins_pipe( pipe_slow );
21311 %}
21312 
21313 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21314   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21315             UseAVX <= 1);
21316   match(Set dst ( LShiftVB src shift));
21317   match(Set dst ( RShiftVB src shift));
21318   match(Set dst (URShiftVB src shift));
21319   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21320   format %{ "vector_byte_shift $dst,$src,$shift" %}
21321   ins_encode %{
21322     assert(UseSSE > 3, "required");
21323     int opcode = this->ideal_Opcode();
21324     bool sign = (opcode != Op_URShiftVB);
21325     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21326     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21327     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21328     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21329     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21330     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21331     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21332     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21333     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21334   %}
21335   ins_pipe( pipe_slow );
21336 %}
21337 
21338 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21339   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21340             UseAVX > 1);
21341   match(Set dst ( LShiftVB src shift));
21342   match(Set dst ( RShiftVB src shift));
21343   match(Set dst (URShiftVB src shift));
21344   effect(TEMP dst, TEMP tmp);
21345   format %{ "vector_byte_shift $dst,$src,$shift" %}
21346   ins_encode %{
21347     int opcode = this->ideal_Opcode();
21348     bool sign = (opcode != Op_URShiftVB);
21349     int vlen_enc = Assembler::AVX_256bit;
21350     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21351     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21352     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21353     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21354     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21355   %}
21356   ins_pipe( pipe_slow );
21357 %}
21358 
21359 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21360   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21361   match(Set dst ( LShiftVB src shift));
21362   match(Set dst ( RShiftVB src shift));
21363   match(Set dst (URShiftVB src shift));
21364   effect(TEMP dst, TEMP tmp);
21365   format %{ "vector_byte_shift $dst,$src,$shift" %}
21366   ins_encode %{
21367     assert(UseAVX > 1, "required");
21368     int opcode = this->ideal_Opcode();
21369     bool sign = (opcode != Op_URShiftVB);
21370     int vlen_enc = Assembler::AVX_256bit;
21371     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21372     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21373     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21374     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21375     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21376     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21377     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21378     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21379     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21380   %}
21381   ins_pipe( pipe_slow );
21382 %}
21383 
21384 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21385   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21386   match(Set dst ( LShiftVB src shift));
21387   match(Set dst  (RShiftVB src shift));
21388   match(Set dst (URShiftVB src shift));
21389   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21390   format %{ "vector_byte_shift $dst,$src,$shift" %}
21391   ins_encode %{
21392     assert(UseAVX > 2, "required");
21393     int opcode = this->ideal_Opcode();
21394     bool sign = (opcode != Op_URShiftVB);
21395     int vlen_enc = Assembler::AVX_512bit;
21396     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21397     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21398     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21399     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21400     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21401     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21402     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21403     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21404     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21405     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21406     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21407     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21408   %}
21409   ins_pipe( pipe_slow );
21410 %}
21411 
21412 // A shorts vector logical right shift produces an incorrect Java result
21413 // for negative data, because Java code converts a short value into an int
21414 // with sign extension before the shift. Char vectors are fine, since chars
21415 // are unsigned values.
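// For example (illustrative Java, not code in this file):
//   short s = (short)0x8000;   // -32768
//   int   r = s >>> 4;         // s is sign-extended to 0xFFFF8000 first,
//                              // so r == 0x0FFFF800, not 0x0800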
21416 // Shorts/Chars vector shift
21417 instruct vshiftS(vec dst, vec src, vec shift) %{
21418   predicate(!n->as_ShiftV()->is_var_shift());
21419   match(Set dst ( LShiftVS src shift));
21420   match(Set dst ( RShiftVS src shift));
21421   match(Set dst (URShiftVS src shift));
21422   effect(TEMP dst, USE src, USE shift);
21423   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21424   ins_encode %{
21425     int opcode = this->ideal_Opcode();
21426     if (UseAVX > 0) {
21427       int vlen_enc = vector_length_encoding(this);
21428       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21429     } else {
21430       int vlen = Matcher::vector_length(this);
21431       if (vlen == 2) {
21432         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21433         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21434       } else if (vlen == 4) {
21435         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21436         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21437       } else {
21438         assert (vlen == 8, "sanity");
21439         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21440         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21441       }
21442     }
21443   %}
21444   ins_pipe( pipe_slow );
21445 %}
21446 
21447 // Integers vector shift
21448 instruct vshiftI(vec dst, vec src, vec shift) %{
21449   predicate(!n->as_ShiftV()->is_var_shift());
21450   match(Set dst ( LShiftVI src shift));
21451   match(Set dst ( RShiftVI src shift));
21452   match(Set dst (URShiftVI src shift));
21453   effect(TEMP dst, USE src, USE shift);
21454   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21455   ins_encode %{
21456     int opcode = this->ideal_Opcode();
21457     if (UseAVX > 0) {
21458       int vlen_enc = vector_length_encoding(this);
21459       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21460     } else {
21461       int vlen = Matcher::vector_length(this);
21462       if (vlen == 2) {
21463         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21464         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21465       } else {
21466         assert(vlen == 4, "sanity");
21467         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21468         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21469       }
21470     }
21471   %}
21472   ins_pipe( pipe_slow );
21473 %}
21474 
21475 // Integers vector constant shift
21476 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21477   match(Set dst (LShiftVI src (LShiftCntV shift)));
21478   match(Set dst (RShiftVI src (RShiftCntV shift)));
21479   match(Set dst (URShiftVI src (RShiftCntV shift)));
21480   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21481   ins_encode %{
21482     int opcode = this->ideal_Opcode();
21483     if (UseAVX > 0) {
21484       int vector_len = vector_length_encoding(this);
21485       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21486     } else {
21487       int vlen = Matcher::vector_length(this);
21488       if (vlen == 2) {
21489         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21490         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21491       } else {
21492         assert(vlen == 4, "sanity");
21493         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21494         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21495       }
21496     }
21497   %}
21498   ins_pipe( pipe_slow );
21499 %}
21500 
21501 // Longs vector shift
21502 instruct vshiftL(vec dst, vec src, vec shift) %{
21503   predicate(!n->as_ShiftV()->is_var_shift());
21504   match(Set dst ( LShiftVL src shift));
21505   match(Set dst (URShiftVL src shift));
21506   effect(TEMP dst, USE src, USE shift);
21507   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21508   ins_encode %{
21509     int opcode = this->ideal_Opcode();
21510     if (UseAVX > 0) {
21511       int vlen_enc = vector_length_encoding(this);
21512       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21513     } else {
21514       assert(Matcher::vector_length(this) == 2, "");
21515       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21516       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21517     }
21518   %}
21519   ins_pipe( pipe_slow );
21520 %}
21521 
21522 // Longs vector constant shift
21523 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21524   match(Set dst (LShiftVL src (LShiftCntV shift)));
21525   match(Set dst (URShiftVL src (RShiftCntV shift)));
21526   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21527   ins_encode %{
21528     int opcode = this->ideal_Opcode();
21529     if (UseAVX > 0) {
21530       int vector_len = vector_length_encoding(this);
21531       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21532     } else {
21533       assert(Matcher::vector_length(this) == 2, "");
21534       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21535       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21536     }
21537   %}
21538   ins_pipe( pipe_slow );
21539 %}
21540 
21541 // ------------------- Arithmetic Right Shift ---------------------------------
21542 // Long vector arithmetic right shift
21543 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21544   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21545   match(Set dst (RShiftVL src shift));
21546   effect(TEMP dst, TEMP tmp);
21547   format %{ "vshiftq $dst,$src,$shift" %}
21548   ins_encode %{
21549     uint vlen = Matcher::vector_length(this);
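    // Neither SSE2 nor AVX2 has a variable-count arithmetic right shift for
    // 64-bit lanes, so it is synthesized from the logical shift via
    // ((x >>> s) ^ m) - m, where m = 0x8000000000000000 >>> s restores the
    // sign bits.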
21550     if (vlen == 2) {
21551       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21552       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21553       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21554       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21555       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21556       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21557     } else {
21558       assert(vlen == 4, "sanity");
21559       assert(UseAVX > 1, "required");
21560       int vlen_enc = Assembler::AVX_256bit;
21561       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21562       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21563       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21564       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21565       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21566     }
21567   %}
21568   ins_pipe( pipe_slow );
21569 %}
21570 
21571 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21572   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21573   match(Set dst (RShiftVL src shift));
21574   format %{ "vshiftq $dst,$src,$shift" %}
21575   ins_encode %{
21576     int vlen_enc = vector_length_encoding(this);
21577     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21578   %}
21579   ins_pipe( pipe_slow );
21580 %}
21581 
21582 // ------------------- Variable Shift -----------------------------
21583 // Byte variable shift
21584 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21585   predicate(Matcher::vector_length(n) <= 8 &&
21586             n->as_ShiftV()->is_var_shift() &&
21587             !VM_Version::supports_avx512bw());
21588   match(Set dst ( LShiftVB src shift));
21589   match(Set dst ( RShiftVB src shift));
21590   match(Set dst (URShiftVB src shift));
21591   effect(TEMP dst, TEMP vtmp);
21592   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21593   ins_encode %{
21594     assert(UseAVX >= 2, "required");
21595 
21596     int opcode = this->ideal_Opcode();
21597     int vlen_enc = Assembler::AVX_128bit;
21598     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21599     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21600   %}
21601   ins_pipe( pipe_slow );
21602 %}
21603 
21604 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21605   predicate(Matcher::vector_length(n) == 16 &&
21606             n->as_ShiftV()->is_var_shift() &&
21607             !VM_Version::supports_avx512bw());
21608   match(Set dst ( LShiftVB src shift));
21609   match(Set dst ( RShiftVB src shift));
21610   match(Set dst (URShiftVB src shift));
21611   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21612   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21613   ins_encode %{
21614     assert(UseAVX >= 2, "required");
21615 
21616     int opcode = this->ideal_Opcode();
21617     int vlen_enc = Assembler::AVX_128bit;
21618     // Shift lower half and get word result in dst
21619     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21620 
21621     // Shift upper half and get word result in vtmp1
21622     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21623     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21624     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21625 
21626     // Merge and down convert the two word results to byte in dst
21627     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21628   %}
21629   ins_pipe( pipe_slow );
21630 %}
21631 
21632 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21633   predicate(Matcher::vector_length(n) == 32 &&
21634             n->as_ShiftV()->is_var_shift() &&
21635             !VM_Version::supports_avx512bw());
21636   match(Set dst ( LShiftVB src shift));
21637   match(Set dst ( RShiftVB src shift));
21638   match(Set dst (URShiftVB src shift));
21639   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21640   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21641   ins_encode %{
21642     assert(UseAVX >= 2, "required");
21643 
21644     int opcode = this->ideal_Opcode();
21645     int vlen_enc = Assembler::AVX_128bit;
21646     // Process lower 128 bits and get result in dst
21647     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21648     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21649     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21650     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21651     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21652 
21653     // Process higher 128 bits and get result in vtmp3
21654     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21655     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21656     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21657     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21658     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21659     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21660     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21661 
21662     // Merge the two results in dst
21663     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21664   %}
21665   ins_pipe( pipe_slow );
21666 %}
21667 
21668 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21669   predicate(Matcher::vector_length(n) <= 32 &&
21670             n->as_ShiftV()->is_var_shift() &&
21671             VM_Version::supports_avx512bw());
21672   match(Set dst ( LShiftVB src shift));
21673   match(Set dst ( RShiftVB src shift));
21674   match(Set dst (URShiftVB src shift));
21675   effect(TEMP dst, TEMP vtmp);
21676   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21677   ins_encode %{
21678     assert(UseAVX > 2, "required");
21679 
21680     int opcode = this->ideal_Opcode();
21681     int vlen_enc = vector_length_encoding(this);
21682     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21683   %}
21684   ins_pipe( pipe_slow );
21685 %}
21686 
21687 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21688   predicate(Matcher::vector_length(n) == 64 &&
21689             n->as_ShiftV()->is_var_shift() &&
21690             VM_Version::supports_avx512bw());
21691   match(Set dst ( LShiftVB src shift));
21692   match(Set dst ( RShiftVB src shift));
21693   match(Set dst (URShiftVB src shift));
21694   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21695   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21696   ins_encode %{
21697     assert(UseAVX > 2, "required");
21698 
21699     int opcode = this->ideal_Opcode();
21700     int vlen_enc = Assembler::AVX_256bit;
21701     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21702     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21703     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21704     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21705     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21706   %}
21707   ins_pipe( pipe_slow );
21708 %}
21709 
21710 // Short variable shift
21711 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21712   predicate(Matcher::vector_length(n) <= 8 &&
21713             n->as_ShiftV()->is_var_shift() &&
21714             !VM_Version::supports_avx512bw());
21715   match(Set dst ( LShiftVS src shift));
21716   match(Set dst ( RShiftVS src shift));
21717   match(Set dst (URShiftVS src shift));
21718   effect(TEMP dst, TEMP vtmp);
21719   format %{ "vector_varshift_short $dst, $src, $shift\t! using $vtmp as TEMP" %}
21720   ins_encode %{
21721     assert(UseAVX >= 2, "required");
21722 
21723     int opcode = this->ideal_Opcode();
21724     bool sign = (opcode != Op_URShiftVS);
21725     int vlen_enc = Assembler::AVX_256bit;
21726     __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
21727     __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
21728     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21729     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21730     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21731     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21732   %}
21733   ins_pipe( pipe_slow );
21734 %}
21735 
21736 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21737   predicate(Matcher::vector_length(n) == 16 &&
21738             n->as_ShiftV()->is_var_shift() &&
21739             !VM_Version::supports_avx512bw());
21740   match(Set dst ( LShiftVS src shift));
21741   match(Set dst ( RShiftVS src shift));
21742   match(Set dst (URShiftVS src shift));
21743   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21744   format %{ "vector_varshift_short $dst, $src, $shift\t! using $vtmp1, $vtmp2 as TEMP" %}
21745   ins_encode %{
21746     assert(UseAVX >= 2, "required");
21747 
21748     int opcode = this->ideal_Opcode();
21749     bool sign = (opcode != Op_URShiftVS);
21750     int vlen_enc = Assembler::AVX_256bit;
21751     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21752     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21753     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21754     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21755     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21756 
21757     // Shift upper half, with result in dst using vtmp1 as TEMP
21758     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21759     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21760     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21761     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21762     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21763     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21764 
21765     // Merge lower and upper half result into dst
21766     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21767     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21768   %}
21769   ins_pipe( pipe_slow );
21770 %}
21771 
21772 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21773   predicate(n->as_ShiftV()->is_var_shift() &&
21774             VM_Version::supports_avx512bw());
21775   match(Set dst ( LShiftVS src shift));
21776   match(Set dst ( RShiftVS src shift));
21777   match(Set dst (URShiftVS src shift));
21778   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21779   ins_encode %{
21780     assert(UseAVX > 2, "required");
21781 
21782     int opcode = this->ideal_Opcode();
21783     int vlen_enc = vector_length_encoding(this);
21784     if (!VM_Version::supports_avx512vl()) {
21785       vlen_enc = Assembler::AVX_512bit;
21786     }
21787     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21788   %}
21789   ins_pipe( pipe_slow );
21790 %}
21791 
21792 // Integer variable shift
21793 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21794   predicate(n->as_ShiftV()->is_var_shift());
21795   match(Set dst ( LShiftVI src shift));
21796   match(Set dst ( RShiftVI src shift));
21797   match(Set dst (URShiftVI src shift));
21798   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21799   ins_encode %{
21800     assert(UseAVX >= 2, "required");
21801 
21802     int opcode = this->ideal_Opcode();
21803     int vlen_enc = vector_length_encoding(this);
21804     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21805   %}
21806   ins_pipe( pipe_slow );
21807 %}
21808 
21809 // Long variable shift
21810 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21811   predicate(n->as_ShiftV()->is_var_shift());
21812   match(Set dst ( LShiftVL src shift));
21813   match(Set dst (URShiftVL src shift));
21814   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21815   ins_encode %{
21816     assert(UseAVX >= 2, "required");
21817 
21818     int opcode = this->ideal_Opcode();
21819     int vlen_enc = vector_length_encoding(this);
21820     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21821   %}
21822   ins_pipe( pipe_slow );
21823 %}
21824 
21825 // Long variable arithmetic right shift
21826 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21827   predicate(Matcher::vector_length(n) <= 4 &&
21828             n->as_ShiftV()->is_var_shift() &&
21829             UseAVX == 2);
21830   match(Set dst (RShiftVL src shift));
21831   effect(TEMP dst, TEMP vtmp);
21832   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21833   ins_encode %{
21834     int opcode = this->ideal_Opcode();
21835     int vlen_enc = vector_length_encoding(this);
21836     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21837                  $vtmp$$XMMRegister);
21838   %}
21839   ins_pipe( pipe_slow );
21840 %}
21841 
21842 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21843   predicate(n->as_ShiftV()->is_var_shift() &&
21844             UseAVX > 2);
21845   match(Set dst (RShiftVL src shift));
21846   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21847   ins_encode %{
21848     int opcode = this->ideal_Opcode();
21849     int vlen_enc = vector_length_encoding(this);
21850     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21851   %}
21852   ins_pipe( pipe_slow );
21853 %}
21854 
21855 // --------------------------------- AND --------------------------------------
21856 
21857 instruct vand(vec dst, vec src) %{
21858   predicate(UseAVX == 0);
21859   match(Set dst (AndV dst src));
21860   format %{ "pand    $dst,$src\t! and vectors" %}
21861   ins_encode %{
21862     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21863   %}
21864   ins_pipe( pipe_slow );
21865 %}
21866 
21867 instruct vand_reg(vec dst, vec src1, vec src2) %{
21868   predicate(UseAVX > 0);
21869   match(Set dst (AndV src1 src2));
21870   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
21871   ins_encode %{
21872     int vlen_enc = vector_length_encoding(this);
21873     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21874   %}
21875   ins_pipe( pipe_slow );
21876 %}
21877 
21878 instruct vand_mem(vec dst, vec src, memory mem) %{
21879   predicate((UseAVX > 0) &&
21880             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21881   match(Set dst (AndV src (LoadVector mem)));
21882   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
21883   ins_encode %{
21884     int vlen_enc = vector_length_encoding(this);
21885     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21886   %}
21887   ins_pipe( pipe_slow );
21888 %}
21889 
21890 // --------------------------------- OR ---------------------------------------
21891 
21892 instruct vor(vec dst, vec src) %{
21893   predicate(UseAVX == 0);
21894   match(Set dst (OrV dst src));
21895   format %{ "por     $dst,$src\t! or vectors" %}
21896   ins_encode %{
21897     __ por($dst$$XMMRegister, $src$$XMMRegister);
21898   %}
21899   ins_pipe( pipe_slow );
21900 %}
21901 
21902 instruct vor_reg(vec dst, vec src1, vec src2) %{
21903   predicate(UseAVX > 0);
21904   match(Set dst (OrV src1 src2));
21905   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
21906   ins_encode %{
21907     int vlen_enc = vector_length_encoding(this);
21908     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21909   %}
21910   ins_pipe( pipe_slow );
21911 %}
21912 
21913 instruct vor_mem(vec dst, vec src, memory mem) %{
21914   predicate((UseAVX > 0) &&
21915             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21916   match(Set dst (OrV src (LoadVector mem)));
21917   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
21918   ins_encode %{
21919     int vlen_enc = vector_length_encoding(this);
21920     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21921   %}
21922   ins_pipe( pipe_slow );
21923 %}
21924 
21925 // --------------------------------- XOR --------------------------------------
21926 
21927 instruct vxor(vec dst, vec src) %{
21928   predicate(UseAVX == 0);
21929   match(Set dst (XorV dst src));
21930   format %{ "pxor    $dst,$src\t! xor vectors" %}
21931   ins_encode %{
21932     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21933   %}
21934   ins_pipe( pipe_slow );
21935 %}
21936 
21937 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21938   predicate(UseAVX > 0);
21939   match(Set dst (XorV src1 src2));
21940   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
21941   ins_encode %{
21942     int vlen_enc = vector_length_encoding(this);
21943     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21944   %}
21945   ins_pipe( pipe_slow );
21946 %}
21947 
21948 instruct vxor_mem(vec dst, vec src, memory mem) %{
21949   predicate((UseAVX > 0) &&
21950             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21951   match(Set dst (XorV src (LoadVector mem)));
21952   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
21953   ins_encode %{
21954     int vlen_enc = vector_length_encoding(this);
21955     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21956   %}
21957   ins_pipe( pipe_slow );
21958 %}
21959 
21960 // --------------------------------- VectorCast --------------------------------------
21961 
21962 instruct vcastBtoX(vec dst, vec src) %{
21963   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21964   match(Set dst (VectorCastB2X src));
21965   format %{ "vector_cast_b2x $dst,$src\t!" %}
21966   ins_encode %{
21967     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21968     int vlen_enc = vector_length_encoding(this);
21969     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21970   %}
21971   ins_pipe( pipe_slow );
21972 %}
21973 
21974 instruct vcastBtoD(legVec dst, legVec src) %{
21975   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21976   match(Set dst (VectorCastB2X src));
21977   format %{ "vector_cast_b2x $dst,$src\t!" %}
21978   ins_encode %{
21979     int vlen_enc = vector_length_encoding(this);
21980     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21981   %}
21982   ins_pipe( pipe_slow );
21983 %}
21984 
21985 instruct castStoX(vec dst, vec src) %{
21986   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21987             Matcher::vector_length(n->in(1)) <= 8 && // src
21988             Matcher::vector_element_basic_type(n) == T_BYTE);
21989   match(Set dst (VectorCastS2X src));
21990   format %{ "vector_cast_s2x $dst,$src" %}
21991   ins_encode %{
21992     assert(UseAVX > 0, "required");
21993 
21994     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21995     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21996   %}
21997   ins_pipe( pipe_slow );
21998 %}
21999 
22000 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
22001   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22002             Matcher::vector_length(n->in(1)) == 16 && // src
22003             Matcher::vector_element_basic_type(n) == T_BYTE);
22004   effect(TEMP dst, TEMP vtmp);
22005   match(Set dst (VectorCastS2X src));
22006   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22007   ins_encode %{
22008     assert(UseAVX > 0, "required");
22009 
22010     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22011     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22012     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22013     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22014   %}
22015   ins_pipe( pipe_slow );
22016 %}
22017 
22018 instruct vcastStoX_evex(vec dst, vec src) %{
22019   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22020             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22021   match(Set dst (VectorCastS2X src));
22022   format %{ "vector_cast_s2x $dst,$src\t!" %}
22023   ins_encode %{
22024     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22025     int src_vlen_enc = vector_length_encoding(this, $src);
22026     int vlen_enc = vector_length_encoding(this);
22027     switch (to_elem_bt) {
22028       case T_BYTE:
22029         if (!VM_Version::supports_avx512vl()) {
22030           vlen_enc = Assembler::AVX_512bit;
22031         }
22032         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22033         break;
22034       case T_INT:
22035         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22036         break;
22037       case T_FLOAT:
22038         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22039         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22040         break;
22041       case T_LONG:
22042         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22043         break;
22044       case T_DOUBLE: {
22045         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22046         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22047         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22048         break;
22049       }
22050       default:
22051         ShouldNotReachHere();
22052     }
22053   %}
22054   ins_pipe( pipe_slow );
22055 %}
22056 
22057 instruct castItoX(vec dst, vec src) %{
22058   predicate(UseAVX <= 2 &&
22059             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22060             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22061   match(Set dst (VectorCastI2X src));
22062   format %{ "vector_cast_i2x $dst,$src" %}
22063   ins_encode %{
22064     assert(UseAVX > 0, "required");
22065 
22066     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22067     int vlen_enc = vector_length_encoding(this, $src);
22068 
22069     if (to_elem_bt == T_BYTE) {
22070       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22071       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22072       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22073     } else {
22074       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22075       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22076       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22077     }
22078   %}
22079   ins_pipe( pipe_slow );
22080 %}
22081 
22082 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22083   predicate(UseAVX <= 2 &&
22084             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22085             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22086   match(Set dst (VectorCastI2X src));
22087   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22088   effect(TEMP dst, TEMP vtmp);
22089   ins_encode %{
22090     assert(UseAVX > 0, "required");
22091 
22092     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22093     int vlen_enc = vector_length_encoding(this, $src);
22094 
22095     if (to_elem_bt == T_BYTE) {
22096       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22097       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22098       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22099       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22100     } else {
22101       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22102       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22103       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22104       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22105     }
22106   %}
22107   ins_pipe( pipe_slow );
22108 %}
22109 
22110 instruct vcastItoX_evex(vec dst, vec src) %{
22111   predicate(UseAVX > 2 ||
22112             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22113   match(Set dst (VectorCastI2X src));
22114   format %{ "vector_cast_i2x $dst,$src\t!" %}
22115   ins_encode %{
22116     assert(UseAVX > 0, "required");
22117 
22118     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22119     int src_vlen_enc = vector_length_encoding(this, $src);
22120     int dst_vlen_enc = vector_length_encoding(this);
22121     switch (dst_elem_bt) {
22122       case T_BYTE:
22123         if (!VM_Version::supports_avx512vl()) {
22124           src_vlen_enc = Assembler::AVX_512bit;
22125         }
22126         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22127         break;
22128       case T_SHORT:
22129         if (!VM_Version::supports_avx512vl()) {
22130           src_vlen_enc = Assembler::AVX_512bit;
22131         }
22132         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22133         break;
22134       case T_FLOAT:
22135         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22136         break;
22137       case T_LONG:
22138         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22139         break;
22140       case T_DOUBLE:
22141         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22142         break;
22143       default:
22144         ShouldNotReachHere();
22145     }
22146   %}
22147   ins_pipe( pipe_slow );
22148 %}
22149 
22150 instruct vcastLtoBS(vec dst, vec src) %{
22151   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22152             UseAVX <= 2);
22153   match(Set dst (VectorCastL2X src));
22154   format %{ "vector_cast_l2x  $dst,$src" %}
22155   ins_encode %{
22156     assert(UseAVX > 0, "required");
22157 
22158     int vlen = Matcher::vector_length_in_bytes(this, $src);
22159     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
22160     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22161                                                       : ExternalAddress(vector_int_to_short_mask());
22162     if (vlen <= 16) {
22163       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22164       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22165       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22166     } else {
22167       assert(vlen <= 32, "required");
22168       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22169       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22170       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22171       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22172     }
22173     if (to_elem_bt == T_BYTE) {
22174       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22175     }
22176   %}
22177   ins_pipe( pipe_slow );
22178 %}
22179 
22180 instruct vcastLtoX_evex(vec dst, vec src) %{
22181   predicate(UseAVX > 2 ||
22182             (Matcher::vector_element_basic_type(n) == T_INT ||
22183              Matcher::vector_element_basic_type(n) == T_FLOAT ||
22184              Matcher::vector_element_basic_type(n) == T_DOUBLE));
22185   match(Set dst (VectorCastL2X src));
22186   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22187   ins_encode %{
22188     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22189     int vlen = Matcher::vector_length_in_bytes(this, $src);
22190     int vlen_enc = vector_length_encoding(this, $src);
22191     switch (to_elem_bt) {
22192       case T_BYTE:
22193         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22194           vlen_enc = Assembler::AVX_512bit;
22195         }
22196         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22197         break;
22198       case T_SHORT:
22199         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22200           vlen_enc = Assembler::AVX_512bit;
22201         }
22202         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22203         break;
22204       case T_INT:
22205         if (vlen == 8) {
22206           if ($dst$$XMMRegister != $src$$XMMRegister) {
22207             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22208           }
22209         } else if (vlen == 16) {
22210           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22211         } else if (vlen == 32) {
22212           if (UseAVX > 2) {
22213             if (!VM_Version::supports_avx512vl()) {
22214               vlen_enc = Assembler::AVX_512bit;
22215             }
22216             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22217           } else {
22218             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22219             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22220           }
22221         } else { // vlen == 64
22222           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22223         }
22224         break;
22225       case T_FLOAT:
22226         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22227         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22228         break;
22229       case T_DOUBLE:
22230         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22231         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22232         break;
22233 
22234       default: assert(false, "%s", type2name(to_elem_bt));
22235     }
22236   %}
22237   ins_pipe( pipe_slow );
22238 %}
22239 
22240 instruct vcastFtoD_reg(vec dst, vec src) %{
22241   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22242   match(Set dst (VectorCastF2X src));
22243   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22244   ins_encode %{
22245     int vlen_enc = vector_length_encoding(this);
22246     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22247   %}
22248   ins_pipe( pipe_slow );
22249 %}
22250 
22251 
22252 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22253   predicate(!VM_Version::supports_avx10_2() &&
22254             !VM_Version::supports_avx512vl() &&
22255             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22256             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22257             is_integral_type(Matcher::vector_element_basic_type(n)));
22258   match(Set dst (VectorCastF2X src));
22259   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22260   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22261   ins_encode %{
22262     int vlen_enc = vector_length_encoding(this, $src);
22263     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register when loading addresses
    // wider than 32 bits in register-indirect addressing mode, since stub constants live in
    // the code cache and ReservedCodeCacheSize is currently capped at 2G. Targets are free
    // to raise that limit, but a code cache larger than 2G looks unreasonable in practice;
    // on the other hand, the cap saves a temporary register allocation, which in the
    // limiting case can prevent spilling in blocks with high register pressure.
22271     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22272                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22273                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22274   %}
22275   ins_pipe( pipe_slow );
22276 %}
22277 
22278 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22279   predicate(!VM_Version::supports_avx10_2() &&
22280             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22281             is_integral_type(Matcher::vector_element_basic_type(n)));
22282   match(Set dst (VectorCastF2X src));
22283   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22284   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22285   ins_encode %{
22286     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22287     if (to_elem_bt == T_LONG) {
22288       int vlen_enc = vector_length_encoding(this);
22289       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22290                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22291                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22292     } else {
22293       int vlen_enc = vector_length_encoding(this, $src);
22294       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22295                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22296                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22297     }
22298   %}
22299   ins_pipe( pipe_slow );
22300 %}
22301 
22302 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22303   predicate(VM_Version::supports_avx10_2() &&
22304             is_integral_type(Matcher::vector_element_basic_type(n)));
22305   match(Set dst (VectorCastF2X src));
22306   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22307   ins_encode %{
22308     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22309     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22310     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22311   %}
22312   ins_pipe( pipe_slow );
22313 %}
22314 
22315 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22316   predicate(VM_Version::supports_avx10_2() &&
22317             is_integral_type(Matcher::vector_element_basic_type(n)));
22318   match(Set dst (VectorCastF2X (LoadVector src)));
22319   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22320   ins_encode %{
22321     int vlen = Matcher::vector_length(this);
22322     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22323     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22324     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22325   %}
22326   ins_pipe( pipe_slow );
22327 %}
22328 
22329 instruct vcastDtoF_reg(vec dst, vec src) %{
22330   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22331   match(Set dst (VectorCastD2X src));
22332   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22333   ins_encode %{
22334     int vlen_enc = vector_length_encoding(this, $src);
22335     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22336   %}
22337   ins_pipe( pipe_slow );
22338 %}
22339 
22340 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22341   predicate(!VM_Version::supports_avx10_2() &&
22342             !VM_Version::supports_avx512vl() &&
22343             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22344             is_integral_type(Matcher::vector_element_basic_type(n)));
22345   match(Set dst (VectorCastD2X src));
22346   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22347   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22348   ins_encode %{
22349     int vlen_enc = vector_length_encoding(this, $src);
22350     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22351     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22352                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22353                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22354   %}
22355   ins_pipe( pipe_slow );
22356 %}
22357 
22358 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22359   predicate(!VM_Version::supports_avx10_2() &&
22360             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22361             is_integral_type(Matcher::vector_element_basic_type(n)));
22362   match(Set dst (VectorCastD2X src));
22363   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22364   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22365   ins_encode %{
22366     int vlen_enc = vector_length_encoding(this, $src);
22367     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22368     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22369                               ExternalAddress(vector_float_signflip());
22370     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22371                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22372   %}
22373   ins_pipe( pipe_slow );
22374 %}
22375 
22376 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22377   predicate(VM_Version::supports_avx10_2() &&
22378             is_integral_type(Matcher::vector_element_basic_type(n)));
22379   match(Set dst (VectorCastD2X src));
22380   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22381   ins_encode %{
22382     int vlen_enc = vector_length_encoding(this, $src);
22383     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22384     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22385   %}
22386   ins_pipe( pipe_slow );
22387 %}
22388 
22389 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22390   predicate(VM_Version::supports_avx10_2() &&
22391             is_integral_type(Matcher::vector_element_basic_type(n)));
22392   match(Set dst (VectorCastD2X (LoadVector src)));
22393   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22394   ins_encode %{
22395     int vlen = Matcher::vector_length(this);
22396     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22397     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22398     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22399   %}
22400   ins_pipe( pipe_slow );
22401 %}
22402 
22403 instruct vucast(vec dst, vec src) %{
22404   match(Set dst (VectorUCastB2X src));
22405   match(Set dst (VectorUCastS2X src));
22406   match(Set dst (VectorUCastI2X src));
22407   format %{ "vector_ucast $dst,$src\t!" %}
22408   ins_encode %{
22409     assert(UseAVX > 0, "required");
22410 
22411     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22412     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22413     int vlen_enc = vector_length_encoding(this);
22414     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22415   %}
22416   ins_pipe( pipe_slow );
22417 %}
22418 
22419 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22420   predicate(!VM_Version::supports_avx512vl() &&
22421             Matcher::vector_length_in_bytes(n) < 64 &&
22422             Matcher::vector_element_basic_type(n) == T_INT);
22423   match(Set dst (RoundVF src));
22424   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22425   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22426   ins_encode %{
22427     int vlen_enc = vector_length_encoding(this);
22428     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22429     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22430                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22431                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22432   %}
22433   ins_pipe( pipe_slow );
22434 %}
22435 
22436 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22437   predicate((VM_Version::supports_avx512vl() ||
22438              Matcher::vector_length_in_bytes(n) == 64) &&
22439              Matcher::vector_element_basic_type(n) == T_INT);
22440   match(Set dst (RoundVF src));
22441   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22442   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22443   ins_encode %{
22444     int vlen_enc = vector_length_encoding(this);
22445     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22446     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22447                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22448                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22449   %}
22450   ins_pipe( pipe_slow );
22451 %}
22452 
22453 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22454   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22455   match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22457   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22458   ins_encode %{
22459     int vlen_enc = vector_length_encoding(this);
22460     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22461     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22462                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22463                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22464   %}
22465   ins_pipe( pipe_slow );
22466 %}
22467 
22468 // --------------------------------- VectorMaskCmp --------------------------------------
22469 
22470 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22471   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22472             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22473             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22474             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22475   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22476   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22477   ins_encode %{
22478     int vlen_enc = vector_length_encoding(this, $src1);
22479     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22480     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22481       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22482     } else {
22483       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22484     }
22485   %}
22486   ins_pipe( pipe_slow );
22487 %}
22488 
22489 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22490   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22491             n->bottom_type()->isa_vectmask() == nullptr &&
22492             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22493   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22494   effect(TEMP ktmp);
22495   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22496   ins_encode %{
22497     int vlen_enc = Assembler::AVX_512bit;
22498     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22499     KRegister mask = k0; // The comparison itself is not being masked.
22500     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22501       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22502       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22503     } else {
22504       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22505       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22506     }
22507   %}
22508   ins_pipe( pipe_slow );
22509 %}
22510 
22511 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22512   predicate(n->bottom_type()->isa_vectmask() &&
22513             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22514   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22515   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22516   ins_encode %{
22517     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22518     int vlen_enc = vector_length_encoding(this, $src1);
22519     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22520     KRegister mask = k0; // The comparison itself is not being masked.
22521     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22522       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22523     } else {
22524       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22525     }
22526   %}
22527   ins_pipe( pipe_slow );
22528 %}
22529 
22530 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22531   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22532             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22533             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22534             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22535             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22536             (n->in(2)->get_int() == BoolTest::eq ||
22537              n->in(2)->get_int() == BoolTest::lt ||
22538              n->in(2)->get_int() == BoolTest::gt)); // cond
22539   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22540   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22541   ins_encode %{
22542     int vlen_enc = vector_length_encoding(this, $src1);
22543     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22544     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22545     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22546   %}
22547   ins_pipe( pipe_slow );
22548 %}
22549 
22550 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22551   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22552             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22553             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22554             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22555             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22556             (n->in(2)->get_int() == BoolTest::ne ||
22557              n->in(2)->get_int() == BoolTest::le ||
22558              n->in(2)->get_int() == BoolTest::ge)); // cond
22559   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22560   effect(TEMP dst, TEMP xtmp);
22561   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22562   ins_encode %{
22563     int vlen_enc = vector_length_encoding(this, $src1);
22564     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22565     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22566     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22567   %}
22568   ins_pipe( pipe_slow );
22569 %}
22570 
22571 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22572   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22573             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22574             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22575             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22576             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22577   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22578   effect(TEMP dst, TEMP xtmp);
22579   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22580   ins_encode %{
22581     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22582     int vlen_enc = vector_length_encoding(this, $src1);
22583     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22584     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22585 
22586     if (vlen_enc == Assembler::AVX_128bit) {
22587       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22588     } else {
22589       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22590     }
22591     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22592     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22593     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22594   %}
22595   ins_pipe( pipe_slow );
22596 %}
22597 
22598 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22599   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22600              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22601              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22602   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22603   effect(TEMP ktmp);
22604   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22605   ins_encode %{
22606     assert(UseAVX > 2, "required");
22607 
22608     int vlen_enc = vector_length_encoding(this, $src1);
22609     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22610     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22611     KRegister mask = k0; // The comparison itself is not being masked.
22612     bool merge = false;
22613     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22614 
22615     switch (src1_elem_bt) {
22616       case T_INT: {
22617         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22618         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22619         break;
22620       }
22621       case T_LONG: {
22622         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22623         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22624         break;
22625       }
22626       default: assert(false, "%s", type2name(src1_elem_bt));
22627     }
22628   %}
22629   ins_pipe( pipe_slow );
22630 %}
22631 
22632 
22633 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22634   predicate(n->bottom_type()->isa_vectmask() &&
22635             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22636   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22637   format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22638   ins_encode %{
22639     assert(UseAVX > 2, "required");
22640     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22641 
22642     int vlen_enc = vector_length_encoding(this, $src1);
22643     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22644     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22645     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22646 
    // The comparison itself is not being masked, so k0 is used as the mask register.
22648     switch (src1_elem_bt) {
22649       case T_BYTE: {
22650         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22651         break;
22652       }
22653       case T_SHORT: {
22654         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22655         break;
22656       }
22657       case T_INT: {
22658         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22659         break;
22660       }
22661       case T_LONG: {
22662         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22663         break;
22664       }
22665       default: assert(false, "%s", type2name(src1_elem_bt));
22666     }
22667   %}
22668   ins_pipe( pipe_slow );
22669 %}
22670 
22671 // Extract
22672 
22673 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22674   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22675   match(Set dst (ExtractI src idx));
22676   match(Set dst (ExtractS src idx));
22677   match(Set dst (ExtractB src idx));
22678   format %{ "extractI $dst,$src,$idx\t!" %}
22679   ins_encode %{
22680     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22681 
22682     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22683     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22684   %}
22685   ins_pipe( pipe_slow );
22686 %}
22687 
22688 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22689   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22690             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22691   match(Set dst (ExtractI src idx));
22692   match(Set dst (ExtractS src idx));
22693   match(Set dst (ExtractB src idx));
22694   effect(TEMP vtmp);
22695   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22696   ins_encode %{
22697     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22698 
22699     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22700     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22701     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22702   %}
22703   ins_pipe( pipe_slow );
22704 %}
22705 
22706 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22707   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22708   match(Set dst (ExtractL src idx));
22709   format %{ "extractL $dst,$src,$idx\t!" %}
22710   ins_encode %{
22711     assert(UseSSE >= 4, "required");
22712     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22713 
22714     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22715   %}
22716   ins_pipe( pipe_slow );
22717 %}
22718 
22719 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22720   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22721             Matcher::vector_length(n->in(1)) == 8);  // src
22722   match(Set dst (ExtractL src idx));
22723   effect(TEMP vtmp);
22724   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22725   ins_encode %{
22726     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22727 
22728     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22729     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22730   %}
22731   ins_pipe( pipe_slow );
22732 %}
22733 
22734 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22735   predicate(Matcher::vector_length(n->in(1)) <= 4);
22736   match(Set dst (ExtractF src idx));
22737   effect(TEMP dst, TEMP vtmp);
22738   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22739   ins_encode %{
22740     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22741 
22742     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22743   %}
22744   ins_pipe( pipe_slow );
22745 %}
22746 
22747 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 8 ||  // src
            Matcher::vector_length(n->in(1)) == 16); // src
22750   match(Set dst (ExtractF src idx));
22751   effect(TEMP vtmp);
22752   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22753   ins_encode %{
22754     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22755 
22756     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22757     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22758   %}
22759   ins_pipe( pipe_slow );
22760 %}
22761 
22762 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22763   predicate(Matcher::vector_length(n->in(1)) == 2); // src
22764   match(Set dst (ExtractD src idx));
22765   format %{ "extractD $dst,$src,$idx\t!" %}
22766   ins_encode %{
22767     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22768 
22769     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22770   %}
22771   ins_pipe( pipe_slow );
22772 %}
22773 
22774 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22775   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22776             Matcher::vector_length(n->in(1)) == 8);  // src
22777   match(Set dst (ExtractD src idx));
22778   effect(TEMP vtmp);
22779   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22780   ins_encode %{
22781     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22782 
22783     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22784     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22785   %}
22786   ins_pipe( pipe_slow );
22787 %}
22788 
22789 // --------------------------------- Vector Blend --------------------------------------
22790 
22791 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22792   predicate(UseAVX == 0);
22793   match(Set dst (VectorBlend (Binary dst src) mask));
22794   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
22795   effect(TEMP tmp);
22796   ins_encode %{
22797     assert(UseSSE >= 4, "required");
22798 
22799     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22800       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22801     }
22802     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22803   %}
22804   ins_pipe( pipe_slow );
22805 %}
22806 
22807 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22808   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22809             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22810             Matcher::vector_length_in_bytes(n) <= 32 &&
22811             is_integral_type(Matcher::vector_element_basic_type(n)));
22812   match(Set dst (VectorBlend (Binary src1 src2) mask));
22813   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22814   ins_encode %{
22815     int vlen_enc = vector_length_encoding(this);
22816     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22817   %}
22818   ins_pipe( pipe_slow );
22819 %}
22820 
22821 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22822   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22823             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22824             Matcher::vector_length_in_bytes(n) <= 32 &&
22825             !is_integral_type(Matcher::vector_element_basic_type(n)));
22826   match(Set dst (VectorBlend (Binary src1 src2) mask));
22827   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22828   ins_encode %{
22829     int vlen_enc = vector_length_encoding(this);
22830     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22831   %}
22832   ins_pipe( pipe_slow );
22833 %}
22834 
22835 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22836   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22837             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22838             Matcher::vector_length_in_bytes(n) <= 32);
22839   match(Set dst (VectorBlend (Binary src1 src2) mask));
22840   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22841   effect(TEMP vtmp, TEMP dst);
22842   ins_encode %{
22843     int vlen_enc = vector_length_encoding(this);
22844     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22845     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22846     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
22847   %}
22848   ins_pipe( pipe_slow );
22849 %}
22850 
22851 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22852   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22853             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22854   match(Set dst (VectorBlend (Binary src1 src2) mask));
22855   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22856   effect(TEMP ktmp);
22857   ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
22860     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22861     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22862   %}
22863   ins_pipe( pipe_slow );
22864 %}
22865 
22866 
22867 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22868   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22869             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22870              VM_Version::supports_avx512bw()));
22871   match(Set dst (VectorBlend (Binary src1 src2) mask));
22872   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22873   ins_encode %{
22874     int vlen_enc = vector_length_encoding(this);
22875     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22876     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22877   %}
22878   ins_pipe( pipe_slow );
22879 %}
22880 
22881 // --------------------------------- ABS --------------------------------------
22882 // a = |a|
22883 instruct vabsB_reg(vec dst, vec src) %{
22884   match(Set dst (AbsVB  src));
22885   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22886   ins_encode %{
22887     uint vlen = Matcher::vector_length(this);
22888     if (vlen <= 16) {
22889       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22890     } else {
22891       int vlen_enc = vector_length_encoding(this);
22892       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22893     }
22894   %}
22895   ins_pipe( pipe_slow );
22896 %}
22897 
22898 instruct vabsS_reg(vec dst, vec src) %{
22899   match(Set dst (AbsVS  src));
22900   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22901   ins_encode %{
22902     uint vlen = Matcher::vector_length(this);
22903     if (vlen <= 8) {
22904       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22905     } else {
22906       int vlen_enc = vector_length_encoding(this);
22907       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22908     }
22909   %}
22910   ins_pipe( pipe_slow );
22911 %}
22912 
22913 instruct vabsI_reg(vec dst, vec src) %{
22914   match(Set dst (AbsVI  src));
22915   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22916   ins_encode %{
22917     uint vlen = Matcher::vector_length(this);
22918     if (vlen <= 4) {
22919       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22920     } else {
22921       int vlen_enc = vector_length_encoding(this);
22922       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22923     }
22924   %}
22925   ins_pipe( pipe_slow );
22926 %}
22927 
22928 instruct vabsL_reg(vec dst, vec src) %{
22929   match(Set dst (AbsVL  src));
22930   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22931   ins_encode %{
22932     assert(UseAVX > 2, "required");
22933     int vlen_enc = vector_length_encoding(this);
22934     if (!VM_Version::supports_avx512vl()) {
22935       vlen_enc = Assembler::AVX_512bit;
22936     }
22937     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22938   %}
22939   ins_pipe( pipe_slow );
22940 %}
22941 
22942 // --------------------------------- ABSNEG --------------------------------------
22943 
22944 instruct vabsnegF(vec dst, vec src) %{
22945   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22946   match(Set dst (AbsVF src));
22947   match(Set dst (NegVF src));
22948   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22949   ins_cost(150);
22950   ins_encode %{
22951     int opcode = this->ideal_Opcode();
22952     int vlen = Matcher::vector_length(this);
22953     if (vlen == 2) {
22954       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22955     } else {
22956       assert(vlen == 8 || vlen == 16, "required");
22957       int vlen_enc = vector_length_encoding(this);
22958       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22959     }
22960   %}
22961   ins_pipe( pipe_slow );
22962 %}
22963 
22964 instruct vabsneg4F(vec dst) %{
22965   predicate(Matcher::vector_length(n) == 4);
22966   match(Set dst (AbsVF dst));
22967   match(Set dst (NegVF dst));
22968   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22969   ins_cost(150);
22970   ins_encode %{
22971     int opcode = this->ideal_Opcode();
22972     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22973   %}
22974   ins_pipe( pipe_slow );
22975 %}
22976 
22977 instruct vabsnegD(vec dst, vec src) %{
22978   match(Set dst (AbsVD  src));
22979   match(Set dst (NegVD  src));
22980   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22981   ins_encode %{
22982     int opcode = this->ideal_Opcode();
22983     uint vlen = Matcher::vector_length(this);
22984     if (vlen == 2) {
22985       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22986     } else {
22987       int vlen_enc = vector_length_encoding(this);
22988       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22989     }
22990   %}
22991   ins_pipe( pipe_slow );
22992 %}
22993 
22994 //------------------------------------- VectorTest --------------------------------------------
22995 
22996 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22997   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22998   match(Set cr (VectorTest src1 src2));
22999   effect(TEMP vtmp);
23000   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
23001   ins_encode %{
23002     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23003     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23004     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23005   %}
23006   ins_pipe( pipe_slow );
23007 %}
23008 
23009 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23010   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23011   match(Set cr (VectorTest src1 src2));
23012   format %{ "vptest_ge16  $src1, $src2\n\t" %}
23013   ins_encode %{
23014     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23015     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23016     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23017   %}
23018   ins_pipe( pipe_slow );
23019 %}
23020 
23021 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23022   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23023              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23024             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23025   match(Set cr (VectorTest src1 src2));
23026   effect(TEMP tmp);
23027   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23028   ins_encode %{
23029     uint masklen = Matcher::vector_length(this, $src1);
23030     __ kmovwl($tmp$$Register, $src1$$KRegister);
23031     __ andl($tmp$$Register, (1 << masklen) - 1);
23032     __ cmpl($tmp$$Register, (1 << masklen) - 1);
23033   %}
23034   ins_pipe( pipe_slow );
23035 %}
23036 
23037 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23038   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23039              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23040             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23041   match(Set cr (VectorTest src1 src2));
23042   effect(TEMP tmp);
23043   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23044   ins_encode %{
23045     uint masklen = Matcher::vector_length(this, $src1);
23046     __ kmovwl($tmp$$Register, $src1$$KRegister);
23047     __ andl($tmp$$Register, (1 << masklen) - 1);
23048   %}
23049   ins_pipe( pipe_slow );
23050 %}
23051 
23052 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23053   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23054             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23055   match(Set cr (VectorTest src1 src2));
23056   format %{ "ktest_ge8  $src1, $src2\n\t" %}
23057   ins_encode %{
23058     uint masklen = Matcher::vector_length(this, $src1);
23059     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23060   %}
23061   ins_pipe( pipe_slow );
23062 %}
23063 
23064 //------------------------------------- LoadMask --------------------------------------------
23065 
23066 instruct loadMask(legVec dst, legVec src) %{
23067   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23068   match(Set dst (VectorLoadMask src));
23069   effect(TEMP dst);
23070   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23071   ins_encode %{
23072     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23073     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23074     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23075   %}
23076   ins_pipe( pipe_slow );
23077 %}
23078 
23079 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23080   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23081   match(Set dst (VectorLoadMask src));
23082   effect(TEMP xtmp);
23083   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23084   ins_encode %{
23085     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23086                         true, Assembler::AVX_512bit);
23087   %}
23088   ins_pipe( pipe_slow );
23089 %}
23090 
23091 instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
23092   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23093   match(Set dst (VectorLoadMask src));
23094   effect(TEMP xtmp);
23095   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23096   ins_encode %{
23097     int vlen_enc = vector_length_encoding(in(1));
23098     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23099                         false, vlen_enc);
23100   %}
23101   ins_pipe( pipe_slow );
23102 %}
23103 
23104 //------------------------------------- StoreMask --------------------------------------------
23105 
23106 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23107   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23108   match(Set dst (VectorStoreMask src size));
23109   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23110   ins_encode %{
23111     int vlen = Matcher::vector_length(this);
23112     if (vlen <= 16 && UseAVX <= 2) {
23113       assert(UseSSE >= 3, "required");
23114       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23115     } else {
23116       assert(UseAVX > 0, "required");
23117       int src_vlen_enc = vector_length_encoding(this, $src);
23118       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23119     }
23120   %}
23121   ins_pipe( pipe_slow );
23122 %}
23123 
23124 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23125   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23126   match(Set dst (VectorStoreMask src size));
23127   effect(TEMP_DEF dst, TEMP xtmp);
23128   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23129   ins_encode %{
23130     int vlen_enc = Assembler::AVX_128bit;
23131     int vlen = Matcher::vector_length(this);
23132     if (vlen <= 8) {
23133       assert(UseSSE >= 3, "required");
23134       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23135       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23136       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23137     } else {
23138       assert(UseAVX > 0, "required");
23139       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23140       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23141       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23142     }
23143   %}
23144   ins_pipe( pipe_slow );
23145 %}
23146 
23147 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23148   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23149   match(Set dst (VectorStoreMask src size));
23150   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23151   effect(TEMP_DEF dst, TEMP xtmp);
23152   ins_encode %{
23153     int vlen_enc = Assembler::AVX_128bit;
23154     int vlen = Matcher::vector_length(this);
23155     if (vlen <= 4) {
23156       assert(UseSSE >= 3, "required");
23157       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23158       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23159       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23160       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23161     } else {
23162       assert(UseAVX > 0, "required");
23163       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23164       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23165       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23166       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23167       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23168     }
23169   %}
23170   ins_pipe( pipe_slow );
23171 %}
23172 
23173 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23174   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23175   match(Set dst (VectorStoreMask src size));
23176   effect(TEMP_DEF dst, TEMP xtmp);
23177   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23178   ins_encode %{
23179     assert(UseSSE >= 3, "required");
23180     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23181     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23182     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23183     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23184     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23185   %}
23186   ins_pipe( pipe_slow );
23187 %}
23188 
23189 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23190   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23191   match(Set dst (VectorStoreMask src size));
23192   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23193   effect(TEMP_DEF dst, TEMP vtmp);
23194   ins_encode %{
23195     int vlen_enc = Assembler::AVX_128bit;
23196     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23197     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23198     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23199     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23200     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23201     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23202     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23203   %}
23204   ins_pipe( pipe_slow );
23205 %}
23206 
23207 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23208   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23209   match(Set dst (VectorStoreMask src size));
23210   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23211   ins_encode %{
23212     int src_vlen_enc = vector_length_encoding(this, $src);
23213     int dst_vlen_enc = vector_length_encoding(this);
23214     if (!VM_Version::supports_avx512vl()) {
23215       src_vlen_enc = Assembler::AVX_512bit;
23216     }
23217     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23218     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23219   %}
23220   ins_pipe( pipe_slow );
23221 %}
23222 
23223 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23224   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23225   match(Set dst (VectorStoreMask src size));
23226   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23227   ins_encode %{
23228     int src_vlen_enc = vector_length_encoding(this, $src);
23229     int dst_vlen_enc = vector_length_encoding(this);
23230     if (!VM_Version::supports_avx512vl()) {
23231       src_vlen_enc = Assembler::AVX_512bit;
23232     }
23233     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23234     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23235   %}
23236   ins_pipe( pipe_slow );
23237 %}
23238 
23239 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23240   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23241   match(Set dst (VectorStoreMask mask size));
23242   effect(TEMP_DEF dst);
23243   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23244   ins_encode %{
23245     assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23246     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23247                  false, Assembler::AVX_512bit, noreg);
23248     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23249   %}
23250   ins_pipe( pipe_slow );
23251 %}
23252 
23253 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23254   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23255   match(Set dst (VectorStoreMask mask size));
23256   effect(TEMP_DEF dst);
23257   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23258   ins_encode %{
23259     int dst_vlen_enc = vector_length_encoding(this);
23260     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23261     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23262   %}
23263   ins_pipe( pipe_slow );
23264 %}
23265 
23266 instruct vmaskcast_evex(kReg dst) %{
23267   match(Set dst (VectorMaskCast dst));
23268   ins_cost(0);
23269   format %{ "vector_mask_cast $dst" %}
23270   ins_encode %{
23271     // empty
23272   %}
23273   ins_pipe(empty);
23274 %}
23275 
23276 instruct vmaskcast(vec dst) %{
23277   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23278   match(Set dst (VectorMaskCast dst));
23279   ins_cost(0);
23280   format %{ "vector_mask_cast $dst" %}
23281   ins_encode %{
23282     // empty
23283   %}
23284   ins_pipe(empty);
23285 %}
23286 
23287 instruct vmaskcast_avx(vec dst, vec src) %{
23288   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23289   match(Set dst (VectorMaskCast src));
23290   format %{ "vector_mask_cast $dst, $src" %}
23291   ins_encode %{
23292     int vlen = Matcher::vector_length(this);
23293     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23294     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23295     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23296   %}
23297   ins_pipe(pipe_slow);
23298 %}
23299 
23300 //-------------------------------- Load Iota Indices ----------------------------------
23301 
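// An iota vector holds the ascending lane indices {0, 1, 2, ...}.  The
// PopulateIndex patterns below broadcast a scalar starting value and add the
// iota sequence, so lane i receives src1 + i (the stride $src2 is constrained
// to 1 by the immI_1 operand and the assert).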
23302 instruct loadIotaIndices(vec dst, immI_0 src) %{
23303   match(Set dst (VectorLoadConst src));
23304   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23305   ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23309   %}
23310   ins_pipe( pipe_slow );
23311 %}
23312 
23313 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23314   match(Set dst (PopulateIndex src1 src2));
23315   effect(TEMP dst, TEMP vtmp);
23316   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23317   ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23325   %}
23326   ins_pipe( pipe_slow );
23327 %}
23328 
23329 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23330   match(Set dst (PopulateIndex src1 src2));
23331   effect(TEMP dst, TEMP vtmp);
23332   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23333   ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23341   %}
23342   ins_pipe( pipe_slow );
23343 %}
23344 
23345 //-------------------------------- Rearrange ----------------------------------
23346 
23347 // LoadShuffle/Rearrange for Byte
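// VectorRearrange computes dst[i] = src[shuffle[i]]; VectorLoadShuffle
// prepares the shuffle index vector in the layout the rearrange instruction
// expects.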
23348 instruct rearrangeB(vec dst, vec shuffle) %{
23349   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23350             Matcher::vector_length(n) < 32);
23351   match(Set dst (VectorRearrange dst shuffle));
23352   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23353   ins_encode %{
23354     assert(UseSSE >= 4, "required");
23355     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23356   %}
23357   ins_pipe( pipe_slow );
23358 %}
23359 
23360 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23361   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23362             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23363   match(Set dst (VectorRearrange src shuffle));
23364   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23365   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23366   ins_encode %{
23367     assert(UseAVX >= 2, "required");
23368     // Swap src into vtmp1
23369     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from the other 128-bit lane
23371     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from its own 128-bit lane
23373     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from the other lane in the shuffle
23375     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23376     // Perform the blend
23377     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23378   %}
23379   ins_pipe( pipe_slow );
23380 %}
23381 
23383 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23384   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23385             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23386   match(Set dst (VectorRearrange src shuffle));
23387   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23388   format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23389   ins_encode %{
23390     int vlen_enc = vector_length_encoding(this);
23391     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23392                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23393                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23394   %}
23395   ins_pipe( pipe_slow );
23396 %}
23397 
23398 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23399   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23400             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23401   match(Set dst (VectorRearrange src shuffle));
23402   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23403   ins_encode %{
23404     int vlen_enc = vector_length_encoding(this);
23405     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23406   %}
23407   ins_pipe( pipe_slow );
23408 %}
23409 
23410 // LoadShuffle/Rearrange for Short
23411 
23412 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23413   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23414             !VM_Version::supports_avx512bw());
23415   match(Set dst (VectorLoadShuffle src));
23416   effect(TEMP dst, TEMP vtmp);
23417   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23418   ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask, since only a
    // byte shuffle instruction is available on these platforms.
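    // Worked example (illustrative values): a short shuffle index of 3 becomes
    // byte index 6 after the shift; duplicating it into both bytes of the word
    // and adding the {0, 1} pattern yields the byte pair {6, 7}, selecting
    // both bytes of short lane 3.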
23421     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23422     if (UseAVX == 0) {
23423       assert(vlen_in_bytes <= 16, "required");
23424       // Multiply each shuffle by two to get byte index
23425       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23426       __ psllw($vtmp$$XMMRegister, 1);
23427 
23428       // Duplicate to create 2 copies of byte index
23429       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23430       __ psllw($dst$$XMMRegister, 8);
23431       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23432 
23433       // Add one to get alternate byte index
23434       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23435       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23436     } else {
23437       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23438       int vlen_enc = vector_length_encoding(this);
23439       // Multiply each shuffle by two to get byte index
23440       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23441 
23442       // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23444       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23445 
23446       // Add one to get alternate byte index
23447       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23448     }
23449   %}
23450   ins_pipe( pipe_slow );
23451 %}
23452 
23453 instruct rearrangeS(vec dst, vec shuffle) %{
23454   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23455             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23456   match(Set dst (VectorRearrange dst shuffle));
23457   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23458   ins_encode %{
23459     assert(UseSSE >= 4, "required");
23460     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23461   %}
23462   ins_pipe( pipe_slow );
23463 %}
23464 
23465 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23466   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23467             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23468   match(Set dst (VectorRearrange src shuffle));
23469   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23470   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23471   ins_encode %{
23472     assert(UseAVX >= 2, "required");
23473     // Swap src into vtmp1
23474     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from the other 128-bit lane
23476     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from its own 128-bit lane
23478     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from the other lane in the shuffle
23480     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23481     // Perform the blend
23482     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23483   %}
23484   ins_pipe( pipe_slow );
23485 %}
23486 
23487 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23488   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23489             VM_Version::supports_avx512bw());
23490   match(Set dst (VectorRearrange src shuffle));
23491   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23492   ins_encode %{
23493     int vlen_enc = vector_length_encoding(this);
23494     if (!VM_Version::supports_avx512vl()) {
23495       vlen_enc = Assembler::AVX_512bit;
23496     }
23497     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23498   %}
23499   ins_pipe( pipe_slow );
23500 %}
23501 
23502 // LoadShuffle/Rearrange for Integer and Float
23503 
23504 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23505   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23506             Matcher::vector_length(n) == 4 && UseAVX == 0);
23507   match(Set dst (VectorLoadShuffle src));
23508   effect(TEMP dst, TEMP vtmp);
23509   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23510   ins_encode %{
23511     assert(UseSSE >= 4, "required");
23512 
    // Create a byte shuffle mask from the int shuffle mask, since only a
    // byte shuffle instruction is available on these platforms.
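    // Worked example (illustrative values): an int shuffle index of 2 scales
    // to byte index 8; duplicating it into all four bytes and adding the
    // per-byte offsets 0..3 produces {8, 9, 10, 11}, the four bytes of int
    // lane 2.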
23515 
23516     // Duplicate and multiply each shuffle by 4
23517     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23518     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23519     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23520     __ psllw($vtmp$$XMMRegister, 2);
23521 
23522     // Duplicate again to create 4 copies of byte index
23523     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23524     __ psllw($dst$$XMMRegister, 8);
23525     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23526 
23527     // Add 3,2,1,0 to get alternate byte index
23528     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23529     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23530   %}
23531   ins_pipe( pipe_slow );
23532 %}
23533 
23534 instruct rearrangeI(vec dst, vec shuffle) %{
23535   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23536             UseAVX == 0);
23537   match(Set dst (VectorRearrange dst shuffle));
23538   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23539   ins_encode %{
23540     assert(UseSSE >= 4, "required");
23541     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23542   %}
23543   ins_pipe( pipe_slow );
23544 %}
23545 
23546 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23547   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23548             UseAVX > 0);
23549   match(Set dst (VectorRearrange src shuffle));
23550   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23551   ins_encode %{
23552     int vlen_enc = vector_length_encoding(this);
23553     BasicType bt = Matcher::vector_element_basic_type(this);
23554     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23555   %}
23556   ins_pipe( pipe_slow );
23557 %}
23558 
23559 // LoadShuffle/Rearrange for Long and Double
23560 
23561 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23562   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23563             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23564   match(Set dst (VectorLoadShuffle src));
23565   effect(TEMP dst, TEMP vtmp);
23566   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23567   ins_encode %{
23568     assert(UseAVX >= 2, "required");
23569 
23570     int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask, since only
    // a double word shuffle instruction is available on these platforms.
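    // Worked example (illustrative values): a long shuffle index of 1 becomes
    // double word index 2 after the shift; duplicating it and adding the
    // {0, 1} pattern yields {2, 3}, the two double words of long lane 1.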
23573 
23574     // Multiply each shuffle by two to get double word index
23575     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23576 
23577     // Duplicate each double word shuffle
23578     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23579     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23580 
23581     // Add one to get alternate double word index
23582     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23583   %}
23584   ins_pipe( pipe_slow );
23585 %}
23586 
23587 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23588   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23589             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23590   match(Set dst (VectorRearrange src shuffle));
23591   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23592   ins_encode %{
23593     assert(UseAVX >= 2, "required");
23594 
23595     int vlen_enc = vector_length_encoding(this);
23596     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23597   %}
23598   ins_pipe( pipe_slow );
23599 %}
23600 
23601 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23602   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23603             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23604   match(Set dst (VectorRearrange src shuffle));
23605   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23606   ins_encode %{
23607     assert(UseAVX > 2, "required");
23608 
23609     int vlen_enc = vector_length_encoding(this);
23610     if (vlen_enc == Assembler::AVX_128bit) {
23611       vlen_enc = Assembler::AVX_256bit;
23612     }
23613     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23614   %}
23615   ins_pipe( pipe_slow );
23616 %}
23617 
23618 // --------------------------------- FMA --------------------------------------
23619 // a * b + c
23620 
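// Per lane, FmaV computes c[i] = a[i] * b[i] + c[i] with a single rounding
// step (fused), which is why these patterns are guarded by UseFMA rather than
// being split into a separate multiply and add.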
23621 instruct vfmaF_reg(vec a, vec b, vec c) %{
23622   match(Set c (FmaVF  c (Binary a b)));
23623   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23624   ins_cost(150);
23625   ins_encode %{
23626     assert(UseFMA, "not enabled");
23627     int vlen_enc = vector_length_encoding(this);
23628     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23629   %}
23630   ins_pipe( pipe_slow );
23631 %}
23632 
23633 instruct vfmaF_mem(vec a, memory b, vec c) %{
23634   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23635   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23636   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23637   ins_cost(150);
23638   ins_encode %{
23639     assert(UseFMA, "not enabled");
23640     int vlen_enc = vector_length_encoding(this);
23641     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23642   %}
23643   ins_pipe( pipe_slow );
23644 %}
23645 
23646 instruct vfmaD_reg(vec a, vec b, vec c) %{
23647   match(Set c (FmaVD  c (Binary a b)));
23648   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23649   ins_cost(150);
23650   ins_encode %{
23651     assert(UseFMA, "not enabled");
23652     int vlen_enc = vector_length_encoding(this);
23653     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23654   %}
23655   ins_pipe( pipe_slow );
23656 %}
23657 
23658 instruct vfmaD_mem(vec a, memory b, vec c) %{
23659   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23660   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23661   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23662   ins_cost(150);
23663   ins_encode %{
23664     assert(UseFMA, "not enabled");
23665     int vlen_enc = vector_length_encoding(this);
23666     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23667   %}
23668   ins_pipe( pipe_slow );
23669 %}
23670 
23671 // --------------------------------- Vector Multiply Add --------------------------------------
23672 
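// pmaddwd/vpmaddwd multiply adjacent pairs of signed 16-bit elements and sum
// each pair into a signed 32-bit lane:
//   dst[i] = src1[2*i] * src2[2*i] + src1[2*i+1] * src2[2*i+1]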
23673 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23674   predicate(UseAVX == 0);
23675   match(Set dst (MulAddVS2VI dst src1));
23676   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23677   ins_encode %{
23678     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23679   %}
23680   ins_pipe( pipe_slow );
23681 %}
23682 
23683 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23684   predicate(UseAVX > 0);
23685   match(Set dst (MulAddVS2VI src1 src2));
23686   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23687   ins_encode %{
23688     int vlen_enc = vector_length_encoding(this);
23689     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23690   %}
23691   ins_pipe( pipe_slow );
23692 %}
23693 
23694 // --------------------------------- Vector Multiply Add Add ----------------------------------
23695 
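// With AVX512_VNNI, evpdpwssd fuses the multiply-add above with the trailing
// vector add, accumulating the 16-bit pair products directly into the 32-bit
// lanes of $dst:
//   dst[i] += src1[2*i] * src2[2*i] + src1[2*i+1] * src2[2*i+1]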
23696 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23697   predicate(VM_Version::supports_avx512_vnni());
23698   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23699   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23700   ins_encode %{
23701     assert(UseAVX > 2, "required");
23702     int vlen_enc = vector_length_encoding(this);
23703     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23704   %}
23705   ins_pipe( pipe_slow );
23706   ins_cost(10);
23707 %}
23708 
23709 // --------------------------------- PopCount --------------------------------------
23710 
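// Each destination lane receives the number of set bits in the corresponding
// source lane (int or long elements).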
23711 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23712   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23713   match(Set dst (PopCountVI src));
23714   match(Set dst (PopCountVL src));
23715   format %{ "vector_popcount_integral $dst, $src" %}
23716   ins_encode %{
23718     int vlen_enc = vector_length_encoding(this, $src);
23719     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23720     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23721   %}
23722   ins_pipe( pipe_slow );
23723 %}
23724 
23725 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23726   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23727   match(Set dst (PopCountVI src mask));
23728   match(Set dst (PopCountVL src mask));
23729   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23730   ins_encode %{
23731     int vlen_enc = vector_length_encoding(this, $src);
23732     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23733     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23734     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23735   %}
23736   ins_pipe( pipe_slow );
23737 %}
23738 
23739 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23740   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23741   match(Set dst (PopCountVI src));
23742   match(Set dst (PopCountVL src));
23743   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23744   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23745   ins_encode %{
23747     int vlen_enc = vector_length_encoding(this, $src);
23748     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23749     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23750                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23751   %}
23752   ins_pipe( pipe_slow );
23753 %}
23754 
23755 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23756 
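// Each destination lane receives the count of consecutive zero bits starting
// from the least significant bit of the corresponding source lane.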
23757 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23758   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23759                                               Matcher::vector_length_in_bytes(n->in(1))));
23760   match(Set dst (CountTrailingZerosV src));
23761   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23762   ins_cost(400);
23763   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
23764   ins_encode %{
23765     int vlen_enc = vector_length_encoding(this, $src);
23766     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23767     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23768                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23769   %}
23770   ins_pipe( pipe_slow );
23771 %}
23772 
23773 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23774   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23775             VM_Version::supports_avx512cd() &&
23776             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23777   match(Set dst (CountTrailingZerosV src));
23778   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23779   ins_cost(400);
23780   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23781   ins_encode %{
23782     int vlen_enc = vector_length_encoding(this, $src);
23783     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23784     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23785                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23786   %}
23787   ins_pipe( pipe_slow );
23788 %}
23789 
23790 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23791   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23792   match(Set dst (CountTrailingZerosV src));
23793   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23794   ins_cost(400);
23795   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23796   ins_encode %{
23797     int vlen_enc = vector_length_encoding(this, $src);
23798     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23799     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23800                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23801                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23802   %}
23803   ins_pipe( pipe_slow );
23804 %}
23805 
23806 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23807   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23808   match(Set dst (CountTrailingZerosV src));
23809   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23810   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23811   ins_encode %{
23812     int vlen_enc = vector_length_encoding(this, $src);
23813     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23814     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23815                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23816   %}
23817   ins_pipe( pipe_slow );
23818 %}
23819 
23821 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23822 
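// vpternlogd computes every destination bit by indexing the 8-bit truth table
// $func with the corresponding bits of (dst, src2, src3).  For instance, a
// table value of 0x96 (an illustrative choice) encodes a three-input XOR:
// bit = dst ^ src2 ^ src3.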
23823 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23824   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23825   effect(TEMP dst);
23826   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23827   ins_encode %{
23828     int vector_len = vector_length_encoding(this);
23829     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23830   %}
23831   ins_pipe( pipe_slow );
23832 %}
23833 
23834 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23835   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23836   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23837   effect(TEMP dst);
23838   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23839   ins_encode %{
23840     int vector_len = vector_length_encoding(this);
23841     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23842   %}
23843   ins_pipe( pipe_slow );
23844 %}
23845 
23846 // --------------------------------- Rotation Operations ----------------------------------
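// Each element of $src is rotated by $shift; the direction is recovered from
// the ideal opcode (RotateLeftV vs. RotateRightV).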
23847 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23848   match(Set dst (RotateLeftV src shift));
23849   match(Set dst (RotateRightV src shift));
23850   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23851   ins_encode %{
23852     int opcode      = this->ideal_Opcode();
23853     int vector_len  = vector_length_encoding(this);
23854     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23855     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23856   %}
23857   ins_pipe( pipe_slow );
23858 %}
23859 
23860 instruct vprorate(vec dst, vec src, vec shift) %{
23861   match(Set dst (RotateLeftV src shift));
23862   match(Set dst (RotateRightV src shift));
23863   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23864   ins_encode %{
23865     int opcode      = this->ideal_Opcode();
23866     int vector_len  = vector_length_encoding(this);
23867     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23868     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23869   %}
23870   ins_pipe( pipe_slow );
23871 %}
23872 
23873 // ---------------------------------- Masked Operations ------------------------------------
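// Masked loads fill only the lanes whose mask bit is set (the remaining lanes
// are zeroed); masked stores write back only the selected lanes, leaving the
// rest of memory untouched.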
23874 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23875   predicate(!n->in(3)->bottom_type()->isa_vectmask());
23876   match(Set dst (LoadVectorMasked mem mask));
23877   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23878   ins_encode %{
23879     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23880     int vlen_enc = vector_length_encoding(this);
23881     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23882   %}
23883   ins_pipe( pipe_slow );
23884 %}
23885 
23887 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23888   predicate(n->in(3)->bottom_type()->isa_vectmask());
23889   match(Set dst (LoadVectorMasked mem mask));
23890   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23891   ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23893     int vector_len = vector_length_encoding(this);
23894     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23895   %}
23896   ins_pipe( pipe_slow );
23897 %}
23898 
23899 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23900   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23901   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23902   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23903   ins_encode %{
23904     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23905     int vlen_enc = vector_length_encoding(src_node);
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23907     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23908   %}
23909   ins_pipe( pipe_slow );
23910 %}
23911 
23912 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23913   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23914   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23915   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23916   ins_encode %{
23917     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23919     int vlen_enc = vector_length_encoding(src_node);
23920     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23921   %}
23922   ins_pipe( pipe_slow );
23923 %}
23924 
23925 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23926   match(Set addr (VerifyVectorAlignment addr mask));
23927   effect(KILL cr);
23928   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23929   ins_encode %{
23930     Label Lskip;
23931     // check if masked bits of addr are zero
23932     __ testq($addr$$Register, $mask$$constant);
23933     __ jccb(Assembler::equal, Lskip);
23934     __ stop("verify_vector_alignment found a misaligned vector memory access");
23935     __ bind(Lskip);
23936   %}
23937   ins_pipe(pipe_slow);
23938 %}
23939 
23940 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23941   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23942   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23943   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23944   ins_encode %{
23945     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23946     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23947 
23948     Label DONE;
23949     int vlen_enc = vector_length_encoding(this, $src1);
23950     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23951 
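    // ktmp1 = lanes inside the mask that compare equal; ktmp2 = lanes outside
    // the mask.  If their union covers every lane, kortest sets the carry flag
    // and $dst keeps -1 (all masked lanes match).  Otherwise tzcnt over ~ktmp1
    // yields the index of the first non-matching lane (the mask is expected to
    // be a prefix mask here).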
23952     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23953     __ mov64($dst$$Register, -1L);
23954     __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23955     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23956     __ jccb(Assembler::carrySet, DONE);
23957     __ kmovql($dst$$Register, $ktmp1$$KRegister);
23958     __ notq($dst$$Register);
23959     __ tzcntq($dst$$Register, $dst$$Register);
23960     __ bind(DONE);
23961   %}
23962   ins_pipe( pipe_slow );
23963 %}
23964 
23966 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23967   match(Set dst (VectorMaskGen len));
23968   effect(TEMP temp, KILL cr);
23969   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23970   ins_encode %{
23971     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23972   %}
23973   ins_pipe( pipe_slow );
23974 %}
23975 
23976 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23977   match(Set dst (VectorMaskGen len));
23978   format %{ "vector_mask_gen $len \t! vector mask generator" %}
23979   effect(TEMP temp);
23980   ins_encode %{
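    // Materialize a mask of $len consecutive low bits, e.g. (illustrative
    // value) $len == 5 gives 0xFFFFFFFFFFFFFFFFUL >> 59 == 0x1F.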
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23982     __ kmovql($dst$$KRegister, $temp$$Register);
23983   %}
23984   ins_pipe( pipe_slow );
23985 %}
23986 
23987 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23988   predicate(n->in(1)->bottom_type()->isa_vectmask());
23989   match(Set dst (VectorMaskToLong mask));
23990   effect(TEMP dst, KILL cr);
23991   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23992   ins_encode %{
23993     int opcode = this->ideal_Opcode();
23994     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23995     int mask_len = Matcher::vector_length(this, $mask);
23996     int mask_size = mask_len * type2aelembytes(mbt);
23997     int vlen_enc = vector_length_encoding(this, $mask);
23998     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23999                              $dst$$Register, mask_len, mask_size, vlen_enc);
24000   %}
24001   ins_pipe( pipe_slow );
24002 %}
24003 
24004 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24005   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24006   match(Set dst (VectorMaskToLong mask));
24007   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24008   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24009   ins_encode %{
24010     int opcode = this->ideal_Opcode();
24011     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24012     int mask_len = Matcher::vector_length(this, $mask);
24013     int vlen_enc = vector_length_encoding(this, $mask);
24014     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24015                              $dst$$Register, mask_len, mbt, vlen_enc);
24016   %}
24017   ins_pipe( pipe_slow );
24018 %}
24019 
24020 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24021   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24022   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24023   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24024   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24025   ins_encode %{
24026     int opcode = this->ideal_Opcode();
24027     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24028     int mask_len = Matcher::vector_length(this, $mask);
24029     int vlen_enc = vector_length_encoding(this, $mask);
24030     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24031                              $dst$$Register, mask_len, mbt, vlen_enc);
24032   %}
24033   ins_pipe( pipe_slow );
24034 %}
24035 
24036 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24037   predicate(n->in(1)->bottom_type()->isa_vectmask());
24038   match(Set dst (VectorMaskTrueCount mask));
24039   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24040   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24041   ins_encode %{
24042     int opcode = this->ideal_Opcode();
24043     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24044     int mask_len = Matcher::vector_length(this, $mask);
24045     int mask_size = mask_len * type2aelembytes(mbt);
24046     int vlen_enc = vector_length_encoding(this, $mask);
24047     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24048                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24049   %}
24050   ins_pipe( pipe_slow );
24051 %}
24052 
24053 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24054   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24055   match(Set dst (VectorMaskTrueCount mask));
24056   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24057   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24058   ins_encode %{
24059     int opcode = this->ideal_Opcode();
24060     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24061     int mask_len = Matcher::vector_length(this, $mask);
24062     int vlen_enc = vector_length_encoding(this, $mask);
24063     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24064                              $tmp$$Register, mask_len, mbt, vlen_enc);
24065   %}
24066   ins_pipe( pipe_slow );
24067 %}
24068 
24069 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24070   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24071   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24072   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24073   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24074   ins_encode %{
24075     int opcode = this->ideal_Opcode();
24076     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24077     int mask_len = Matcher::vector_length(this, $mask);
24078     int vlen_enc = vector_length_encoding(this, $mask);
24079     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24080                              $tmp$$Register, mask_len, mbt, vlen_enc);
24081   %}
24082   ins_pipe( pipe_slow );
24083 %}
24084 
24085 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24086   predicate(n->in(1)->bottom_type()->isa_vectmask());
24087   match(Set dst (VectorMaskFirstTrue mask));
24088   match(Set dst (VectorMaskLastTrue mask));
24089   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24090   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24091   ins_encode %{
24092     int opcode = this->ideal_Opcode();
24093     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24094     int mask_len = Matcher::vector_length(this, $mask);
24095     int mask_size = mask_len * type2aelembytes(mbt);
24096     int vlen_enc = vector_length_encoding(this, $mask);
24097     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24098                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24099   %}
24100   ins_pipe( pipe_slow );
24101 %}
24102 
24103 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24104   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24105   match(Set dst (VectorMaskFirstTrue mask));
24106   match(Set dst (VectorMaskLastTrue mask));
24107   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24108   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24109   ins_encode %{
24110     int opcode = this->ideal_Opcode();
24111     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24112     int mask_len = Matcher::vector_length(this, $mask);
24113     int vlen_enc = vector_length_encoding(this, $mask);
24114     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24115                              $tmp$$Register, mask_len, mbt, vlen_enc);
24116   %}
24117   ins_pipe( pipe_slow );
24118 %}
24119 
24120 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24121   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24122   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24123   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24124   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24125   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24126   ins_encode %{
24127     int opcode = this->ideal_Opcode();
24128     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24129     int mask_len = Matcher::vector_length(this, $mask);
24130     int vlen_enc = vector_length_encoding(this, $mask);
24131     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24132                              $tmp$$Register, mask_len, mbt, vlen_enc);
24133   %}
24134   ins_pipe( pipe_slow );
24135 %}
24136 
24137 // --------------------------------- Compress/Expand Operations ---------------------------
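// CompressV packs the mask-selected lanes of $src contiguously into the low
// lanes of $dst; ExpandV performs the inverse, scattering the low lanes of
// $src into the mask-selected positions.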
24138 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24139   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24140   match(Set dst (CompressV src mask));
24141   match(Set dst (ExpandV src mask));
24142   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
24143   format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24144   ins_encode %{
24145     int opcode = this->ideal_Opcode();
24146     int vlen_enc = vector_length_encoding(this);
24147     BasicType bt  = Matcher::vector_element_basic_type(this);
24148     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24149                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24150   %}
24151   ins_pipe( pipe_slow );
24152 %}
24153 
24154 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24155   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24156   match(Set dst (CompressV src mask));
24157   match(Set dst (ExpandV src mask));
24158   format %{ "vector_compress_expand $dst, $src, $mask" %}
24159   ins_encode %{
24160     int opcode = this->ideal_Opcode();
24161     int vector_len = vector_length_encoding(this);
24162     BasicType bt  = Matcher::vector_element_basic_type(this);
24163     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24164   %}
24165   ins_pipe( pipe_slow );
24166 %}
24167 
24168 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24169   match(Set dst (CompressM mask));
24170   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24171   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24172   ins_encode %{
24173     assert(this->in(1)->bottom_type()->isa_vectmask(), "");
24174     int mask_len = Matcher::vector_length(this);
24175     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24176   %}
24177   ins_pipe( pipe_slow );
24178 %}
24179 
24180 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24181 
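// ReverseV reverses the bit order within each element; ReverseBytesV reverses
// only the byte order.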
24182 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24183   predicate(!VM_Version::supports_gfni());
24184   match(Set dst (ReverseV src));
24185   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24186   format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24187   ins_encode %{
24188     int vec_enc = vector_length_encoding(this);
24189     BasicType bt = Matcher::vector_element_basic_type(this);
24190     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24191                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24192   %}
24193   ins_pipe( pipe_slow );
24194 %}
24195 
24196 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24197   predicate(VM_Version::supports_gfni());
24198   match(Set dst (ReverseV src));
24199   effect(TEMP dst, TEMP xtmp);
24200   format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
24201   ins_encode %{
24202     int vec_enc = vector_length_encoding(this);
24203     BasicType bt  = Matcher::vector_element_basic_type(this);
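    // 0x8040201008040201 is the bit-reversal matrix for GF2P8AFFINEQB: each
    // byte of the operand is multiplied by this 8x8 bit matrix, which reverses
    // the bit order within the byte.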
24204     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24205     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24206                                $xtmp$$XMMRegister);
24207   %}
24208   ins_pipe( pipe_slow );
24209 %}
24210 
24211 instruct vreverse_byte_reg(vec dst, vec src) %{
24212   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24213   match(Set dst (ReverseBytesV src));
24214   effect(TEMP dst);
24215   format %{ "vector_reverse_byte $dst, $src" %}
24216   ins_encode %{
24217     int vec_enc = vector_length_encoding(this);
24218     BasicType bt = Matcher::vector_element_basic_type(this);
24219     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24220   %}
24221   ins_pipe( pipe_slow );
24222 %}
24223 
24224 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24225   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24226   match(Set dst (ReverseBytesV src));
24227   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24228   format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24229   ins_encode %{
24230     int vec_enc = vector_length_encoding(this);
24231     BasicType bt = Matcher::vector_element_basic_type(this);
24232     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24233                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24234   %}
24235   ins_pipe( pipe_slow );
24236 %}
24237 
24238 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24239 
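// Each destination lane receives the count of consecutive zero bits starting
// from the most significant bit of the corresponding source lane.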
24240 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24241   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24242                                               Matcher::vector_length_in_bytes(n->in(1))));
24243   match(Set dst (CountLeadingZerosV src));
24244   format %{ "vector_count_leading_zeros $dst, $src" %}
24245   ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                       xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24250   %}
24251   ins_pipe( pipe_slow );
24252 %}
24253 
24254 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24255   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24256                                               Matcher::vector_length_in_bytes(n->in(1))));
24257   match(Set dst (CountLeadingZerosV src mask));
24258   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24259   ins_encode %{
24260     int vlen_enc = vector_length_encoding(this, $src);
24261     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24262     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24263     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24264                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24265   %}
24266   ins_pipe( pipe_slow );
24267 %}
24268 
24269 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24270   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24271             VM_Version::supports_avx512cd() &&
24272             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24273   match(Set dst (CountLeadingZerosV src));
24274   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24275   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
24276   ins_encode %{
24277     int vlen_enc = vector_length_encoding(this, $src);
24278     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24279     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24280                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24281   %}
24282   ins_pipe( pipe_slow );
24283 %}
24284 
24285 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24286   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24287   match(Set dst (CountLeadingZerosV src));
24288   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24289   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24290   ins_encode %{
24291     int vlen_enc = vector_length_encoding(this, $src);
24292     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24293     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24294                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24295                                        $rtmp$$Register, true, vlen_enc);
24296   %}
24297   ins_pipe( pipe_slow );
24298 %}
24299 
24300 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24301   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24302             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24303   match(Set dst (CountLeadingZerosV src));
24304   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24305   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24306   ins_encode %{
24307     int vlen_enc = vector_length_encoding(this, $src);
24308     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24309     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24310                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24311   %}
24312   ins_pipe( pipe_slow );
24313 %}
24314 
24315 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24316   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24317             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24318   match(Set dst (CountLeadingZerosV src));
24319   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24320   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24321   ins_encode %{
24322     int vlen_enc = vector_length_encoding(this, $src);
24323     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24324     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24325                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24326   %}
24327   ins_pipe( pipe_slow );
24328 %}
24329 
24330 // ---------------------------------- Vector Masked Operations ------------------------------------
24331 
24332 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24333   match(Set dst (AddVB (Binary dst src2) mask));
24334   match(Set dst (AddVS (Binary dst src2) mask));
24335   match(Set dst (AddVI (Binary dst src2) mask));
24336   match(Set dst (AddVL (Binary dst src2) mask));
24337   match(Set dst (AddVF (Binary dst src2) mask));
24338   match(Set dst (AddVD (Binary dst src2) mask));
24339   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24340   ins_encode %{
24341     int vlen_enc = vector_length_encoding(this);
24342     BasicType bt = Matcher::vector_element_basic_type(this);
24343     int opc = this->ideal_Opcode();
24344     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24345                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24346   %}
24347   ins_pipe( pipe_slow );
24348 %}
24349 
24350 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24351   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24352   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24353   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24354   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24355   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24356   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24357   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24358   ins_encode %{
24359     int vlen_enc = vector_length_encoding(this);
24360     BasicType bt = Matcher::vector_element_basic_type(this);
24361     int opc = this->ideal_Opcode();
24362     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24363                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24364   %}
24365   ins_pipe( pipe_slow );
24366 %}
24367 
24368 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24369   match(Set dst (XorV (Binary dst src2) mask));
24370   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24371   ins_encode %{
24372     int vlen_enc = vector_length_encoding(this);
24373     BasicType bt = Matcher::vector_element_basic_type(this);
24374     int opc = this->ideal_Opcode();
24375     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24376                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24377   %}
24378   ins_pipe( pipe_slow );
24379 %}
24380 
24381 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24382   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24383   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24384   ins_encode %{
24385     int vlen_enc = vector_length_encoding(this);
24386     BasicType bt = Matcher::vector_element_basic_type(this);
24387     int opc = this->ideal_Opcode();
24388     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24389                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24390   %}
24391   ins_pipe( pipe_slow );
24392 %}
24393 
24394 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24395   match(Set dst (OrV (Binary dst src2) mask));
24396   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24397   ins_encode %{
24398     int vlen_enc = vector_length_encoding(this);
24399     BasicType bt = Matcher::vector_element_basic_type(this);
24400     int opc = this->ideal_Opcode();
24401     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24402                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24403   %}
24404   ins_pipe( pipe_slow );
24405 %}
24406 
24407 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24408   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24409   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24410   ins_encode %{
24411     int vlen_enc = vector_length_encoding(this);
24412     BasicType bt = Matcher::vector_element_basic_type(this);
24413     int opc = this->ideal_Opcode();
24414     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24415                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24416   %}
24417   ins_pipe( pipe_slow );
24418 %}
24419 
24420 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24421   match(Set dst (AndV (Binary dst src2) mask));
24422   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24423   ins_encode %{
24424     int vlen_enc = vector_length_encoding(this);
24425     BasicType bt = Matcher::vector_element_basic_type(this);
24426     int opc = this->ideal_Opcode();
24427     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24428                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24429   %}
24430   ins_pipe( pipe_slow );
24431 %}
24432 
24433 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24434   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24435   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24436   ins_encode %{
24437     int vlen_enc = vector_length_encoding(this);
24438     BasicType bt = Matcher::vector_element_basic_type(this);
24439     int opc = this->ideal_Opcode();
24440     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24441                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24442   %}
24443   ins_pipe( pipe_slow );
24444 %}
24445 
24446 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24447   match(Set dst (SubVB (Binary dst src2) mask));
24448   match(Set dst (SubVS (Binary dst src2) mask));
24449   match(Set dst (SubVI (Binary dst src2) mask));
24450   match(Set dst (SubVL (Binary dst src2) mask));
24451   match(Set dst (SubVF (Binary dst src2) mask));
24452   match(Set dst (SubVD (Binary dst src2) mask));
24453   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24454   ins_encode %{
24455     int vlen_enc = vector_length_encoding(this);
24456     BasicType bt = Matcher::vector_element_basic_type(this);
24457     int opc = this->ideal_Opcode();
24458     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24459                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24460   %}
24461   ins_pipe( pipe_slow );
24462 %}
24463 
24464 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24465   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24466   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24467   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24468   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24469   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24470   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24471   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24472   ins_encode %{
24473     int vlen_enc = vector_length_encoding(this);
24474     BasicType bt = Matcher::vector_element_basic_type(this);
24475     int opc = this->ideal_Opcode();
24476     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24477                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24478   %}
24479   ins_pipe( pipe_slow );
24480 %}
24481 
24482 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24483   match(Set dst (MulVS (Binary dst src2) mask));
24484   match(Set dst (MulVI (Binary dst src2) mask));
24485   match(Set dst (MulVL (Binary dst src2) mask));
24486   match(Set dst (MulVF (Binary dst src2) mask));
24487   match(Set dst (MulVD (Binary dst src2) mask));
24488   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24489   ins_encode %{
24490     int vlen_enc = vector_length_encoding(this);
24491     BasicType bt = Matcher::vector_element_basic_type(this);
24492     int opc = this->ideal_Opcode();
24493     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24494                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24495   %}
24496   ins_pipe( pipe_slow );
24497 %}
24498 
24499 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24500   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24501   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24502   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24503   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24504   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24505   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24506   ins_encode %{
24507     int vlen_enc = vector_length_encoding(this);
24508     BasicType bt = Matcher::vector_element_basic_type(this);
24509     int opc = this->ideal_Opcode();
24510     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24511                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24512   %}
24513   ins_pipe( pipe_slow );
24514 %}
24515 
24516 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24517   match(Set dst (SqrtVF dst mask));
24518   match(Set dst (SqrtVD dst mask));
24519   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24520   ins_encode %{
24521     int vlen_enc = vector_length_encoding(this);
24522     BasicType bt = Matcher::vector_element_basic_type(this);
24523     int opc = this->ideal_Opcode();
24524     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24525                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24526   %}
24527   ins_pipe( pipe_slow );
24528 %}
24529 
24530 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24531   match(Set dst (DivVF (Binary dst src2) mask));
24532   match(Set dst (DivVD (Binary dst src2) mask));
24533   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24534   ins_encode %{
24535     int vlen_enc = vector_length_encoding(this);
24536     BasicType bt = Matcher::vector_element_basic_type(this);
24537     int opc = this->ideal_Opcode();
24538     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24539                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24540   %}
24541   ins_pipe( pipe_slow );
24542 %}
24543 
24544 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24545   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24546   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24547   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24548   ins_encode %{
24549     int vlen_enc = vector_length_encoding(this);
24550     BasicType bt = Matcher::vector_element_basic_type(this);
24551     int opc = this->ideal_Opcode();
24552     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24553                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24554   %}
24555   ins_pipe( pipe_slow );
24556 %}
24557 
24559 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24560   match(Set dst (RotateLeftV (Binary dst shift) mask));
24561   match(Set dst (RotateRightV (Binary dst shift) mask));
24562   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24563   ins_encode %{
24564     int vlen_enc = vector_length_encoding(this);
24565     BasicType bt = Matcher::vector_element_basic_type(this);
24566     int opc = this->ideal_Opcode();
24567     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24568                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24569   %}
24570   ins_pipe( pipe_slow );
24571 %}
24572 
24573 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24574   match(Set dst (RotateLeftV (Binary dst src2) mask));
24575   match(Set dst (RotateRightV (Binary dst src2) mask));
24576   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24577   ins_encode %{
24578     int vlen_enc = vector_length_encoding(this);
24579     BasicType bt = Matcher::vector_element_basic_type(this);
24580     int opc = this->ideal_Opcode();
24581     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24582                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24583   %}
24584   ins_pipe( pipe_slow );
24585 %}
24586 
24587 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24588   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24589   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24590   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24591   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24592   ins_encode %{
24593     int vlen_enc = vector_length_encoding(this);
24594     BasicType bt = Matcher::vector_element_basic_type(this);
24595     int opc = this->ideal_Opcode();
24596     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24597                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24598   %}
24599   ins_pipe( pipe_slow );
24600 %}
24601 
24602 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24603   predicate(!n->as_ShiftV()->is_var_shift());
24604   match(Set dst (LShiftVS (Binary dst src2) mask));
24605   match(Set dst (LShiftVI (Binary dst src2) mask));
24606   match(Set dst (LShiftVL (Binary dst src2) mask));
24607   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24608   ins_encode %{
24609     int vlen_enc = vector_length_encoding(this);
24610     BasicType bt = Matcher::vector_element_basic_type(this);
24611     int opc = this->ideal_Opcode();
24612     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24613                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24614   %}
24615   ins_pipe( pipe_slow );
24616 %}
24617 
24618 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24619   predicate(n->as_ShiftV()->is_var_shift());
24620   match(Set dst (LShiftVS (Binary dst src2) mask));
24621   match(Set dst (LShiftVI (Binary dst src2) mask));
24622   match(Set dst (LShiftVL (Binary dst src2) mask));
24623   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24624   ins_encode %{
24625     int vlen_enc = vector_length_encoding(this);
24626     BasicType bt = Matcher::vector_element_basic_type(this);
24627     int opc = this->ideal_Opcode();
24628     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24629                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24630   %}
24631   ins_pipe( pipe_slow );
24632 %}
24633 
24634 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24635   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24636   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24637   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24638   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24639   ins_encode %{
24640     int vlen_enc = vector_length_encoding(this);
24641     BasicType bt = Matcher::vector_element_basic_type(this);
24642     int opc = this->ideal_Opcode();
24643     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24644                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24645   %}
24646   ins_pipe( pipe_slow );
24647 %}
24648 
24649 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24650   predicate(!n->as_ShiftV()->is_var_shift());
24651   match(Set dst (RShiftVS (Binary dst src2) mask));
24652   match(Set dst (RShiftVI (Binary dst src2) mask));
24653   match(Set dst (RShiftVL (Binary dst src2) mask));
24654   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24655   ins_encode %{
24656     int vlen_enc = vector_length_encoding(this);
24657     BasicType bt = Matcher::vector_element_basic_type(this);
24658     int opc = this->ideal_Opcode();
24659     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24660                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24661   %}
24662   ins_pipe( pipe_slow );
24663 %}
24664 
24665 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24666   predicate(n->as_ShiftV()->is_var_shift());
24667   match(Set dst (RShiftVS (Binary dst src2) mask));
24668   match(Set dst (RShiftVI (Binary dst src2) mask));
24669   match(Set dst (RShiftVL (Binary dst src2) mask));
24670   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24671   ins_encode %{
24672     int vlen_enc = vector_length_encoding(this);
24673     BasicType bt = Matcher::vector_element_basic_type(this);
24674     int opc = this->ideal_Opcode();
24675     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24676                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24677   %}
24678   ins_pipe( pipe_slow );
24679 %}
24680 
24681 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24682   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24683   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24684   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24685   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24686   ins_encode %{
24687     int vlen_enc = vector_length_encoding(this);
24688     BasicType bt = Matcher::vector_element_basic_type(this);
24689     int opc = this->ideal_Opcode();
24690     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24691                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24692   %}
24693   ins_pipe( pipe_slow );
24694 %}
24695 
24696 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24697   predicate(!n->as_ShiftV()->is_var_shift());
24698   match(Set dst (URShiftVS (Binary dst src2) mask));
24699   match(Set dst (URShiftVI (Binary dst src2) mask));
24700   match(Set dst (URShiftVL (Binary dst src2) mask));
24701   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24702   ins_encode %{
24703     int vlen_enc = vector_length_encoding(this);
24704     BasicType bt = Matcher::vector_element_basic_type(this);
24705     int opc = this->ideal_Opcode();
24706     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24707                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24708   %}
24709   ins_pipe( pipe_slow );
24710 %}
24711 
24712 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24713   predicate(n->as_ShiftV()->is_var_shift());
24714   match(Set dst (URShiftVS (Binary dst src2) mask));
24715   match(Set dst (URShiftVI (Binary dst src2) mask));
24716   match(Set dst (URShiftVL (Binary dst src2) mask));
24717   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24718   ins_encode %{
24719     int vlen_enc = vector_length_encoding(this);
24720     BasicType bt = Matcher::vector_element_basic_type(this);
24721     int opc = this->ideal_Opcode();
24722     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24723                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24724   %}
24725   ins_pipe( pipe_slow );
24726 %}
24727 
24728 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24729   match(Set dst (MaxV (Binary dst src2) mask));
24730   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24731   ins_encode %{
24732     int vlen_enc = vector_length_encoding(this);
24733     BasicType bt = Matcher::vector_element_basic_type(this);
24734     int opc = this->ideal_Opcode();
24735     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24736                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24737   %}
24738   ins_pipe( pipe_slow );
24739 %}
24740 
24741 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24742   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24743   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24744   ins_encode %{
24745     int vlen_enc = vector_length_encoding(this);
24746     BasicType bt = Matcher::vector_element_basic_type(this);
24747     int opc = this->ideal_Opcode();
24748     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24749                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24750   %}
24751   ins_pipe( pipe_slow );
24752 %}
24753 
24754 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24755   match(Set dst (MinV (Binary dst src2) mask));
24756   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24757   ins_encode %{
24758     int vlen_enc = vector_length_encoding(this);
24759     BasicType bt = Matcher::vector_element_basic_type(this);
24760     int opc = this->ideal_Opcode();
24761     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24762                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24763   %}
24764   ins_pipe( pipe_slow );
24765 %}
24766 
24767 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24768   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24769   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24770   ins_encode %{
24771     int vlen_enc = vector_length_encoding(this);
24772     BasicType bt = Matcher::vector_element_basic_type(this);
24773     int opc = this->ideal_Opcode();
24774     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24775                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24776   %}
24777   ins_pipe( pipe_slow );
24778 %}
24779 
24780 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24781   match(Set dst (VectorRearrange (Binary dst src2) mask));
24782   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24783   ins_encode %{
24784     int vlen_enc = vector_length_encoding(this);
24785     BasicType bt = Matcher::vector_element_basic_type(this);
24786     int opc = this->ideal_Opcode();
24787     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24788                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24789   %}
24790   ins_pipe( pipe_slow );
24791 %}
24792 
24793 instruct vabs_masked(vec dst, kReg mask) %{
24794   match(Set dst (AbsVB dst mask));
24795   match(Set dst (AbsVS dst mask));
24796   match(Set dst (AbsVI dst mask));
24797   match(Set dst (AbsVL dst mask));
24798   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24799   ins_encode %{
24800     int vlen_enc = vector_length_encoding(this);
24801     BasicType bt = Matcher::vector_element_basic_type(this);
24802     int opc = this->ideal_Opcode();
24803     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24804                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24805   %}
24806   ins_pipe( pipe_slow );
24807 %}
24808 
24809 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24810   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24811   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24812   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24813   ins_encode %{
24814     assert(UseFMA, "Needs FMA instructions support.");
24815     int vlen_enc = vector_length_encoding(this);
24816     BasicType bt = Matcher::vector_element_basic_type(this);
24817     int opc = this->ideal_Opcode();
24818     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24819                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24820   %}
24821   ins_pipe( pipe_slow );
24822 %}
24823 
24824 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24825   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24826   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24827   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24828   ins_encode %{
24829     assert(UseFMA, "Needs FMA instructions support.");
24830     int vlen_enc = vector_length_encoding(this);
24831     BasicType bt = Matcher::vector_element_basic_type(this);
24832     int opc = this->ideal_Opcode();
24833     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24834                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24835   %}
24836   ins_pipe( pipe_slow );
24837 %}
24838 
24839 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24840   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24841   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24842   ins_encode %{
24843     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24844     int vlen_enc = vector_length_encoding(this, $src1);
24845     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24846 
    // Compare the vectors lane-wise, dispatching on the element basic type.
24848     switch (src1_elem_bt) {
24849       case T_BYTE: {
24850         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24851         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24852         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24853         break;
24854       }
24855       case T_SHORT: {
24856         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24857         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24858         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24859         break;
24860       }
24861       case T_INT: {
24862         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24863         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24864         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24865         break;
24866       }
24867       case T_LONG: {
24868         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24869         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24870         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24871         break;
24872       }
24873       case T_FLOAT: {
24874         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24875         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24876         break;
24877       }
24878       case T_DOUBLE: {
24879         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24880         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24881         break;
24882       }
24883       default: assert(false, "%s", type2name(src1_elem_bt)); break;
24884     }
24885   %}
24886   ins_pipe( pipe_slow );
24887 %}
24888 
24889 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24890   predicate(Matcher::vector_length(n) <= 32);
24891   match(Set dst (MaskAll src));
24892   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24893   ins_encode %{
24894     int mask_len = Matcher::vector_length(this);
24895     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24896   %}
24897   ins_pipe( pipe_slow );
24898 %}
24899 
24900 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24901   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24902   match(Set dst (XorVMask src (MaskAll cnt)));
24903   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24904   format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
24905   ins_encode %{
24906     uint masklen = Matcher::vector_length(this);
24907     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24908   %}
24909   ins_pipe( pipe_slow );
24910 %}
24911 
24912 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24913   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24914             (Matcher::vector_length(n) == 16) ||
24915             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24916   match(Set dst (XorVMask src (MaskAll cnt)));
24917   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24918   ins_encode %{
24919     uint masklen = Matcher::vector_length(this);
24920     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24921   %}
24922   ins_pipe( pipe_slow );
24923 %}
24924 
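// On AVX targets without predicate registers (n->bottom_type()->isa_vectmask()
// is null), VectorLongToMask materializes the mask as a boolean vector in an
// XMM/YMM register rather than in a k-register.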
24925 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24926   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24927   match(Set dst (VectorLongToMask src));
24928   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24929   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24930   ins_encode %{
24931     int mask_len = Matcher::vector_length(this);
24932     int vec_enc  = vector_length_encoding(mask_len);
24933     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24934                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24935   %}
24936   ins_pipe( pipe_slow );
24937 %}
24938 
24940 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24941   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24942   match(Set dst (VectorLongToMask src));
24943   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24944   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
24945   ins_encode %{
24946     int mask_len = Matcher::vector_length(this);
24947     assert(mask_len <= 32, "invalid mask length");
24948     int vec_enc  = vector_length_encoding(mask_len);
24949     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24950                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24951   %}
24952   ins_pipe( pipe_slow );
24953 %}
24954 
24955 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24956   predicate(n->bottom_type()->isa_vectmask());
24957   match(Set dst (VectorLongToMask src));
24958   format %{ "long_to_mask_evex $dst, $src\t!" %}
24959   ins_encode %{
24960     __ kmov($dst$$KRegister, $src$$Register);
24961   %}
24962   ins_pipe( pipe_slow );
24963 %}
24964 
24965 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24966   match(Set dst (AndVMask src1 src2));
24967   match(Set dst (OrVMask src1 src2));
24968   match(Set dst (XorVMask src1 src2));
24969   effect(TEMP kscratch);
24970   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24971   ins_encode %{
24972     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24973     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24974     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24975     uint masklen = Matcher::vector_length(this);
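    // Byte-granular mask logic (kandb/korb/kxorb) requires AVX512DQ; without
    // it, widen the operation to at least 16 mask bits so the word form is used.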
24976     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24977     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24978   %}
24979   ins_pipe( pipe_slow );
24980 %}
24981 
24982 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24983   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24984   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24985   ins_encode %{
24986     int vlen_enc = vector_length_encoding(this);
24987     BasicType bt = Matcher::vector_element_basic_type(this);
24988     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24989                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24990   %}
24991   ins_pipe( pipe_slow );
24992 %}
24993 
24994 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24995   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24996   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24997   ins_encode %{
24998     int vlen_enc = vector_length_encoding(this);
24999     BasicType bt = Matcher::vector_element_basic_type(this);
25000     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25001                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25002   %}
25003   ins_pipe( pipe_slow );
25004 %}
25005 
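// CastVV nodes exist only to carry vector type information for the compiler;
// they are zero-size and emit no code.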
25006 instruct castMM(kReg dst)
25007 %{
25008   match(Set dst (CastVV dst));
25009 
25010   size(0);
25011   format %{ "# castVV of $dst" %}
25012   ins_encode(/* empty encoding */);
25013   ins_cost(0);
25014   ins_pipe(empty);
25015 %}
25016 
25017 instruct castVV(vec dst)
25018 %{
25019   match(Set dst (CastVV dst));
25020 
25021   size(0);
25022   format %{ "# castVV of $dst" %}
25023   ins_encode(/* empty encoding */);
25024   ins_cost(0);
25025   ins_pipe(empty);
25026 %}
25027 
25028 instruct castVVLeg(legVec dst)
25029 %{
25030   match(Set dst (CastVV dst));
25031 
25032   size(0);
25033   format %{ "# castVV of $dst" %}
25034   ins_encode(/* empty encoding */);
25035   ins_cost(0);
25036   ins_pipe(empty);
25037 %}
25038 
25039 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25040 %{
25041   match(Set dst (IsInfiniteF src));
25042   effect(TEMP ktmp, KILL cr);
25043   format %{ "float_class_check $dst, $src" %}
25044   ins_encode %{
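    // imm8 0x18 selects the +Inf (bit 3) and -Inf (bit 4) classes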
25045     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25046     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25047   %}
25048   ins_pipe(pipe_slow);
25049 %}
25050 
25051 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25052 %{
25053   match(Set dst (IsInfiniteD src));
25054   effect(TEMP ktmp, KILL cr);
25055   format %{ "double_class_check $dst, $src" %}
25056   ins_encode %{
25057     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25058     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25059   %}
25060   ins_pipe(pipe_slow);
25061 %}
25062 
25063 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25064 %{
25065   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25066             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25067   match(Set dst (SaturatingAddV src1 src2));
25068   match(Set dst (SaturatingSubV src1 src2));
25069   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25070   ins_encode %{
25071     int vlen_enc = vector_length_encoding(this);
25072     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25073     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25074                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25075   %}
25076   ins_pipe(pipe_slow);
25077 %}
25078 
25079 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25080 %{
25081   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25082             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25083   match(Set dst (SaturatingAddV src1 src2));
25084   match(Set dst (SaturatingSubV src1 src2));
25085   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25086   ins_encode %{
25087     int vlen_enc = vector_length_encoding(this);
25088     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25089     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25090                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25091   %}
25092   ins_pipe(pipe_slow);
25093 %}
25094 
25095 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25096 %{
25097   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25098             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25099             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25100   match(Set dst (SaturatingAddV src1 src2));
25101   match(Set dst (SaturatingSubV src1 src2));
25102   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25103   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25104   ins_encode %{
25105     int vlen_enc = vector_length_encoding(this);
25106     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25107     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25108                                         $src1$$XMMRegister, $src2$$XMMRegister,
25109                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25110                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25111   %}
25112   ins_pipe(pipe_slow);
25113 %}
25114 
25115 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25116 %{
25117   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25118             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25119             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25120   match(Set dst (SaturatingAddV src1 src2));
25121   match(Set dst (SaturatingSubV src1 src2));
25122   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25123   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25124   ins_encode %{
25125     int vlen_enc = vector_length_encoding(this);
25126     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25127     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25128                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25129                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25130   %}
25131   ins_pipe(pipe_slow);
25132 %}
25133 
25134 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25135 %{
25136   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25137             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25138             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25139   match(Set dst (SaturatingAddV src1 src2));
25140   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25141   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25142   ins_encode %{
25143     int vlen_enc = vector_length_encoding(this);
25144     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25145     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25146                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25147   %}
25148   ins_pipe(pipe_slow);
25149 %}
25150 
25151 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25152 %{
25153   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25154             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25155             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25156   match(Set dst (SaturatingAddV src1 src2));
25157   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25158   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25159   ins_encode %{
25160     int vlen_enc = vector_length_encoding(this);
25161     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25162     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25163                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25164   %}
25165   ins_pipe(pipe_slow);
25166 %}
25167 
25168 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25169 %{
25170   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25171             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25172             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25173   match(Set dst (SaturatingSubV src1 src2));
25174   effect(TEMP ktmp);
25175   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25176   ins_encode %{
25177     int vlen_enc = vector_length_encoding(this);
25178     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25179     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25180                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25181   %}
25182   ins_pipe(pipe_slow);
25183 %}
25184 
25185 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25186 %{
25187   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25188             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25189             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25190   match(Set dst (SaturatingSubV src1 src2));
25191   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25192   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25193   ins_encode %{
25194     int vlen_enc = vector_length_encoding(this);
25195     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25196     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25197                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25198   %}
25199   ins_pipe(pipe_slow);
25200 %}
25201 
25202 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25203 %{
25204   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25205             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25206   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25207   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25208   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25209   ins_encode %{
25210     int vlen_enc = vector_length_encoding(this);
25211     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25212     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25213                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25214   %}
25215   ins_pipe(pipe_slow);
25216 %}
25217 
25218 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25219 %{
25220   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25221             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25222   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25223   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25224   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25225   ins_encode %{
25226     int vlen_enc = vector_length_encoding(this);
25227     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25228     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25229                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25230   %}
25231   ins_pipe(pipe_slow);
25232 %}
25233 
25234 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25235   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25236             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25237   match(Set dst (SaturatingAddV (Binary dst src) mask));
25238   match(Set dst (SaturatingSubV (Binary dst src) mask));
25239   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25240   ins_encode %{
25241     int vlen_enc = vector_length_encoding(this);
25242     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25243     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25244                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25245   %}
25246   ins_pipe( pipe_slow );
25247 %}
25248 
25249 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25250   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25251             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25252   match(Set dst (SaturatingAddV (Binary dst src) mask));
25253   match(Set dst (SaturatingSubV (Binary dst src) mask));
25254   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25255   ins_encode %{
25256     int vlen_enc = vector_length_encoding(this);
25257     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25258     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25259                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25260   %}
25261   ins_pipe( pipe_slow );
25262 %}
25263 
25264 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25265   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25266             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25267   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25268   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25269   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25270   ins_encode %{
25271     int vlen_enc = vector_length_encoding(this);
25272     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25273     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25274                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25275   %}
25276   ins_pipe( pipe_slow );
25277 %}
25278 
25279 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25280   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25281             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25282   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25283   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25284   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25285   ins_encode %{
25286     int vlen_enc = vector_length_encoding(this);
25287     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25288     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25289                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25290   %}
25291   ins_pipe( pipe_slow );
25292 %}
25293 
25294 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25295 %{
25296   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25297   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25298   ins_encode %{
25299     int vlen_enc = vector_length_encoding(this);
25300     BasicType bt = Matcher::vector_element_basic_type(this);
25301     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25302   %}
25303   ins_pipe(pipe_slow);
25304 %}
25305 
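// Scalar half-float support: ReinterpretS2HF/ReinterpretHF2S move the 16-bit
// FP16 payload between a general-purpose register and an XMM register via
// vmovw.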
25306 instruct reinterpretS2HF(regF dst, rRegI src)
25307 %{
25308   match(Set dst (ReinterpretS2HF src));
25309   format %{ "vmovw $dst, $src" %}
25310   ins_encode %{
25311     __ vmovw($dst$$XMMRegister, $src$$Register);
25312   %}
25313   ins_pipe(pipe_slow);
25314 %}
25315 
25316 instruct reinterpretHF2S(rRegI dst, regF src)
25317 %{
25318   match(Set dst (ReinterpretHF2S src));
25319   format %{ "vmovw $dst, $src" %}
25320   ins_encode %{
25321     __ vmovw($dst$$Register, $src$$XMMRegister);
25322   %}
25323   ins_pipe(pipe_slow);
25324 %}
25325 
25326 instruct convF2HFAndS2HF(regF dst, regF src)
25327 %{
25328   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25329   format %{ "convF2HFAndS2HF $dst, $src" %}
25330   ins_encode %{
25331     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25332   %}
25333   ins_pipe(pipe_slow);
25334 %}
25335 
25336 instruct convHF2SAndHF2F(regF dst, regF src)
25337 %{
25338   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25339   format %{ "convHF2SAndHF2F $dst, $src" %}
25340   ins_encode %{
25341     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25342   %}
25343   ins_pipe(pipe_slow);
25344 %}
25345 
25346 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25347 %{
25348   match(Set dst (SqrtHF src));
25349   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25350   ins_encode %{
25351     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25352   %}
25353   ins_pipe(pipe_slow);
25354 %}
25355 
25356 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25357 %{
25358   match(Set dst (AddHF src1 src2));
25359   match(Set dst (DivHF src1 src2));
25360   match(Set dst (MulHF src1 src2));
25361   match(Set dst (SubHF src1 src2));
25362   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25363   ins_encode %{
25364     int opcode = this->ideal_Opcode();
25365     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25366   %}
25367   ins_pipe(pipe_slow);
25368 %}
25369 
25370 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25371 %{
25372   predicate(VM_Version::supports_avx10_2());
25373   match(Set dst (MaxHF src1 src2));
25374   match(Set dst (MinHF src1 src2));
25375   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25376   ins_encode %{
25377     int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25378     __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25379   %}
25380   ins_pipe( pipe_slow );
25381 %}
25382 
25383 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25384 %{
25385   predicate(!VM_Version::supports_avx10_2());
25386   match(Set dst (MaxHF src1 src2));
25387   match(Set dst (MinHF src1 src2));
25388   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25389   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25390   ins_encode %{
25391     int opcode = this->ideal_Opcode();
25392     __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25393                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25394   %}
25395   ins_pipe( pipe_slow );
25396 %}
25397 
25398 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25399 %{
  match(Set dst (FmaHF src2 (Binary dst src1)));
25401   effect(DEF dst);
25402   format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25403   ins_encode %{
25404     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25405   %}
25406   ins_pipe( pipe_slow );
25407 %}
25408 
25410 instruct vector_sqrt_HF_reg(vec dst, vec src)
25411 %{
25412   match(Set dst (SqrtVHF src));
25413   format %{ "vector_sqrt_fp16 $dst, $src" %}
25414   ins_encode %{
25415     int vlen_enc = vector_length_encoding(this);
25416     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25417   %}
25418   ins_pipe(pipe_slow);
25419 %}
25420 
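// The memory variants below match the (VectorReinterpret (LoadVector ...))
// shape: FP16 vectors are loaded as short vectors and reinterpreted to
// half-float, which lets the load fold into the instruction's memory operand.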
25421 instruct vector_sqrt_HF_mem(vec dst, memory src)
25422 %{
25423   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25424   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25425   ins_encode %{
25426     int vlen_enc = vector_length_encoding(this);
25427     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25428   %}
25429   ins_pipe(pipe_slow);
25430 %}
25431 
25432 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25433 %{
25434   match(Set dst (AddVHF src1 src2));
25435   match(Set dst (DivVHF src1 src2));
25436   match(Set dst (MulVHF src1 src2));
25437   match(Set dst (SubVHF src1 src2));
25438   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25439   ins_encode %{
25440     int vlen_enc = vector_length_encoding(this);
25441     int opcode = this->ideal_Opcode();
25442     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25443   %}
25444   ins_pipe(pipe_slow);
25445 %}
25446 
25448 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25449 %{
25450   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25451   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25452   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25453   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25454   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25455   ins_encode %{
25456     int vlen_enc = vector_length_encoding(this);
25457     int opcode = this->ideal_Opcode();
25458     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25459   %}
25460   ins_pipe(pipe_slow);
25461 %}
25462 
25463 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25464 %{
25465   match(Set dst (FmaVHF src2 (Binary dst src1)));
25466   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25467   ins_encode %{
25468     int vlen_enc = vector_length_encoding(this);
25469     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25470   %}
25471   ins_pipe( pipe_slow );
25472 %}
25473 
25474 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25475 %{
25476   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25477   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25478   ins_encode %{
25479     int vlen_enc = vector_length_encoding(this);
25480     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25481   %}
25482   ins_pipe( pipe_slow );
25483 %}
25484 
25485 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25486 %{
25487   predicate(VM_Version::supports_avx10_2());
25488   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25489   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25490   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25491   ins_encode %{
25492     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25494     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25495   %}
25496   ins_pipe( pipe_slow );
25497 %}
25498 
25499 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25500 %{
25501   predicate(VM_Version::supports_avx10_2());
25502   match(Set dst (MinVHF src1 src2));
25503   match(Set dst (MaxVHF src1 src2));
25504   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25505   ins_encode %{
25506     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25508     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25509   %}
25510   ins_pipe( pipe_slow );
25511 %}
25512 
25513 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25514 %{
25515   predicate(!VM_Version::supports_avx10_2());
25516   match(Set dst (MinVHF src1 src2));
25517   match(Set dst (MaxVHF src1 src2));
25518   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25519   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25520   ins_encode %{
25521     int vlen_enc = vector_length_encoding(this);
25522     int opcode = this->ideal_Opcode();
25523     __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25524                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25525   %}
25526   ins_pipe( pipe_slow );
25527 %}
25528 
25529 //----------PEEPHOLE RULES-----------------------------------------------------
25530 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
25532 //
25533 // peeppredicate ( rule_predicate );
// // the rule is ignored unless this predicate holds
25535 //
25536 // peepmatch ( root_instr_name [preceding_instruction]* );
25537 //
25538 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file and have the signature
// // MachNode* (Block*, int, PhaseRegAlloc*, MachNode* (*)(), int...).
// // The arguments are the basic block, the current node index inside the
// // block, the register allocator, a function that when invoked returns a new
// // node as defined in peepreplace, and the rule numbers of the nodes
// // appearing in the corresponding peepmatch. The procedure returns true on
// // success, else false.
25547 //
25548 // peepconstraint %{
25549 // (instruction_number.operand_name relational_op instruction_number.operand_name
25550 //  [, ...] );
25551 // // instruction numbers are zero-based using left to right order in peepmatch
25552 //
25553 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25554 // // provide an instruction_number.operand_name for each operand that appears
25555 // // in the replacement instruction's match rule
25556 //
25557 // ---------VM FLAGS---------------------------------------------------------
25558 //
25559 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25560 //
25561 // Each peephole rule is given an identifying number starting with zero and
25562 // increasing by one in the order seen by the parser.  An individual peephole
25563 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25564 // on the command-line.
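//
// For example (both are develop flags, so they are only available in debug
// builds):
//   -XX:-OptoPeephole         disable all peephole rules
//   -XX:OptoPeepholeAt=3      enable only the rule numbered 3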
25565 //
25566 // ---------CURRENT LIMITATIONS----------------------------------------------
25567 //
// Only transformations inside a basic block (do we need more for peephole?)
25569 //
25570 // ---------EXAMPLE----------------------------------------------------------
25571 //
25572 // // pertinent parts of existing instructions in architecture description
25573 // instruct movI(rRegI dst, rRegI src)
25574 // %{
25575 //   match(Set dst (CopyI src));
25576 // %}
25577 //
25578 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25579 // %{
25580 //   match(Set dst (AddI dst src));
25581 //   effect(KILL cr);
25582 // %}
25583 //
25584 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25585 // %{
25586 //   match(Set dst (AddI dst src));
25587 // %}
25588 //
25589 // 1. Simple replacement
25590 // - Only match adjacent instructions in same basic block
25591 // - Only equality constraints
25592 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25593 // - Only one replacement instruction
25594 //
25595 // // Change (inc mov) to lea
25596 // peephole %{
25597 //   // lea should only be emitted when beneficial
25598 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25599 //   // increment preceded by register-register move
25600 //   peepmatch ( incI_rReg movI );
25601 //   // require that the destination register of the increment
25602 //   // match the destination register of the move
25603 //   peepconstraint ( 0.dst == 1.dst );
25604 //   // construct a replacement instruction that sets
25605 //   // the destination to ( move's source register + one )
25606 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25607 // %}
25608 //
25609 // 2. Procedural replacement
// - More flexible in finding relevant nodes
25611 // - More flexible constraints
25612 // - More flexible transformations
// - May utilize the architecture-dependent API more effectively
25614 // - Currently only one replacement instruction due to adlc parsing capabilities
25615 //
25616 // // Change (inc mov) to lea
25617 // peephole %{
25618 //   // lea should only be emitted when beneficial
25619 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of the matched nodes are passed into the function below
25621 //   peepmatch ( incI_rReg movI );
25622 //   // the method that takes the responsibility of transformation
25623 //   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that, when invoked,
//   // creates this node is passed into the function above
25626 //   peepreplace ( leaI_rReg_immI() );
25627 // %}
25628 
// These instructions are not matched by the matcher but are used by the peephole rules below
25630 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25631 %{
25632   predicate(false);
25633   match(Set dst (AddI src1 src2));
25634   format %{ "leal    $dst, [$src1 + $src2]" %}
25635   ins_encode %{
25636     Register dst = $dst$$Register;
25637     Register src1 = $src1$$Register;
25638     Register src2 = $src2$$Register;
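    // rbp and r13 cannot be encoded as a base register without a displacement
    // byte, so prefer the other operand as the base whenever that is safe.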
25639     if (src1 != rbp && src1 != r13) {
25640       __ leal(dst, Address(src1, src2, Address::times_1));
25641     } else {
25642       assert(src2 != rbp && src2 != r13, "");
25643       __ leal(dst, Address(src2, src1, Address::times_1));
25644     }
25645   %}
25646   ins_pipe(ialu_reg_reg);
25647 %}
25648 
25649 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25650 %{
25651   predicate(false);
25652   match(Set dst (AddI src1 src2));
25653   format %{ "leal    $dst, [$src1 + $src2]" %}
25654   ins_encode %{
25655     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25656   %}
25657   ins_pipe(ialu_reg_reg);
25658 %}
25659 
25660 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25661 %{
25662   predicate(false);
25663   match(Set dst (LShiftI src shift));
25664   format %{ "leal    $dst, [$src << $shift]" %}
25665   ins_encode %{
25666     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25667     Register src = $src$$Register;
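    // For a shift of 1 (times_2), [src + src] can use src as the base, whereas
    // the base-less scaled form [src*2] would require a 32-bit displacement of
    // zero; rbp and r13 are excluded because they cannot be a base without a
    // displacement byte.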
25668     if (scale == Address::times_2 && src != rbp && src != r13) {
25669       __ leal($dst$$Register, Address(src, src, Address::times_1));
25670     } else {
25671       __ leal($dst$$Register, Address(noreg, src, scale));
25672     }
25673   %}
25674   ins_pipe(ialu_reg_reg);
25675 %}
25676 
25677 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25678 %{
25679   predicate(false);
25680   match(Set dst (AddL src1 src2));
25681   format %{ "leaq    $dst, [$src1 + $src2]" %}
25682   ins_encode %{
25683     Register dst = $dst$$Register;
25684     Register src1 = $src1$$Register;
25685     Register src2 = $src2$$Register;
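    // Same base-register encoding restriction as in leaI_rReg_rReg_peep above.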
25686     if (src1 != rbp && src1 != r13) {
25687       __ leaq(dst, Address(src1, src2, Address::times_1));
25688     } else {
25689       assert(src2 != rbp && src2 != r13, "");
25690       __ leaq(dst, Address(src2, src1, Address::times_1));
25691     }
25692   %}
25693   ins_pipe(ialu_reg_reg);
25694 %}
25695 
25696 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25697 %{
25698   predicate(false);
25699   match(Set dst (AddL src1 src2));
25700   format %{ "leaq    $dst, [$src1 + $src2]" %}
25701   ins_encode %{
25702     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25703   %}
25704   ins_pipe(ialu_reg_reg);
25705 %}
25706 
25707 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25708 %{
25709   predicate(false);
25710   match(Set dst (LShiftL src shift));
25711   format %{ "leaq    $dst, [$src << $shift]" %}
25712   ins_encode %{
25713     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25714     Register src = $src$$Register;
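    // Same reasoning as in leaI_rReg_immI2_peep above.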
25715     if (scale == Address::times_2 && src != rbp && src != r13) {
25716       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25717     } else {
25718       __ leaq($dst$$Register, Address(noreg, src, scale));
25719     }
25720   %}
25721   ins_pipe(ialu_reg_reg);
25722 %}
25723 
// These peephole rules replace mov + I pairs (where I is one of {add, inc,
// dec, sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (supports_fast_2op_lea()), whereas the {inc, dec} rules are generally
// beneficial only on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
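//
// For example, coalescing a register-register move with an add (registers
// chosen purely for illustration):
//   movl  ebx, eax
//   addl  ebx, ecx        =>        leal  ebx, [eax + ecx]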
25730 
25731 peephole
25732 %{
25733   peeppredicate(VM_Version::supports_fast_2op_lea());
25734   peepmatch (addI_rReg);
25735   peepprocedure (lea_coalesce_reg);
25736   peepreplace (leaI_rReg_rReg_peep());
25737 %}
25738 
25739 peephole
25740 %{
25741   peeppredicate(VM_Version::supports_fast_2op_lea());
25742   peepmatch (addI_rReg_imm);
25743   peepprocedure (lea_coalesce_imm);
25744   peepreplace (leaI_rReg_immI_peep());
25745 %}
25746 
25747 peephole
25748 %{
25749   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25750                 VM_Version::is_intel_cascade_lake());
25751   peepmatch (incI_rReg);
25752   peepprocedure (lea_coalesce_imm);
25753   peepreplace (leaI_rReg_immI_peep());
25754 %}
25755 
25756 peephole
25757 %{
25758   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25759                 VM_Version::is_intel_cascade_lake());
25760   peepmatch (decI_rReg);
25761   peepprocedure (lea_coalesce_imm);
25762   peepreplace (leaI_rReg_immI_peep());
25763 %}
25764 
25765 peephole
25766 %{
25767   peeppredicate(VM_Version::supports_fast_2op_lea());
25768   peepmatch (salI_rReg_immI2);
25769   peepprocedure (lea_coalesce_imm);
25770   peepreplace (leaI_rReg_immI2_peep());
25771 %}
25772 
25773 peephole
25774 %{
25775   peeppredicate(VM_Version::supports_fast_2op_lea());
25776   peepmatch (addL_rReg);
25777   peepprocedure (lea_coalesce_reg);
25778   peepreplace (leaL_rReg_rReg_peep());
25779 %}
25780 
25781 peephole
25782 %{
25783   peeppredicate(VM_Version::supports_fast_2op_lea());
25784   peepmatch (addL_rReg_imm);
25785   peepprocedure (lea_coalesce_imm);
25786   peepreplace (leaL_rReg_immL32_peep());
25787 %}
25788 
25789 peephole
25790 %{
25791   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25792                 VM_Version::is_intel_cascade_lake());
25793   peepmatch (incL_rReg);
25794   peepprocedure (lea_coalesce_imm);
25795   peepreplace (leaL_rReg_immL32_peep());
25796 %}
25797 
25798 peephole
25799 %{
25800   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25801                 VM_Version::is_intel_cascade_lake());
25802   peepmatch (decL_rReg);
25803   peepprocedure (lea_coalesce_imm);
25804   peepreplace (leaL_rReg_immL32_peep());
25805 %}
25806 
25807 peephole
25808 %{
25809   peeppredicate(VM_Version::supports_fast_2op_lea());
25810   peepmatch (salL_rReg_immI2);
25811   peepprocedure (lea_coalesce_imm);
25812   peepreplace (leaL_rReg_immI2_peep());
25813 %}
25814 
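// These rules invoke lea_remove_redundant, which can delete a leaP* whose
// address computation turns out to be redundant (for instance when it
// degenerates to copying a register onto itself); see the
// architecture-dependent peephole file for the exact conditions.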
25815 peephole
25816 %{
25817   peepmatch (leaPCompressedOopOffset);
25818   peepprocedure (lea_remove_redundant);
25819 %}
25820 
25821 peephole
25822 %{
25823   peepmatch (leaP8Narrow);
25824   peepprocedure (lea_remove_redundant);
25825 %}
25826 
25827 peephole
25828 %{
25829   peepmatch (leaP32Narrow);
25830   peepprocedure (lea_remove_redundant);
25831 %}
25832 
// These peephole rules match instructions which set flags and are followed by
// a testI/L_reg. The test instruction is redundant when the downstream
// instructions (like JCC or CMOV) use only flags that are already set by the
// previous instruction.
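//
// For example, andl already sets ZF and SF, so the testl below can be elided
// (registers chosen purely for illustration):
//   andl  eax, ebx
//   testl eax, eax        =>        andl  eax, ebx
//   jne   done                      jne   done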
25835 
// int variant
25837 peephole
25838 %{
25839   peepmatch (testI_reg);
25840   peepprocedure (test_may_remove);
25841 %}
25842 
// long variant
25844 peephole
25845 %{
25846   peepmatch (testL_reg);
25847   peepprocedure (test_may_remove);
25848 %}
25849 
25850 
25851 //----------SMARTSPILL RULES---------------------------------------------------
25852 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.