1 //
    2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 AMD64 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
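//
// For illustration (an editorial example, not an additional definition), a
// line such as
//   reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
// reads as: RAX is save-on-call under both the compiled-Java and C calling
// conventions, spills and reloads as an int (LoadI/StoreI), uses hardware
// encoding 0 in instruction fields, and is backed by the VMReg returned by
// rax->as_VMReg().  The companion RAX_H entry names the upper half of the
// 64-bit pair and shares the same encoding.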
   61 
   62 // General Registers
   63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
   64 // used as byte registers)
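// For example, only the low three bits of an encoding fit in the ModRM
// reg/rm fields; for R8-R15 (encodings 8-15) the extra bit is carried by the
// REX prefix (REX.R/REX.X/REX.B), and the APX extended GPRs R16-R31 defined
// below likewise need the newer REX2 / extended-EVEX forms.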
   65 
    66 // RBX, RSI, and RDI were previously set as save-on-entry for Java code.
    67 // SOE was turned off in Java code because of the frequent use of uncommon traps.
    68 // Now that the allocator is better, RSI and RDI are enabled as SOE registers again.
   69 
   70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
   71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
   72 
   73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
   74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
   75 
   76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
   77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
   78 
   79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
   80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
   81 
   82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
   83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
   84 
    85 // Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
   86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
   87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
   88 
   89 #ifdef _WIN64
   90 
   91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
   92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
   93 
   94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
   95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
   96 
   97 #else
   98 
   99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
  100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
  101 
  102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
  103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
  104 
  105 #endif
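// RSI and RDI are callee-saved in the Win64 C ABI but caller-saved (and used
// to pass the first two integer arguments) in the System V AMD64 ABI, hence
// the different C-convention save types in the two branches above.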
  106 
  107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
  108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
  109 
  110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
  111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
  112 
  113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
  114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  115 
  116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
  117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
  118 
  119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
  120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
  121 
  122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
  123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
  124 
  125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
  126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
  127 
  128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
  129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
  130 
  131 reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
  132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
  133 
  134 reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
  135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
  136 
  137 reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
  138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
  139 
  140 reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
  141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
  142 
  143 reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
  144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
  145 
  146 reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
  147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
  148 
  149 reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
  150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
  151 
  152 reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
  153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
  154 
  155 reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
  156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
  157 
  158 reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
  159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
  160 
  161 reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
  162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
  163 
  164 reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
  165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
  166 
  167 reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
  168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
  169 
  170 reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
  171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
  172 
  173 reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
  174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
  175 
  176 reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
  177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
  178 
  179 // Floating Point Registers
  180 
  181 // Specify priority of register selection within phases of register
  182 // allocation.  Highest priority is first.  A useful heuristic is to
  183 // give registers a low priority when they are required by machine
  184 // instructions, like EAX and EDX on I486, and choose no-save registers
  185 // before save-on-call, & save-on-call before save-on-entry.  Registers
  186 // which participate in fixed calling sequences should come last.
  187 // Registers which are used as pairs must fall on an even boundary.
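//
// In chunk0 below, the freely clobbered scratch registers (R10, R11, R8, R9)
// are offered to the allocator first, and RSP, which participates in every
// calling sequence, comes last, following the heuristic above.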
  188 
  189 alloc_class chunk0(R10,         R10_H,
  190                    R11,         R11_H,
  191                    R8,          R8_H,
  192                    R9,          R9_H,
  193                    R12,         R12_H,
  194                    RCX,         RCX_H,
  195                    RBX,         RBX_H,
  196                    RDI,         RDI_H,
  197                    RDX,         RDX_H,
  198                    RSI,         RSI_H,
  199                    RAX,         RAX_H,
  200                    RBP,         RBP_H,
  201                    R13,         R13_H,
  202                    R14,         R14_H,
  203                    R15,         R15_H,
  204                    R16,         R16_H,
  205                    R17,         R17_H,
  206                    R18,         R18_H,
  207                    R19,         R19_H,
  208                    R20,         R20_H,
  209                    R21,         R21_H,
  210                    R22,         R22_H,
  211                    R23,         R23_H,
  212                    R24,         R24_H,
  213                    R25,         R25_H,
  214                    R26,         R26_H,
  215                    R27,         R27_H,
  216                    R28,         R28_H,
  217                    R29,         R29_H,
  218                    R30,         R30_H,
  219                    R31,         R31_H,
  220                    RSP,         RSP_H);
  221 
   222 // XMM registers.  512-bit registers, i.e. 16 words each, labeled (a)-p.
   223 // Word a in each register holds a Float, words ab hold a Double.
   224 // The whole registers are used in SSE4.2-based intrinsics,
   225 // array copy stubs and superword operations (see the UseSSE42Intrinsics,
   226 // UseXMMForArrayCopy and UseSuperWord flags).
  227 // For pre EVEX enabled architectures:
  228 //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
  229 // For EVEX enabled architectures:
  230 //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
  231 //
  232 // Linux ABI:   No register preserved across function calls
  233 //              XMM0-XMM7 might hold parameters
  234 // Windows ABI: XMM6-XMM15 preserved across function calls
  235 //              XMM0-XMM3 might hold parameters
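//
// As a worked example of the word layout (following the usual HotSpot
// convention): word a of a register alone backs a 32-bit Float, words ab back
// a 64-bit Double, words a-d a 128-bit vector, a-h a 256-bit vector, and the
// full a-p range a 512-bit vector, which is why each register is described by
// 16 Op_RegF slots below.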
  236 
  237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
  238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
  239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
  240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
  241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
  242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
  243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
  244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
  245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
  246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
  247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
  248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
  249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
  250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
  251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
  252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
  253 
  254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
  255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
  256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
  257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
  258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
  259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
  260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
  261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
  262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
  263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
  264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
  265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
  266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
  267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
  268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
  269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
  270 
  271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
  272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
  273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
  274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
  275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
  276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
  277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
  278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
  279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
  280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
  281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
  282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
  283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
  284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
  285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
  286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
  287 
  288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
  289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
  290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
  291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
  292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
  293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
  294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
  295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
  296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
  297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
  298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
  299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
  300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
  301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
  302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
  303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
  304 
  305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
  306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
  307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
  308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
  309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
  310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
  311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
  312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
  313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
  314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
  315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
  316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
  317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
  318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
  319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
  320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
  321 
  322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
  323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
  324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
  325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
  326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
  327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
  328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
  329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
  330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
  331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
  332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
  333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
  334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
  335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
  336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
  337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
  338 
  339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
  340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
  341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
  342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
  343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
  344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
  345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
  346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
  347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
  348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
  349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
  350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
  351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
  352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
  353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
  354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
  355 
  356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
  357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
  358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
  359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
  360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
  361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
  362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
  363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
  364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
  365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
  366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
  367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
  368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
  369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
  370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
  371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
  372 
  373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
  374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
  375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
  376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
  377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
  378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
  379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
  380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
  381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
  382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
  383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
  384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
  385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
  386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
  387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
  388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
  389 
  390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
  391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
  392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
  393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
  394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
  395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
  396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
  397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
  398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
  399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
  400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
  401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
  402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
  403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
  404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
  405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
  406 
  407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
  408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
  409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
  410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
  411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
  412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
  413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
  414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
  415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
  416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
  417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
  418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
  419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
  420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
  421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
  422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
  423 
  424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
  425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
  426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
  427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
  428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
  429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
  430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
  431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
  432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
  433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
  434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
  435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
  436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
  437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
  438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
  439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
  440 
  441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
  442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
  443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
  444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
  445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
  446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
  447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
  448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
  449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
  450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
  451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
  452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
  453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
  454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
  455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
  456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
  457 
  458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
  459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
  460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
  461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
  462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
  463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
  464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
  465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
  466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
  467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
  468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
  469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
  470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
  471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
  472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
  473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
  474 
  475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
  476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
  477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
  478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
  479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
  480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
  481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
  482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
  483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
  484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
  485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
  486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
  487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
  488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
  489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
  490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
  491 
  492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
  493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
  494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
  495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
  496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
  497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
  498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
  499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
  500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
  501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
  502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
  503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
  504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
  505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
  506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
  507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
  508 
  509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
  510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
  511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
  512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
  513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
  514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
  515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
  516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
  517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
  518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
  519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
  520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
  521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
  522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
  523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
  524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
  525 
  526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
  527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
  528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
  529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
  530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
  531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
  532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
  533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
  534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
  535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
  536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
  537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
  538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
  539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
  540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
  541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
  542 
  543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
  544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
  545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
  546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
  547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
  548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
  549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
  550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
  551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
  552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
  553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
  554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
  555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
  556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
  557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
  558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
  559 
  560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
  561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
  562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
  563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
  564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
  565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
  566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
  567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
  568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
  569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
  570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
  571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
  572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
  573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
  574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
  575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
  576 
  577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
  578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
  579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
  580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
  581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
  582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
  583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
  584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
  585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
  586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
  587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
  588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
  589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
  590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
  591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
  592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
  593 
  594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
  595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
  596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
  597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
  598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
  599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
  600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
  601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
  602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
  603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
  604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
  605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
  606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
  607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
  608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
  609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
  610 
  611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
  612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
  613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
  614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
  615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
  616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
  617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
  618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
  619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
  620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
  621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
  622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
  623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
  624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
  625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
  626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
  627 
  628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
  629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
  630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
  631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
  632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
  633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
  634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
  635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
  636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
  637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
  638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
  639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
  640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
  641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
  642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
  643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
  644 
  645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
  646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
  647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
  648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
  649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
  650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
  651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
  652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
  653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
  654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
  655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
  656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
  657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
  658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
  659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
  660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
  661 
  662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
  663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
  664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
  665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
  666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
  667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
  668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
  669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
  670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
  671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
  672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
  673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
  674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
  675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
  676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
  677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
  678 
  679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
  680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
  681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
  682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
  683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
  684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
  685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
  686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
  687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
  688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
  689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
  690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
  691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
  692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
  693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
  694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
  695 
  696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
  697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
  698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
  699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
  700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
  701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
  702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
  703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
  704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
  705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
  706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
  707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
  708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
  709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
  710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
  711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
  712 
  713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
  714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
  715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
  716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
  717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
  718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
  719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
  720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
  721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
  722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
  723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
  724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
  725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
  726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
  727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
  728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
  729 
  730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
  731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
  732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
  733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
  734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
  735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
  736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
  737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
  738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
  739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
  740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
  741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
  742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
  743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
  744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
  745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
  746 
  747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
  748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
  749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
  750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
  751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
  752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
  753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
  754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
  755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
  756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
  757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
  758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
  759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
  760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
  761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
  762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
  763 
  764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
  765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
  766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
  767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
  768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
  769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
  770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
  771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
  772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
  773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
  774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
  775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
  776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
  777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
  778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
  779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
  780 
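// Condition-code register.  It has no VMReg backing (VMRegImpl::Bad()) and no
// ideal spill type, so it exists mainly to let the allocator track flag
// liveness rather than to be saved and restored like the registers above.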
  781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
  782 
  783 // AVX3 Mask Registers.
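// K0 is deliberately absent: in the EVEX encoding a mask-field value of 0
// means "no masking", so k0 cannot serve as an allocatable predicate register.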
  784 reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
  785 reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());
  786 
  787 reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
  788 reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());
  789 
  790 reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
  791 reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());
  792 
  793 reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
  794 reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());
  795 
  796 reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
  797 reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());
  798 
  799 reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
  800 reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());
  801 
  802 reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
  803 reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());
  804 
  805 
  806 //----------Architecture Description Register Classes--------------------------
  807 // Several register classes are automatically defined based upon information in
  808 // this architecture description.
  809 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  811 //
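// For illustration (a sketch, not a definition added here): instruction
// operands elsewhere in this file select a class by name, along the lines of
//   operand rRegI() %{ constraint(ALLOC_IN_RC(int_reg)); match(RegI); ... %}
// so a class's membership determines which registers such operands may use.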
  812 
  813 // Empty register class.
  814 reg_class no_reg();
  815 
  816 // Class for all pointer/long registers including APX extended GPRs.
  817 reg_class all_reg(RAX, RAX_H,
  818                   RDX, RDX_H,
  819                   RBP, RBP_H,
  820                   RDI, RDI_H,
  821                   RSI, RSI_H,
  822                   RCX, RCX_H,
  823                   RBX, RBX_H,
  824                   RSP, RSP_H,
  825                   R8,  R8_H,
  826                   R9,  R9_H,
  827                   R10, R10_H,
  828                   R11, R11_H,
  829                   R12, R12_H,
  830                   R13, R13_H,
  831                   R14, R14_H,
  832                   R15, R15_H,
  833                   R16, R16_H,
  834                   R17, R17_H,
  835                   R18, R18_H,
  836                   R19, R19_H,
  837                   R20, R20_H,
  838                   R21, R21_H,
  839                   R22, R22_H,
  840                   R23, R23_H,
  841                   R24, R24_H,
  842                   R25, R25_H,
  843                   R26, R26_H,
  844                   R27, R27_H,
  845                   R28, R28_H,
  846                   R29, R29_H,
  847                   R30, R30_H,
  848                   R31, R31_H);
  849 
  850 // Class for all int registers including APX extended GPRs.
   851 reg_class all_int_reg(RAX,
  852                       RDX,
  853                       RBP,
  854                       RDI,
  855                       RSI,
  856                       RCX,
  857                       RBX,
  858                       R8,
  859                       R9,
  860                       R10,
  861                       R11,
  862                       R12,
  863                       R13,
  864                       R14,
  865                       R16,
  866                       R17,
  867                       R18,
  868                       R19,
  869                       R20,
  870                       R21,
  871                       R22,
  872                       R23,
  873                       R24,
  874                       R25,
  875                       R26,
  876                       R27,
  877                       R28,
  878                       R29,
  879                       R30,
  880                       R31);
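// RSP and R15 are intentionally absent from this class: RSP is the stack
// pointer and R15 is reserved as the current-thread register (r15_thread) in
// the HotSpot x86_64 convention, so neither is available for ordinary ints.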
  881 
  882 // Class for all pointer registers
  883 reg_class any_reg %{
  884   return _ANY_REG_mask;
  885 %}
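// Classes defined with %{ ... %}, here and below, return a RegMask computed
// at runtime instead of a fixed member list; presumably this lets membership
// depend on VM flags and CPU features (the mask-initialization code itself
// lies outside this section).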
  886 
  887 // Class for all pointer registers (excluding RSP)
  888 reg_class ptr_reg %{
  889   return _PTR_REG_mask;
  890 %}
  891 
  892 // Class for all pointer registers (excluding RSP and RBP)
  893 reg_class ptr_reg_no_rbp %{
  894   return _PTR_REG_NO_RBP_mask;
  895 %}
  896 
  897 // Class for all pointer registers (excluding RAX and RSP)
  898 reg_class ptr_no_rax_reg %{
  899   return _PTR_NO_RAX_REG_mask;
  900 %}
  901 
  902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
  903 reg_class ptr_no_rax_rbx_reg %{
  904   return _PTR_NO_RAX_RBX_REG_mask;
  905 %}
  906 
  907 // Class for all long registers (excluding RSP)
  908 reg_class long_reg %{
  909   return _LONG_REG_mask;
  910 %}
  911 
  912 // Class for all long registers (excluding RAX, RDX and RSP)
  913 reg_class long_no_rax_rdx_reg %{
  914   return _LONG_NO_RAX_RDX_REG_mask;
  915 %}
  916 
  917 // Class for all long registers (excluding RCX and RSP)
  918 reg_class long_no_rcx_reg %{
  919   return _LONG_NO_RCX_REG_mask;
  920 %}
  921 
  922 // Class for all long registers (excluding RBP and R13)
  923 reg_class long_no_rbp_r13_reg %{
  924   return _LONG_NO_RBP_R13_REG_mask;
  925 %}
  926 
  927 // Class for all int registers (excluding RSP)
  928 reg_class int_reg %{
  929   return _INT_REG_mask;
  930 %}
  931 
  932 // Class for all int registers (excluding RAX, RDX, and RSP)
  933 reg_class int_no_rax_rdx_reg %{
  934   return _INT_NO_RAX_RDX_REG_mask;
  935 %}
  936 
  937 // Class for all int registers (excluding RCX and RSP)
  938 reg_class int_no_rcx_reg %{
  939   return _INT_NO_RCX_REG_mask;
  940 %}
  941 
  942 // Class for all int registers (excluding RBP and R13)
  943 reg_class int_no_rbp_r13_reg %{
  944   return _INT_NO_RBP_R13_REG_mask;
  945 %}
  946 
  947 // Singleton class for RAX pointer register
  948 reg_class ptr_rax_reg(RAX, RAX_H);
  949 
  950 // Singleton class for RBX pointer register
  951 reg_class ptr_rbx_reg(RBX, RBX_H);
  952 
  953 // Singleton class for RSI pointer register
  954 reg_class ptr_rsi_reg(RSI, RSI_H);
  955 
  956 // Singleton class for RBP pointer register
  957 reg_class ptr_rbp_reg(RBP, RBP_H);
  958 
  959 // Singleton class for RDI pointer register
  960 reg_class ptr_rdi_reg(RDI, RDI_H);
  961 
  962 // Singleton class for stack pointer
  963 reg_class ptr_rsp_reg(RSP, RSP_H);
  964 
  965 // Singleton class for TLS pointer
  966 reg_class ptr_r15_reg(R15, R15_H);
  967 
  968 // Singleton class for RAX long register
  969 reg_class long_rax_reg(RAX, RAX_H);
  970 
  971 // Singleton class for RCX long register
  972 reg_class long_rcx_reg(RCX, RCX_H);
  973 
  974 // Singleton class for RDX long register
  975 reg_class long_rdx_reg(RDX, RDX_H);
  976 
  977 // Singleton class for R11 long register
  978 reg_class long_r11_reg(R11, R11_H);
  979 
  980 // Singleton class for RAX int register
  981 reg_class int_rax_reg(RAX);
  982 
  983 // Singleton class for RBX int register
  984 reg_class int_rbx_reg(RBX);
  985 
  986 // Singleton class for RCX int register
  987 reg_class int_rcx_reg(RCX);
  988 
  989 // Singleton class for RDX int register
  990 reg_class int_rdx_reg(RDX);
  991 
  992 // Singleton class for RDI int register
  993 reg_class int_rdi_reg(RDI);
  994 
  995 // Singleton class for instruction pointer
  996 // reg_class ip_reg(RIP);
  997 
  998 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
  999                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1000                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1001                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1002                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1003                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1004                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1005                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1006                    XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1007                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1008                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1009                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1010                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1011                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1012                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1013                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1014                    XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1015                    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1016                    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1017                    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1018                    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1019                    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1020                    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1021                    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1022                    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1023                    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1024                    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1025                    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1026                    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1027                    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
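// chunk2 covers the AVX-512 opmask registers (K0 is omitted: it encodes the
// implicit "no mask" operand and is not allocatable).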
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
 1055 // flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
 1061 // Class for pre evex float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
 1079 // Class for evex float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
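// reg_class_dynamic selects between the two underlying classes at runtime:
// the EVEX variant (XMM0-XMM31) when the predicate holds, the 16-register
// legacy variant otherwise.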
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1115 
 1116 // Class for pre evex double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
 1134 // Class for evex double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
 1171 // Class for pre evex 32bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
 1189 // Class for evex 32bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre evex 64bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for evex 64bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre evex 128bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for evex 128bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre evex 256bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for evex 256bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
 1391 // Class for all 512bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
 1425 // Class for restricted 512bit vector registers
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
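// Singleton class for the low 128 bits of XMM0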
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
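// A CastLL fits the 32-bit immediate forms only if each type bound is either
// unbounded (min_jlong/max_jlong) or representable as a signed 32-bit immediate.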
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
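// These masks are computed once by reg_mask_init() below, after the relevant
// VM flags (UseAPX, UseCompressedOops, PreserveFramePointer) are known.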
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
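// R12 is reserved as the heap base register when compressed oops are in use,
// so it has to be removed from the allocatable masks below.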
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
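  // APX extended general-purpose registers (r16-r31); they are stripped from
  // the masks below when UseAPX is disabled.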
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
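  // _ALL_INT_REG_mask is likewise generated by adlc; the int subsets are derived from it.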
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
 1626 static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());  // Generate vzeroupper
 1628 }
 1629 
 1630 static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0;  // 3 bytes for vzeroupper
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;
 1678 }
 1679 
 1680 // This could be in MacroAssembler but it's fairly C2 specific
 1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1682   Label exit;
 1683   __ jccb(Assembler::noParity, exit);
 1684   __ pushf();
 1685   //
 1686   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1687   // zero OF,AF,SF for NaN values.
 1688   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1689   // values returns 'less than' result (CF is set).
 1690   // Leave the rest of flags unchanged.
 1691   //
 1692   //    7 6 5 4 3 2 1 0
 1693   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1694   //    0 0 1 0 1 0 1 1   (0x2B)
 1695   //
 1696   __ andq(Address(rsp, 0), 0xffffff2b);
 1697   __ popf();
 1698   __ bind(exit);
 1699 }
 1700 
 1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  // Produce the three-way compare result (-1, 0, +1) in dst after a floating point compare.
  // The unordered case (at least one input is NaN) sets CF=1, so the 'below' branch also yields -1.
 1704   Label done;
 1705   __ movl(dst, -1);
 1706   __ jcc(Assembler::below, done);
 1707   __ setcc(Assembler::notEqual, dst);
 1708   __ bind(done);
 1709 }
 1710 
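// Scalar floating-point precision selector for the compare and min/max helpers
// below: half (Float16), single, and double precision.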
 1711 enum FP_PREC {
 1712   fp_prec_hlf,
 1713   fp_prec_flt,
 1714   fp_prec_dbl
 1715 };
 1716 
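// Emit an unordered scalar compare for the given precision (evucomish/ucomiss/ucomisd).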
 1717 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
 1718                                 XMMRegister p, XMMRegister q) {
 1719   if (pt == fp_prec_hlf) {
 1720     __ evucomish(p, q);
 1721   } else if (pt == fp_prec_flt) {
 1722     __ ucomiss(p, q);
 1723   } else {
 1724     __ ucomisd(p, q);
 1725   }
 1726 }
 1727 
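// Move a scalar value of the given precision between XMM registers; the GPR
// scratch is only needed for the half-precision case.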
 1728 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
 1729                          XMMRegister dst, XMMRegister src, Register scratch) {
 1730   if (pt == fp_prec_hlf) {
 1731     __ movhlf(dst, src, scratch);
 1732   } else if (pt == fp_prec_flt) {
 1733     __ movflt(dst, src);
 1734   } else {
 1735     __ movdbl(dst, src);
 1736   }
 1737 }
 1738 
 1739 // Math.min()          # Math.max()
 1740 // -----------------------------
 1741 // (v)ucomis[h/s/d]    #
 1742 // ja   -> b           # a
 1743 // jp   -> NaN         # NaN
 1744 // jb   -> a           # b
 1745 // je                  #
 1746 // |-jz -> a | b       # a & b
 1747 // |    -> a           #
 1748 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1749                             XMMRegister a, XMMRegister b,
 1750                             XMMRegister xmmt, Register rt,
 1751                             bool min, enum FP_PREC pt) {
 1752 
 1753   Label nan, zero, below, above, done;
 1754 
 1755   emit_fp_ucom(masm, pt, a, b);
 1756 
 1757   if (dst->encoding() != (min ? b : a)->encoding()) {
 1758     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1759   } else {
 1760     __ jccb(Assembler::above, done);
 1761   }
 1762 
 1763   __ jccb(Assembler::parity, nan);  // PF=1
 1764   __ jccb(Assembler::below, below); // CF=1
 1765 
 1766   // equal
 1767   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1768   emit_fp_ucom(masm, pt, a, xmmt);
 1769 
 1770   __ jccb(Assembler::equal, zero);
 1771   movfp(masm, pt, dst, a, rt);
 1772 
 1773   __ jmp(done);
 1774 
 1775   __ bind(zero);
 1776   if (min) {
 1777     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1778   } else {
 1779     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1780   }
 1781 
 1782   __ jmp(done);
 1783 
 1784   __ bind(above);
 1785   movfp(masm, pt, dst, min ? b : a, rt);
 1786 
 1787   __ jmp(done);
 1788 
 1789   __ bind(nan);
 1790   if (pt == fp_prec_hlf) {
 1791     __ movl(rt, 0x00007e00); // Float16.NaN
 1792     __ evmovw(dst, rt);
 1793   } else if (pt == fp_prec_flt) {
 1794     __ movl(rt, 0x7fc00000); // Float.NaN
 1795     __ movdl(dst, rt);
 1796   } else {
 1797     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1798     __ movdq(dst, rt);
 1799   }
 1800   __ jmp(done);
 1801 
 1802   __ bind(below);
 1803   movfp(masm, pt, dst, min ? a : b, rt);
 1804 
 1805   __ bind(done);
 1806 }
 1807 
 1808 //=============================================================================
 1809 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1810 
 1811 int ConstantTable::calculate_table_base_offset() const {
 1812   return 0;  // absolute addressing, no offset
 1813 }
 1814 
 1815 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1816 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1817   ShouldNotReachHere();
 1818 }
 1819 
 1820 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1821   // Empty encoding
 1822 }
 1823 
 1824 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1825   return 0;
 1826 }
 1827 
 1828 #ifndef PRODUCT
 1829 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1830   st->print("# MachConstantBaseNode (empty encoding)");
 1831 }
 1832 #endif
 1833 
 1834 
 1835 //=============================================================================
 1836 #ifndef PRODUCT
 1837 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1838   Compile* C = ra_->C;
 1839 
 1840   int framesize = C->output()->frame_size_in_bytes();
 1841   int bangsize = C->output()->bang_size_in_bytes();
 1842   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1843   // Remove wordSize for return addr which is already pushed.
 1844   framesize -= wordSize;
 1845 
 1846   if (C->output()->need_stack_bang(bangsize)) {
 1847     framesize -= wordSize;
 1848     st->print("# stack bang (%d bytes)", bangsize);
 1849     st->print("\n\t");
 1850     st->print("pushq   rbp\t# Save rbp");
 1851     if (PreserveFramePointer) {
 1852         st->print("\n\t");
 1853         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1854     }
 1855     if (framesize) {
 1856       st->print("\n\t");
 1857       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1858     }
 1859   } else {
 1860     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1861     st->print("\n\t");
 1862     framesize -= wordSize;
 1863     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1864     if (PreserveFramePointer) {
 1865       st->print("\n\t");
 1866       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1867       if (framesize > 0) {
 1868         st->print("\n\t");
 1869         st->print("addq    rbp, #%d", framesize);
 1870       }
 1871     }
 1872   }
 1873 
 1874   if (VerifyStackAtCalls) {
 1875     st->print("\n\t");
 1876     framesize -= wordSize;
 1877     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1878 #ifdef ASSERT
 1879     st->print("\n\t");
 1880     st->print("# stack alignment check");
 1881 #endif
 1882   }
 1883   if (C->stub_function() != nullptr) {
 1884     st->print("\n\t");
 1885     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1886     st->print("\n\t");
 1887     st->print("je      fast_entry\t");
 1888     st->print("\n\t");
 1889     st->print("call    #nmethod_entry_barrier_stub\t");
 1890     st->print("\n\tfast_entry:");
 1891   }
 1892   st->cr();
 1893 }
 1894 #endif
 1895 
 1896 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1897   Compile* C = ra_->C;
 1898 
 1899   int framesize = C->output()->frame_size_in_bytes();
 1900   int bangsize = C->output()->bang_size_in_bytes();
 1901 
 1902   if (C->clinit_barrier_on_entry()) {
 1903     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1904     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1905 
 1906     Label L_skip_barrier;
 1907     Register klass = rscratch1;
 1908 
 1909     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1910     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1911 
 1912     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1913 
 1914     __ bind(L_skip_barrier);
 1915   }
 1916 
 1917   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
 1918 
 1919   C->output()->set_frame_complete(__ offset());
 1920 
 1921   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because nodes that use the
    // constant table might be emitted before MachConstantBaseNode.
 1924     ConstantTable& constant_table = C->output()->constant_table();
 1925     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1926   }
 1927 }
 1928 
 1929 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1930 {
 1931   return MachNode::size(ra_); // too many variables; just compute it
 1932                               // the hard way
 1933 }
 1934 
 1935 int MachPrologNode::reloc() const
 1936 {
 1937   return 0; // a large enough number
 1938 }
 1939 
 1940 //=============================================================================
 1941 #ifndef PRODUCT
 1942 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1943 {
 1944   Compile* C = ra_->C;
 1945   if (generate_vzeroupper(C)) {
 1946     st->print("vzeroupper");
 1947     st->cr(); st->print("\t");
 1948   }
 1949 
 1950   int framesize = C->output()->frame_size_in_bytes();
 1951   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1952   // Remove word for return adr already pushed
 1953   // and RBP
 1954   framesize -= 2*wordSize;
 1955 
 1956   if (framesize) {
 1957     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1958     st->print("\t");
 1959   }
 1960 
 1961   st->print_cr("popq    rbp");
 1962   if (do_polling() && C->is_method_compilation()) {
 1963     st->print("\t");
 1964     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1965                  "ja      #safepoint_stub\t"
 1966                  "# Safepoint: poll for GC");
 1967   }
 1968 }
 1969 #endif
 1970 
 1971 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1972 {
 1973   Compile* C = ra_->C;
 1974 
 1975   if (generate_vzeroupper(C)) {
 1976     // Clear upper bits of YMM registers when current compiled code uses
 1977     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1978     __ vzeroupper();
 1979   }
 1980 
 1981   int framesize = C->output()->frame_size_in_bytes();
 1982   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1983   // Remove word for return adr already pushed
 1984   // and RBP
 1985   framesize -= 2*wordSize;
 1986 
 1987   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1988 
 1989   if (framesize) {
 1990     __ addq(rsp, framesize);
 1991   }
 1992 
 1993   __ popq(rbp);
 1994 
 1995   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1996     __ reserved_stack_check();
 1997   }
 1998 
 1999   if (do_polling() && C->is_method_compilation()) {
 2000     Label dummy_label;
 2001     Label* code_stub = &dummy_label;
 2002     if (!C->output()->in_scratch_emit_size()) {
 2003       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 2004       C->output()->add_stub(stub);
 2005       code_stub = &stub->entry();
 2006     }
 2007     __ relocate(relocInfo::poll_return_type);
 2008     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 2009   }
 2010 }
 2011 
 2012 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 2013 {
 2014   return MachNode::size(ra_); // too many variables; just compute it
 2015                               // the hard way
 2016 }
 2017 
 2018 int MachEpilogNode::reloc() const
 2019 {
 2020   return 2; // a large enough number
 2021 }
 2022 
 2023 const Pipeline* MachEpilogNode::pipeline() const
 2024 {
 2025   return MachNode::pipeline_class();
 2026 }
 2027 
 2028 //=============================================================================
 2029 
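// Coarse register class of an OptoReg: general purpose, opmask (K), XMM/float,
// or stack slot.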
 2030 enum RC {
 2031   rc_bad,
 2032   rc_int,
 2033   rc_kreg,
 2034   rc_float,
 2035   rc_stack
 2036 };
 2037 
 2038 static enum RC rc_class(OptoReg::Name reg)
 2039 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
 2041 
 2042   if (OptoReg::is_stack(reg)) return rc_stack;
 2043 
 2044   VMReg r = OptoReg::as_VMReg(reg);
 2045 
 2046   if (r->is_Register()) return rc_int;
 2047 
 2048   if (r->is_KRegister()) return rc_kreg;
 2049 
 2050   assert(r->is_XMMRegister(), "must be");
 2051   return rc_float;
 2052 }
 2053 
 2054 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2055 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2056                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2057 
 2058 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2059                      int stack_offset, int reg, uint ireg, outputStream* st);
 2060 
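// Copy a vector spill slot to another stack slot.  Sizes without a push/pop
// form borrow rax or xmm0 as scratch and park the old value just below rsp.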
 2061 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2062                                       int dst_offset, uint ireg, outputStream* st) {
 2063   if (masm) {
 2064     switch (ireg) {
 2065     case Op_VecS:
 2066       __ movq(Address(rsp, -8), rax);
 2067       __ movl(rax, Address(rsp, src_offset));
 2068       __ movl(Address(rsp, dst_offset), rax);
 2069       __ movq(rax, Address(rsp, -8));
 2070       break;
 2071     case Op_VecD:
 2072       __ pushq(Address(rsp, src_offset));
 2073       __ popq (Address(rsp, dst_offset));
 2074       break;
 2075     case Op_VecX:
 2076       __ pushq(Address(rsp, src_offset));
 2077       __ popq (Address(rsp, dst_offset));
 2078       __ pushq(Address(rsp, src_offset+8));
 2079       __ popq (Address(rsp, dst_offset+8));
 2080       break;
 2081     case Op_VecY:
 2082       __ vmovdqu(Address(rsp, -32), xmm0);
 2083       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2084       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2085       __ vmovdqu(xmm0, Address(rsp, -32));
 2086       break;
 2087     case Op_VecZ:
 2088       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 2089       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 2090       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 2091       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 2092       break;
 2093     default:
 2094       ShouldNotReachHere();
 2095     }
 2096 #ifndef PRODUCT
 2097   } else {
 2098     switch (ireg) {
 2099     case Op_VecS:
 2100       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2101                 "movl    rax, [rsp + #%d]\n\t"
 2102                 "movl    [rsp + #%d], rax\n\t"
 2103                 "movq    rax, [rsp - #8]",
 2104                 src_offset, dst_offset);
 2105       break;
 2106     case Op_VecD:
 2107       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2108                 "popq    [rsp + #%d]",
 2109                 src_offset, dst_offset);
 2110       break;
 2111      case Op_VecX:
 2112       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2113                 "popq    [rsp + #%d]\n\t"
 2114                 "pushq   [rsp + #%d]\n\t"
 2115                 "popq    [rsp + #%d]",
 2116                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2117       break;
 2118     case Op_VecY:
 2119       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2120                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2121                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2122                 "vmovdqu xmm0, [rsp - #32]",
 2123                 src_offset, dst_offset);
 2124       break;
 2125     case Op_VecZ:
 2126       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2127                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2128                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2129                 "vmovdqu xmm0, [rsp - #64]",
 2130                 src_offset, dst_offset);
 2131       break;
 2132     default:
 2133       ShouldNotReachHere();
 2134     }
 2135 #endif
 2136   }
 2137 }
 2138 
 2139 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2140                                        PhaseRegAlloc* ra_,
 2141                                        bool do_size,
 2142                                        outputStream* st) const {
 2143   assert(masm != nullptr || st  != nullptr, "sanity");
 2144   // Get registers to move
 2145   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2146   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2147   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2148   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2149 
 2150   enum RC src_second_rc = rc_class(src_second);
 2151   enum RC src_first_rc = rc_class(src_first);
 2152   enum RC dst_second_rc = rc_class(dst_second);
 2153   enum RC dst_first_rc = rc_class(dst_first);
 2154 
 2155   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2156          "must move at least 1 register" );
 2157 
 2158   if (src_first == dst_first && src_second == dst_second) {
 2159     // Self copy, no move
 2160     return 0;
 2161   }
 2162   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2163     uint ireg = ideal_reg();
 2164     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2165     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
 2167       // mem -> mem
 2168       int src_offset = ra_->reg2offset(src_first);
 2169       int dst_offset = ra_->reg2offset(dst_first);
 2170       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
 2172       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
 2174       int stack_offset = ra_->reg2offset(dst_first);
 2175       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
 2177       int stack_offset = ra_->reg2offset(src_first);
 2178       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2179     } else {
 2180       ShouldNotReachHere();
 2181     }
 2182     return 0;
 2183   }
 2184   if (src_first_rc == rc_stack) {
 2185     // mem ->
 2186     if (dst_first_rc == rc_stack) {
 2187       // mem -> mem
 2188       assert(src_second != dst_first, "overlap");
 2189       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2190           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2191         // 64-bit
 2192         int src_offset = ra_->reg2offset(src_first);
 2193         int dst_offset = ra_->reg2offset(dst_first);
 2194         if (masm) {
 2195           __ pushq(Address(rsp, src_offset));
 2196           __ popq (Address(rsp, dst_offset));
 2197 #ifndef PRODUCT
 2198         } else {
 2199           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2200                     "popq    [rsp + #%d]",
 2201                      src_offset, dst_offset);
 2202 #endif
 2203         }
 2204       } else {
 2205         // 32-bit
 2206         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2207         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2208         // No pushl/popl, so:
 2209         int src_offset = ra_->reg2offset(src_first);
 2210         int dst_offset = ra_->reg2offset(dst_first);
 2211         if (masm) {
 2212           __ movq(Address(rsp, -8), rax);
 2213           __ movl(rax, Address(rsp, src_offset));
 2214           __ movl(Address(rsp, dst_offset), rax);
 2215           __ movq(rax, Address(rsp, -8));
 2216 #ifndef PRODUCT
 2217         } else {
 2218           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2219                     "movl    rax, [rsp + #%d]\n\t"
 2220                     "movl    [rsp + #%d], rax\n\t"
 2221                     "movq    rax, [rsp - #8]",
 2222                      src_offset, dst_offset);
 2223 #endif
 2224         }
 2225       }
 2226       return 0;
 2227     } else if (dst_first_rc == rc_int) {
 2228       // mem -> gpr
 2229       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2230           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2231         // 64-bit
 2232         int offset = ra_->reg2offset(src_first);
 2233         if (masm) {
 2234           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2235 #ifndef PRODUCT
 2236         } else {
 2237           st->print("movq    %s, [rsp + #%d]\t# spill",
 2238                      Matcher::regName[dst_first],
 2239                      offset);
 2240 #endif
 2241         }
 2242       } else {
 2243         // 32-bit
 2244         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2245         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2246         int offset = ra_->reg2offset(src_first);
 2247         if (masm) {
 2248           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2249 #ifndef PRODUCT
 2250         } else {
 2251           st->print("movl    %s, [rsp + #%d]\t# spill",
 2252                      Matcher::regName[dst_first],
 2253                      offset);
 2254 #endif
 2255         }
 2256       }
 2257       return 0;
 2258     } else if (dst_first_rc == rc_float) {
      // mem -> xmm
 2260       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2261           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2262         // 64-bit
 2263         int offset = ra_->reg2offset(src_first);
 2264         if (masm) {
 2265           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2266 #ifndef PRODUCT
 2267         } else {
 2268           st->print("%s  %s, [rsp + #%d]\t# spill",
 2269                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2270                      Matcher::regName[dst_first],
 2271                      offset);
 2272 #endif
 2273         }
 2274       } else {
 2275         // 32-bit
 2276         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2277         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2278         int offset = ra_->reg2offset(src_first);
 2279         if (masm) {
 2280           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2281 #ifndef PRODUCT
 2282         } else {
 2283           st->print("movss   %s, [rsp + #%d]\t# spill",
 2284                      Matcher::regName[dst_first],
 2285                      offset);
 2286 #endif
 2287         }
 2288       }
 2289       return 0;
 2290     } else if (dst_first_rc == rc_kreg) {
 2291       // mem -> kreg
 2292       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2293           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2294         // 64-bit
 2295         int offset = ra_->reg2offset(src_first);
 2296         if (masm) {
 2297           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2298 #ifndef PRODUCT
 2299         } else {
 2300           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2301                      Matcher::regName[dst_first],
 2302                      offset);
 2303 #endif
 2304         }
 2305       }
 2306       return 0;
 2307     }
 2308   } else if (src_first_rc == rc_int) {
 2309     // gpr ->
 2310     if (dst_first_rc == rc_stack) {
 2311       // gpr -> mem
 2312       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2313           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2314         // 64-bit
 2315         int offset = ra_->reg2offset(dst_first);
 2316         if (masm) {
 2317           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2318 #ifndef PRODUCT
 2319         } else {
 2320           st->print("movq    [rsp + #%d], %s\t# spill",
 2321                      offset,
 2322                      Matcher::regName[src_first]);
 2323 #endif
 2324         }
 2325       } else {
 2326         // 32-bit
 2327         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2328         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2329         int offset = ra_->reg2offset(dst_first);
 2330         if (masm) {
 2331           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2332 #ifndef PRODUCT
 2333         } else {
 2334           st->print("movl    [rsp + #%d], %s\t# spill",
 2335                      offset,
 2336                      Matcher::regName[src_first]);
 2337 #endif
 2338         }
 2339       }
 2340       return 0;
 2341     } else if (dst_first_rc == rc_int) {
 2342       // gpr -> gpr
 2343       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2344           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2345         // 64-bit
 2346         if (masm) {
 2347           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2348                   as_Register(Matcher::_regEncode[src_first]));
 2349 #ifndef PRODUCT
 2350         } else {
 2351           st->print("movq    %s, %s\t# spill",
 2352                      Matcher::regName[dst_first],
 2353                      Matcher::regName[src_first]);
 2354 #endif
 2355         }
 2356         return 0;
 2357       } else {
 2358         // 32-bit
 2359         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2360         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2361         if (masm) {
 2362           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2363                   as_Register(Matcher::_regEncode[src_first]));
 2364 #ifndef PRODUCT
 2365         } else {
 2366           st->print("movl    %s, %s\t# spill",
 2367                      Matcher::regName[dst_first],
 2368                      Matcher::regName[src_first]);
 2369 #endif
 2370         }
 2371         return 0;
 2372       }
 2373     } else if (dst_first_rc == rc_float) {
 2374       // gpr -> xmm
 2375       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2376           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2377         // 64-bit
 2378         if (masm) {
 2379           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2380 #ifndef PRODUCT
 2381         } else {
 2382           st->print("movdq   %s, %s\t# spill",
 2383                      Matcher::regName[dst_first],
 2384                      Matcher::regName[src_first]);
 2385 #endif
 2386         }
 2387       } else {
 2388         // 32-bit
 2389         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2390         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2391         if (masm) {
 2392           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2393 #ifndef PRODUCT
 2394         } else {
 2395           st->print("movdl   %s, %s\t# spill",
 2396                      Matcher::regName[dst_first],
 2397                      Matcher::regName[src_first]);
 2398 #endif
 2399         }
 2400       }
 2401       return 0;
    } else if (dst_first_rc == rc_kreg) {
      // gpr -> kreg
 2403       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2404           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2405         // 64-bit
 2406         if (masm) {
 2407           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
 2414         }
 2415       }
 2416       Unimplemented();
 2417       return 0;
 2418     }
 2419   } else if (src_first_rc == rc_float) {
 2420     // xmm ->
 2421     if (dst_first_rc == rc_stack) {
 2422       // xmm -> mem
 2423       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2424           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2425         // 64-bit
 2426         int offset = ra_->reg2offset(dst_first);
 2427         if (masm) {
 2428           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2429 #ifndef PRODUCT
 2430         } else {
 2431           st->print("movsd   [rsp + #%d], %s\t# spill",
 2432                      offset,
 2433                      Matcher::regName[src_first]);
 2434 #endif
 2435         }
 2436       } else {
 2437         // 32-bit
 2438         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2439         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2440         int offset = ra_->reg2offset(dst_first);
 2441         if (masm) {
 2442           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2443 #ifndef PRODUCT
 2444         } else {
 2445           st->print("movss   [rsp + #%d], %s\t# spill",
 2446                      offset,
 2447                      Matcher::regName[src_first]);
 2448 #endif
 2449         }
 2450       }
 2451       return 0;
 2452     } else if (dst_first_rc == rc_int) {
 2453       // xmm -> gpr
 2454       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2455           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2456         // 64-bit
 2457         if (masm) {
 2458           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2459 #ifndef PRODUCT
 2460         } else {
 2461           st->print("movdq   %s, %s\t# spill",
 2462                      Matcher::regName[dst_first],
 2463                      Matcher::regName[src_first]);
 2464 #endif
 2465         }
 2466       } else {
 2467         // 32-bit
 2468         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2469         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2470         if (masm) {
 2471           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2472 #ifndef PRODUCT
 2473         } else {
 2474           st->print("movdl   %s, %s\t# spill",
 2475                      Matcher::regName[dst_first],
 2476                      Matcher::regName[src_first]);
 2477 #endif
 2478         }
 2479       }
 2480       return 0;
 2481     } else if (dst_first_rc == rc_float) {
 2482       // xmm -> xmm
 2483       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2484           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2485         // 64-bit
 2486         if (masm) {
 2487           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2488 #ifndef PRODUCT
 2489         } else {
 2490           st->print("%s  %s, %s\t# spill",
 2491                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2492                      Matcher::regName[dst_first],
 2493                      Matcher::regName[src_first]);
 2494 #endif
 2495         }
 2496       } else {
 2497         // 32-bit
 2498         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2499         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2500         if (masm) {
 2501           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2502 #ifndef PRODUCT
 2503         } else {
 2504           st->print("%s  %s, %s\t# spill",
 2505                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2506                      Matcher::regName[dst_first],
 2507                      Matcher::regName[src_first]);
 2508 #endif
 2509         }
 2510       }
 2511       return 0;
 2512     } else if (dst_first_rc == rc_kreg) {
 2513       assert(false, "Illegal spilling");
 2514       return 0;
 2515     }
 2516   } else if (src_first_rc == rc_kreg) {
 2517     if (dst_first_rc == rc_stack) {
      // kreg -> mem
 2519       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2520           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2521         // 64-bit
 2522         int offset = ra_->reg2offset(dst_first);
 2523         if (masm) {
 2524           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2525 #ifndef PRODUCT
 2526         } else {
 2527           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 2528                      offset,
 2529                      Matcher::regName[src_first]);
 2530 #endif
 2531         }
 2532       }
 2533       return 0;
    } else if (dst_first_rc == rc_int) {
      // kreg -> gpr
 2535       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2536           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2537         // 64-bit
 2538         if (masm) {
 2539           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2540 #ifndef PRODUCT
 2541         } else {
 2542          st->print("kmovq   %s, %s\t# spill",
 2543                      Matcher::regName[dst_first],
 2544                      Matcher::regName[src_first]);
 2545 #endif
 2546         }
 2547       }
 2548       Unimplemented();
 2549       return 0;
    } else if (dst_first_rc == rc_kreg) {
      // kreg -> kreg
 2551       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2552           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2553         // 64-bit
 2554         if (masm) {
 2555           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2556 #ifndef PRODUCT
 2557         } else {
 2558          st->print("kmovq   %s, %s\t# spill",
 2559                      Matcher::regName[dst_first],
 2560                      Matcher::regName[src_first]);
 2561 #endif
 2562         }
 2563       }
 2564       return 0;
 2565     } else if (dst_first_rc == rc_float) {
 2566       assert(false, "Illegal spill");
 2567       return 0;
 2568     }
 2569   }
 2570 
  assert(false, "unexpected register class combination");
 2572   Unimplemented();
 2573   return 0;
 2574 }
 2575 
 2576 #ifndef PRODUCT
 2577 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2578   implementation(nullptr, ra_, false, st);
 2579 }
 2580 #endif
 2581 
 2582 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2583   implementation(masm, ra_, false, nullptr);
 2584 }
 2585 
 2586 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2587   return MachNode::size(ra_);
 2588 }
 2589 
 2590 //=============================================================================
 2591 #ifndef PRODUCT
 2592 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2593 {
 2594   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2595   int reg = ra_->get_reg_first(this);
 2596   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2597             Matcher::regName[reg], offset);
 2598 }
 2599 #endif
 2600 
 2601 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2602 {
 2603   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2604   int reg = ra_->get_encode(this);
 2605 
 2606   __ lea(as_Register(reg), Address(rsp, offset));
 2607 }
 2608 
 2609 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2610 {
 2611   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
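  // "leaq reg, [rsp + offset]" encodes as a REX (1 byte) or REX2 (2 bytes)
  // prefix, the opcode, ModRM, SIB, and a disp8 or disp32 displacement.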
 2612   if (ra_->get_encode(this) > 15) {
 2613     return (offset < 0x80) ? 6 : 9; // REX2
 2614   } else {
 2615     return (offset < 0x80) ? 5 : 8; // REX
 2616   }
 2617 }
 2618 
 2619 //=============================================================================
 2620 #ifndef PRODUCT
 2621 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2622 {
 2623   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2624   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2625   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2626 }
 2627 #endif
 2628 
 2629 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2630 {
 2631   __ ic_check(InteriorEntryAlignment);
 2632 }
 2633 
 2634 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2635 {
 2636   return MachNode::size(ra_); // too many variables; just compute it
 2637                               // the hard way
 2638 }
 2639 
 2640 
 2641 //=============================================================================
 2642 
 2643 bool Matcher::supports_vector_calling_convention(void) {
 2644   return EnableVectorSupport;
 2645 }
 2646 
 2647 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2648   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2649 }
 2650 
 2651 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2652   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2653 }
 2654 
 2655 #ifdef ASSERT
 2656 static bool is_ndd_demotable(const MachNode* mdef) {
 2657   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2658 }
 2659 #endif
 2660 
 2661 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2662                                             int oper_index) {
 2663   if (mdef == nullptr) {
 2664     return false;
 2665   }
 2666 
 2667   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2668       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2669     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2670     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2671     return false;
 2672   }
 2673 
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any address component will
  // not result in NDD demotion by the assembler.
 2677   if (mdef->operand_num_edges(oper_index) != 1) {
 2678     return false;
 2679   }
 2680 
  // The demotion candidate must be register-mask compatible with the definition.
 2682   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2683   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2684     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2685     return false;
 2686   }
 2687 
 2688   switch (oper_index) {
  // The first operand of a MachNode corresponding to an Intel APX NDD
  // selection pattern can share its assigned register with the definition
  // operand if their live ranges do not overlap. In such a scenario we can
  // demote the instruction to a legacy map0/map1 encoding by replacing its
  // 4-byte extended EVEX prefix with the shorter REX/REX2 encoding. Demotion
  // candidates are decorated with a special flag by the instruction selector.
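  // Illustrative example (not tied to any particular match rule): for an NDD
  // three-operand form such as "addl dst, src1, src2", biasing the allocator
  // so that dst and src1 receive the same register lets the assembler drop
  // the extended EVEX prefix and emit the legacy two-operand "addl dst, src2".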
 2695   case 1:
 2696     return is_ndd_demotable_opr1(mdef);
 2697 
  // The definition operand of a commutative operation can be biased towards
  // the second operand.
 2700   case 2:
 2701     return is_ndd_demotable_opr2(mdef);
 2702 
  // The current scheme only selects up to two biasing candidates.
 2704   default:
 2705     assert(false, "unhandled operand index: %s", mdef->Name());
 2706     break;
 2707   }
 2708 
 2709   return false;
 2710 }
 2711 
 2712 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2713   assert(EnableVectorSupport, "sanity");
 2714   int lo = XMM0_num;
 2715   int hi = XMM0b_num;
 2716   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2717   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2718   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2719   return OptoRegPair(hi, lo);
 2720 }
 2721 
 2722 // Is this branch offset short enough that a short branch can be used?
 2723 //
 2724 // NOTE: If the platform does not provide any short branch variants, then
 2725 //       this method should return false for offset 0.
 2726 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 2730   offset -= br_size;
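  // For example, a 2-byte branch whose target lies 100 bytes past the start
  // of the branch arrives here with offset == 100; the displacement relative
  // to the next instruction is then 98, which fits in a signed byte.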
 2731 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
 2734   if (rule == jmpConUCF2_rule)
 2735     return (-126 <= offset && offset <= 125);
 2736   return (-128 <= offset && offset <= 127);
 2737 }
 2738 
 2739 #ifdef ASSERT
 2740 // Return whether or not this register is ever used as an argument.
 2741 bool Matcher::can_be_java_arg(int reg)
 2742 {
 2743   return
 2744     reg ==  RDI_num || reg == RDI_H_num ||
 2745     reg ==  RSI_num || reg == RSI_H_num ||
 2746     reg ==  RDX_num || reg == RDX_H_num ||
 2747     reg ==  RCX_num || reg == RCX_H_num ||
 2748     reg ==   R8_num || reg ==  R8_H_num ||
 2749     reg ==   R9_num || reg ==  R9_H_num ||
 2750     reg ==  R12_num || reg == R12_H_num ||
 2751     reg == XMM0_num || reg == XMM0b_num ||
 2752     reg == XMM1_num || reg == XMM1b_num ||
 2753     reg == XMM2_num || reg == XMM2b_num ||
 2754     reg == XMM3_num || reg == XMM3b_num ||
 2755     reg == XMM4_num || reg == XMM4b_num ||
 2756     reg == XMM5_num || reg == XMM5b_num ||
 2757     reg == XMM6_num || reg == XMM6b_num ||
 2758     reg == XMM7_num || reg == XMM7b_num;
 2759 }
 2760 #endif
 2761 
 2762 uint Matcher::int_pressure_limit()
 2763 {
 2764   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2765 }
 2766 
 2767 uint Matcher::float_pressure_limit()
 2768 {
  // After experimenting with different values, the following default threshold
  // works best for LCM's register pressure scheduling on x64.
 2771   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2772   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2773   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2774 }
 2775 
 2776 // Register for DIVI projection of divmodI
 2777 const RegMask& Matcher::divI_proj_mask() {
 2778   return INT_RAX_REG_mask();
 2779 }
 2780 
 2781 // Register for MODI projection of divmodI
 2782 const RegMask& Matcher::modI_proj_mask() {
 2783   return INT_RDX_REG_mask();
 2784 }
 2785 
 2786 // Register for DIVL projection of divmodL
 2787 const RegMask& Matcher::divL_proj_mask() {
 2788   return LONG_RAX_REG_mask();
 2789 }
 2790 
 2791 // Register for MODL projection of divmodL
 2792 const RegMask& Matcher::modL_proj_mask() {
 2793   return LONG_RDX_REG_mask();
 2794 }
 2795 
 2796 %}
 2797 
 2798 source_hpp %{
 2799 // Header information of the source block.
 2800 // Method declarations/definitions which are used outside
 2801 // the ad-scope can conveniently be defined here.
 2802 //
 2803 // To keep related declarations/definitions/uses close together,
 2804 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 2805 
 2806 #include "runtime/vm_version.hpp"
 2807 
 2808 class NativeJump;
 2809 
 2810 class CallStubImpl {
 2811 
 2812   //--------------------------------------------------------------
 2813   //---<  Used for optimization in Compile::shorten_branches  >---
 2814   //--------------------------------------------------------------
 2815 
 2816  public:
 2817   // Size of call trampoline stub.
 2818   static uint size_call_trampoline() {
 2819     return 0; // no call trampolines on this platform
 2820   }
 2821 
 2822   // number of relocations needed by a call trampoline stub
 2823   static uint reloc_call_trampoline() {
 2824     return 0; // no call trampolines on this platform
 2825   }
 2826 };
 2827 
 2828 class HandlerImpl {
 2829 
 2830  public:
 2831 
 2832   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2833 
 2834   static uint size_deopt_handler() {
    // One 5-byte call and one 2-byte short jmp.
 2836     return 7;
 2837   }
 2838 };
 2839 
 2840 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
 2841   switch(bytes) {
 2842     case  4: // fall-through
 2843     case  8: // fall-through
 2844     case 16: return Assembler::AVX_128bit;
 2845     case 32: return Assembler::AVX_256bit;
 2846     case 64: return Assembler::AVX_512bit;
 2847 
 2848     default: {
 2849       ShouldNotReachHere();
 2850       return Assembler::AVX_NoVec;
 2851     }
 2852   }
 2853 }
 2854 
 2855 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2856   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2857 }
 2858 
 2859 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2860   uint def_idx = use->operand_index(opnd);
 2861   Node* def = use->in(def_idx);
 2862   return vector_length_encoding(def);
 2863 }
 2864 
 2865 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2866   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2867          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2868 }
 2869 
 2870 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2871   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2872            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2873 }
 2874 
 2875 class Node::PD {
 2876 public:
 2877   enum NodeFlags : uint64_t {
 2878     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2879     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2880     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2881     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2882     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2883     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2884     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2885     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2886     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2887     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2888     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2889     Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
 2890     Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
 2891     _last_flag                = Flag_ndd_demotable_opr2
 2892   };
 2893 };
 2894 
 2895 %} // end source_hpp
 2896 
 2897 source %{
 2898 
 2899 #include "opto/addnode.hpp"
 2900 #include "c2_intelJccErratum_x86.hpp"
 2901 
 2902 void PhaseOutput::pd_perform_mach_node_analysis() {
 2903   if (VM_Version::has_intel_jcc_erratum()) {
 2904     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2905     _buf_sizes._code += extra_padding;
 2906   }
 2907 }
 2908 
 2909 int MachNode::pd_alignment_required() const {
 2910   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2911     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2912     return IntelJccErratum::largest_jcc_size() + 1;
 2913   } else {
 2914     return 1;
 2915   }
 2916 }
 2917 
 2918 int MachNode::compute_padding(int current_offset) const {
 2919   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2920     Compile* C = Compile::current();
 2921     PhaseOutput* output = C->output();
 2922     Block* block = output->block();
 2923     int index = output->index();
 2924     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2925   } else {
 2926     return 0;
 2927   }
 2928 }
 2929 
 2930 // Emit deopt handler code.
 2931 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2932 
 2933   // Note that the code buffer's insts_mark is always relative to insts.
 2934   // That's why we must use the macroassembler to generate a handler.
 2935   address base = __ start_a_stub(size_deopt_handler());
 2936   if (base == nullptr) {
 2937     ciEnv::current()->record_failure("CodeCache is full");
 2938     return 0;  // CodeBuffer::expand failed
 2939   }
 2940   int offset = __ offset();
 2941 
 2942   Label start;
 2943   __ bind(start);
 2944 
 2945   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2946 
 2947   int entry_offset = __ offset();
 2948 
 2949   __ jmp(start);
 2950 
 2951   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2952   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2953          "out of bounds read in post-call NOP check");
 2954   __ end_a_stub();
 2955   return entry_offset;
 2956 }
 2957 
 2958 static Assembler::Width widthForType(BasicType bt) {
 2959   if (bt == T_BYTE) {
 2960     return Assembler::B;
 2961   } else if (bt == T_SHORT) {
 2962     return Assembler::W;
 2963   } else if (bt == T_INT) {
 2964     return Assembler::D;
 2965   } else {
 2966     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2967     return Assembler::Q;
 2968   }
 2969 }
 2970 
 2971 //=============================================================================
 2972 
 2973   // Float masks come from different places depending on platform.
 2974   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
 2975   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
 2976   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
 2977   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 2978   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
 2979   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
 2980   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 2981   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 2982   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
 2983   static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 2984   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 2985   static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 2986   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
 2987   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
 2988   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
 2989   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 2990   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
 2991   static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
 2992   static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
 2993 
 2994 //=============================================================================
 2995 bool Matcher::match_rule_supported(int opcode) {
 2996   if (!has_match_rule(opcode)) {
 2997     return false; // no match rule present
 2998   }
 2999   switch (opcode) {
 3000     case Op_AbsVL:
 3001     case Op_StoreVectorScatter:
 3002       if (UseAVX < 3) {
 3003         return false;
 3004       }
 3005       break;
 3006     case Op_PopCountI:
 3007     case Op_PopCountL:
 3008       if (!UsePopCountInstruction) {
 3009         return false;
 3010       }
 3011       break;
 3012     case Op_PopCountVI:
 3013       if (UseAVX < 2) {
 3014         return false;
 3015       }
 3016       break;
 3017     case Op_CompressV:
 3018     case Op_ExpandV:
 3019     case Op_PopCountVL:
 3020       if (UseAVX < 2) {
 3021         return false;
 3022       }
 3023       break;
 3024     case Op_MulVI:
 3025       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3026         return false;
 3027       }
 3028       break;
 3029     case Op_MulVL:
 3030       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3031         return false;
 3032       }
 3033       break;
 3034     case Op_MulReductionVL:
 3035       if (VM_Version::supports_avx512dq() == false) {
 3036         return false;
 3037       }
 3038       break;
 3039     case Op_AbsVB:
 3040     case Op_AbsVS:
 3041     case Op_AbsVI:
 3042     case Op_AddReductionVI:
 3043     case Op_AndReductionV:
 3044     case Op_OrReductionV:
 3045     case Op_XorReductionV:
 3046       if (UseSSE < 3) { // requires at least SSSE3
 3047         return false;
 3048       }
 3049       break;
 3050     case Op_MaxHF:
 3051     case Op_MinHF:
 3052       if (!VM_Version::supports_avx512vlbw()) {
 3053         return false;
 3054       }  // fallthrough
 3055     case Op_AddHF:
 3056     case Op_DivHF:
 3057     case Op_FmaHF:
 3058     case Op_MulHF:
 3059     case Op_ReinterpretS2HF:
 3060     case Op_ReinterpretHF2S:
 3061     case Op_SubHF:
 3062     case Op_SqrtHF:
 3063       if (!VM_Version::supports_avx512_fp16()) {
 3064         return false;
 3065       }
 3066       break;
 3067     case Op_VectorLoadShuffle:
 3068     case Op_VectorRearrange:
 3069     case Op_MulReductionVI:
 3070       if (UseSSE < 4) { // requires at least SSE4
 3071         return false;
 3072       }
 3073       break;
 3074     case Op_IsInfiniteF:
 3075     case Op_IsInfiniteD:
 3076       if (!VM_Version::supports_avx512dq()) {
 3077         return false;
 3078       }
 3079       break;
 3080     case Op_SqrtVD:
 3081     case Op_SqrtVF:
 3082     case Op_VectorMaskCmp:
 3083     case Op_VectorCastB2X:
 3084     case Op_VectorCastS2X:
 3085     case Op_VectorCastI2X:
 3086     case Op_VectorCastL2X:
 3087     case Op_VectorCastF2X:
 3088     case Op_VectorCastD2X:
 3089     case Op_VectorUCastB2X:
 3090     case Op_VectorUCastS2X:
 3091     case Op_VectorUCastI2X:
 3092     case Op_VectorMaskCast:
 3093       if (UseAVX < 1) { // enabled for AVX only
 3094         return false;
 3095       }
 3096       break;
 3097     case Op_PopulateIndex:
 3098       if (UseAVX < 2) {
 3099         return false;
 3100       }
 3101       break;
 3102     case Op_RoundVF:
 3103       if (UseAVX < 2) { // enabled for AVX2 only
 3104         return false;
 3105       }
 3106       break;
 3107     case Op_RoundVD:
 3108       if (UseAVX < 3) {
 3109         return false;  // enabled for AVX3 only
 3110       }
 3111       break;
 3112     case Op_CompareAndSwapL:
 3113     case Op_CompareAndSwapP:
 3114       break;
 3115     case Op_StrIndexOf:
 3116       if (!UseSSE42Intrinsics) {
 3117         return false;
 3118       }
 3119       break;
 3120     case Op_StrIndexOfChar:
 3121       if (!UseSSE42Intrinsics) {
 3122         return false;
 3123       }
 3124       break;
 3125     case Op_OnSpinWait:
 3126       if (VM_Version::supports_on_spin_wait() == false) {
 3127         return false;
 3128       }
 3129       break;
 3130     case Op_MulVB:
 3131     case Op_LShiftVB:
 3132     case Op_RShiftVB:
 3133     case Op_URShiftVB:
 3134     case Op_VectorInsert:
 3135     case Op_VectorLoadMask:
 3136     case Op_VectorStoreMask:
 3137     case Op_VectorBlend:
 3138       if (UseSSE < 4) {
 3139         return false;
 3140       }
 3141       break;
 3142     case Op_MaxD:
 3143     case Op_MaxF:
 3144     case Op_MinD:
 3145     case Op_MinF:
 3146       if (UseAVX < 1) { // enabled for AVX only
 3147         return false;
 3148       }
 3149       break;
 3150     case Op_CacheWB:
 3151     case Op_CacheWBPreSync:
 3152     case Op_CacheWBPostSync:
 3153       if (!VM_Version::supports_data_cache_line_flush()) {
 3154         return false;
 3155       }
 3156       break;
 3157     case Op_ExtractB:
 3158     case Op_ExtractL:
 3159     case Op_ExtractI:
 3160     case Op_RoundDoubleMode:
 3161       if (UseSSE < 4) {
 3162         return false;
 3163       }
 3164       break;
 3165     case Op_RoundDoubleModeV:
 3166       if (VM_Version::supports_avx() == false) {
 3167         return false; // 128bit vroundpd is not available
 3168       }
 3169       break;
 3170     case Op_LoadVectorGather:
 3171     case Op_LoadVectorGatherMasked:
 3172       if (UseAVX < 2) {
 3173         return false;
 3174       }
 3175       break;
 3176     case Op_FmaF:
 3177     case Op_FmaD:
 3178     case Op_FmaVD:
 3179     case Op_FmaVF:
 3180       if (!UseFMA) {
 3181         return false;
 3182       }
 3183       break;
 3184     case Op_MacroLogicV:
 3185       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3186         return false;
 3187       }
 3188       break;
 3189 
 3190     case Op_VectorCmpMasked:
 3191     case Op_VectorMaskGen:
 3192       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3193         return false;
 3194       }
 3195       break;
 3196     case Op_VectorMaskFirstTrue:
 3197     case Op_VectorMaskLastTrue:
 3198     case Op_VectorMaskTrueCount:
 3199     case Op_VectorMaskToLong:
 3200       if (UseAVX < 1) {
 3201          return false;
 3202       }
 3203       break;
 3204     case Op_RoundF:
 3205     case Op_RoundD:
 3206       break;
 3207     case Op_CopySignD:
 3208     case Op_CopySignF:
 3209       if (UseAVX < 3)  {
 3210         return false;
 3211       }
 3212       if (!VM_Version::supports_avx512vl()) {
 3213         return false;
 3214       }
 3215       break;
 3216     case Op_CompressBits:
 3217     case Op_ExpandBits:
 3218       if (!VM_Version::supports_bmi2()) {
 3219         return false;
 3220       }
 3221       break;
 3222     case Op_CompressM:
 3223       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3224         return false;
 3225       }
 3226       break;
 3227     case Op_ConvF2HF:
 3228     case Op_ConvHF2F:
 3229       if (!VM_Version::supports_float16()) {
 3230         return false;
 3231       }
 3232       break;
 3233     case Op_VectorCastF2HF:
 3234     case Op_VectorCastHF2F:
 3235       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3236         return false;
 3237       }
 3238       break;
 3239   }
 3240   return true;  // Match rules are supported by default.
 3241 }
 3242 
 3243 //------------------------------------------------------------------------
 3244 
 3245 static inline bool is_pop_count_instr_target(BasicType bt) {
 3246   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3247          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3248 }
 3249 
 3250 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3251   return match_rule_supported_vector(opcode, vlen, bt);
 3252 }
 3253 
// Identify extra cases in which we might want to provide match rules for vector
// nodes and other intrinsics, guarded by vector length (vlen) and element type (bt).
 3256 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3257   if (!match_rule_supported(opcode)) {
 3258     return false;
 3259   }
 3260   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3261   //   * SSE2 supports 128bit vectors for all types;
 3262   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3263   //   * AVX2 supports 256bit vectors for all types;
 3264   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3265   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3266   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3267   // And MaxVectorSize is taken into account as well.
 3268   if (!vector_size_supported(bt, vlen)) {
 3269     return false;
 3270   }
 3271   // Special cases which require vector length follow:
 3272   //   * implementation limitations
 3273   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3274   //   * 128bit vroundpd instruction is present only in AVX1
 3275   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3276   switch (opcode) {
 3277     case Op_MaxVHF:
 3278     case Op_MinVHF:
 3279       if (!VM_Version::supports_avx512bw()) {
 3280         return false;
      } // fallthrough
 3282     case Op_AddVHF:
 3283     case Op_DivVHF:
 3284     case Op_FmaVHF:
 3285     case Op_MulVHF:
 3286     case Op_SubVHF:
 3287     case Op_SqrtVHF:
 3288       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3289         return false;
 3290       }
 3291       if (!VM_Version::supports_avx512_fp16()) {
 3292         return false;
 3293       }
 3294       break;
 3295     case Op_AbsVF:
 3296     case Op_NegVF:
 3297       if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
 3298         return false; // 512bit vandps and vxorps are not available
 3299       }
 3300       break;
 3301     case Op_AbsVD:
 3302     case Op_NegVD:
 3303       if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
 3304         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3305       }
 3306       break;
 3307     case Op_RotateRightV:
 3308     case Op_RotateLeftV:
 3309       if (bt != T_INT && bt != T_LONG) {
 3310         return false;
 3311       } // fallthrough
 3312     case Op_MacroLogicV:
 3313       if (!VM_Version::supports_evex() ||
 3314           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3315         return false;
 3316       }
 3317       break;
 3318     case Op_ClearArray:
 3319     case Op_VectorMaskGen:
 3320     case Op_VectorCmpMasked:
 3321       if (!VM_Version::supports_avx512bw()) {
 3322         return false;
 3323       }
 3324       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3325         return false;
 3326       }
 3327       break;
 3328     case Op_LoadVectorMasked:
 3329     case Op_StoreVectorMasked:
 3330       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3331         return false;
 3332       }
 3333       break;
 3334     case Op_UMinV:
 3335     case Op_UMaxV:
 3336       if (UseAVX == 0) {
 3337         return false;
 3338       }
 3339       break;
 3340     case Op_UMinReductionV:
 3341     case Op_UMaxReductionV:
 3342       if (UseAVX == 0) {
 3343         return false;
 3344       }
 3345       if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
 3346         return false;
 3347       }
 3348       if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
 3349         return false;
 3350       }
 3351       break;
 3352     case Op_MaxV:
 3353     case Op_MinV:
 3354       if (UseSSE < 4 && is_integral_type(bt)) {
 3355         return false;
 3356       }
 3357       if ((bt == T_FLOAT || bt == T_DOUBLE)) {
 3358           // Float/Double intrinsics are enabled for AVX family currently.
 3359           if (UseAVX == 0) {
 3360             return false;
 3361           }
 3362           if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
 3363             return false;
 3364           }
 3365       }
 3366       break;
 3367     case Op_CallLeafVector:
 3368       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3369         return false;
 3370       }
 3371       break;
 3372     case Op_AddReductionVI:
 3373       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3374         return false;
 3375       }
 3376       // fallthrough
 3377     case Op_AndReductionV:
 3378     case Op_OrReductionV:
 3379     case Op_XorReductionV:
 3380       if (is_subword_type(bt) && (UseSSE < 4)) {
 3381         return false;
 3382       }
 3383       break;
 3384     case Op_MinReductionV:
 3385     case Op_MaxReductionV:
 3386       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3387         return false;
 3388       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3389         return false;
 3390       }
 3391       // Float/Double intrinsics enabled for AVX family.
 3392       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3393         return false;
 3394       }
 3395       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3396         return false;
 3397       }
 3398       break;
 3399     case Op_VectorBlend:
 3400       if (UseAVX == 0 && size_in_bits < 128) {
 3401         return false;
 3402       }
 3403       break;
 3404     case Op_VectorTest:
 3405       if (UseSSE < 4) {
 3406         return false; // Implementation limitation
 3407       } else if (size_in_bits < 32) {
 3408         return false; // Implementation limitation
 3409       }
 3410       break;
 3411     case Op_VectorLoadShuffle:
 3412     case Op_VectorRearrange:
      if (vlen == 2) {
 3414         return false; // Implementation limitation due to how shuffle is loaded
 3415       } else if (size_in_bits == 256 && UseAVX < 2) {
 3416         return false; // Implementation limitation
 3417       }
 3418       break;
 3419     case Op_VectorLoadMask:
 3420     case Op_VectorMaskCast:
 3421       if (size_in_bits == 256 && UseAVX < 2) {
 3422         return false; // Implementation limitation
 3423       }
 3424       // fallthrough
 3425     case Op_VectorStoreMask:
 3426       if (vlen == 2) {
 3427         return false; // Implementation limitation
 3428       }
 3429       break;
 3430     case Op_PopulateIndex:
 3431       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3432         return false;
 3433       }
 3434       break;
 3435     case Op_VectorCastB2X:
 3436     case Op_VectorCastS2X:
 3437     case Op_VectorCastI2X:
 3438       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3439         return false;
 3440       }
 3441       break;
 3442     case Op_VectorCastL2X:
 3443       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3444         return false;
 3445       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3446         return false;
 3447       }
 3448       break;
 3449     case Op_VectorCastF2X: {
        // As per JLS section 5.1.3, narrowing conversion to sub-word types
        // happens after an intermediate conversion to integer, and the special
        // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
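        // (Illustrative: in Java, "(byte) 300.0f" first narrows to the int 300
        // and is then truncated to the byte value 44.)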
 3453         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3454         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3455           return false;
 3456         }
 3457       }
 3458       // fallthrough
 3459     case Op_VectorCastD2X:
 3460       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3461         return false;
 3462       }
 3463       break;
 3464     case Op_VectorCastF2HF:
 3465     case Op_VectorCastHF2F:
 3466       if (!VM_Version::supports_f16c() &&
 3467          ((!VM_Version::supports_evex() ||
 3468          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3469         return false;
 3470       }
 3471       break;
 3472     case Op_RoundVD:
 3473       if (!VM_Version::supports_avx512dq()) {
 3474         return false;
 3475       }
 3476       break;
 3477     case Op_MulReductionVI:
 3478       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3479         return false;
 3480       }
 3481       break;
 3482     case Op_LoadVectorGatherMasked:
 3483       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3484         return false;
 3485       }
 3486       if (is_subword_type(bt) &&
 3487          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3488           (size_in_bits < 64)                                      ||
 3489           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3490         return false;
 3491       }
 3492       break;
 3493     case Op_StoreVectorScatterMasked:
 3494     case Op_StoreVectorScatter:
 3495       if (is_subword_type(bt)) {
 3496         return false;
 3497       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3498         return false;
 3499       }
 3500       // fallthrough
 3501     case Op_LoadVectorGather:
 3502       if (!is_subword_type(bt) && size_in_bits == 64) {
 3503         return false;
 3504       }
 3505       if (is_subword_type(bt) && size_in_bits < 64) {
 3506         return false;
 3507       }
 3508       break;
 3509     case Op_SaturatingAddV:
 3510     case Op_SaturatingSubV:
 3511       if (UseAVX < 1) {
 3512         return false; // Implementation limitation
 3513       }
 3514       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3515         return false;
 3516       }
 3517       break;
 3518     case Op_SelectFromTwoVector:
 3519        if (size_in_bits < 128) {
 3520          return false;
 3521        }
 3522        if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3523          return false;
 3524        }
 3525        if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3526          return false;
 3527        }
 3528        if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3529          return false;
 3530        }
 3531        if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
 3532          return false;
 3533        }
 3534        break;
 3535     case Op_MaskAll:
 3536       if (!VM_Version::supports_evex()) {
 3537         return false;
 3538       }
 3539       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3540         return false;
 3541       }
 3542       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3543         return false;
 3544       }
 3545       break;
 3546     case Op_VectorMaskCmp:
 3547       if (vlen < 2 || size_in_bits < 32) {
 3548         return false;
 3549       }
 3550       break;
 3551     case Op_CompressM:
 3552       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3553         return false;
 3554       }
 3555       break;
 3556     case Op_CompressV:
 3557     case Op_ExpandV:
 3558       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3559         return false;
 3560       }
      if (size_in_bits < 128) {
 3562         return false;
 3563       }
 3564     case Op_VectorLongToMask:
 3565       if (UseAVX < 1) {
 3566         return false;
 3567       }
 3568       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3569         return false;
 3570       }
 3571       break;
 3572     case Op_SignumVD:
 3573     case Op_SignumVF:
 3574       if (UseAVX < 1) {
 3575         return false;
 3576       }
 3577       break;
 3578     case Op_PopCountVI:
 3579     case Op_PopCountVL: {
 3580         if (!is_pop_count_instr_target(bt) &&
 3581             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3582           return false;
 3583         }
 3584       }
 3585       break;
 3586     case Op_ReverseV:
 3587     case Op_ReverseBytesV:
 3588       if (UseAVX < 2) {
 3589         return false;
 3590       }
 3591       break;
 3592     case Op_CountTrailingZerosV:
 3593     case Op_CountLeadingZerosV:
 3594       if (UseAVX < 2) {
 3595         return false;
 3596       }
 3597       break;
 3598   }
 3599   return true;  // Per default match rules are supported.
 3600 }
 3601 
 3602 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most of the unary/binary/ternary masked
  // operations share the IR nodes of their non-masked counterparts, with the
  // mask edge being the differentiator. This routine does a strict check for
  // the existence of masked operation patterns by returning false for all
  // opcodes apart from the ones whose masked instruction patterns are defined
  // in this file.
 3609   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3610     return false;
 3611   }
 3612 
 3613   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3614   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3615     return false;
 3616   }
 3617   switch(opcode) {
 3618     // Unary masked operations
 3619     case Op_AbsVB:
 3620     case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      } // fallthrough
 3624     case Op_AbsVI:
 3625     case Op_AbsVL:
 3626       return true;
 3627 
 3628     // Ternary masked operations
 3629     case Op_FmaVF:
 3630     case Op_FmaVD:
 3631       return true;
 3632 
 3633     case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
 3635         return false;
 3636       }
 3637       return true;
 3638 
 3639     // Binary masked operations
 3640     case Op_AddVB:
 3641     case Op_AddVS:
 3642     case Op_SubVB:
 3643     case Op_SubVS:
 3644     case Op_MulVS:
 3645     case Op_LShiftVS:
 3646     case Op_RShiftVS:
 3647     case Op_URShiftVS:
 3648       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3649       if (!VM_Version::supports_avx512bw()) {
 3650         return false;  // Implementation limitation
 3651       }
 3652       return true;
 3653 
 3654     case Op_MulVL:
 3655       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3656       if (!VM_Version::supports_avx512dq()) {
 3657         return false;  // Implementation limitation
 3658       }
 3659       return true;
 3660 
 3661     case Op_AndV:
 3662     case Op_OrV:
 3663     case Op_XorV:
 3664     case Op_RotateRightV:
 3665     case Op_RotateLeftV:
 3666       if (bt != T_INT && bt != T_LONG) {
 3667         return false; // Implementation limitation
 3668       }
 3669       return true;
 3670 
 3671     case Op_VectorLoadMask:
 3672       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3673       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3674         return false;
 3675       }
 3676       return true;
 3677 
 3678     case Op_AddVI:
 3679     case Op_AddVL:
 3680     case Op_AddVF:
 3681     case Op_AddVD:
 3682     case Op_SubVI:
 3683     case Op_SubVL:
 3684     case Op_SubVF:
 3685     case Op_SubVD:
 3686     case Op_MulVI:
 3687     case Op_MulVF:
 3688     case Op_MulVD:
 3689     case Op_DivVF:
 3690     case Op_DivVD:
 3691     case Op_SqrtVF:
 3692     case Op_SqrtVD:
 3693     case Op_LShiftVI:
 3694     case Op_LShiftVL:
 3695     case Op_RShiftVI:
 3696     case Op_RShiftVL:
 3697     case Op_URShiftVI:
 3698     case Op_URShiftVL:
 3699     case Op_LoadVectorMasked:
 3700     case Op_StoreVectorMasked:
 3701     case Op_LoadVectorGatherMasked:
 3702     case Op_StoreVectorScatterMasked:
 3703       return true;
 3704 
 3705     case Op_UMinV:
 3706     case Op_UMaxV:
 3707       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3708         return false;
 3709       } // fallthrough
 3710     case Op_MaxV:
 3711     case Op_MinV:
 3712       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3713         return false; // Implementation limitation
 3714       }
 3715       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3716         return false; // Implementation limitation
 3717       }
 3718       return true;
 3719     case Op_SaturatingAddV:
 3720     case Op_SaturatingSubV:
 3721       if (!is_subword_type(bt)) {
 3722         return false;
 3723       }
 3724       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3725         return false; // Implementation limitation
 3726       }
 3727       return true;
 3728 
 3729     case Op_VectorMaskCmp:
 3730       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3731         return false; // Implementation limitation
 3732       }
 3733       return true;
 3734 
 3735     case Op_VectorRearrange:
 3736       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3737         return false; // Implementation limitation
 3738       }
 3739       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3740         return false; // Implementation limitation
 3741       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3742         return false; // Implementation limitation
 3743       }
 3744       return true;
 3745 
 3746     // Binary Logical operations
 3747     case Op_AndVMask:
 3748     case Op_OrVMask:
 3749     case Op_XorVMask:
 3750       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3751         return false; // Implementation limitation
 3752       }
 3753       return true;
 3754 
 3755     case Op_PopCountVI:
 3756     case Op_PopCountVL:
 3757       if (!is_pop_count_instr_target(bt)) {
 3758         return false;
 3759       }
 3760       return true;
 3761 
 3762     case Op_MaskAll:
 3763       return true;
 3764 
 3765     case Op_CountLeadingZerosV:
 3766       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3767         return true;
 3768       }
 3769     default:
 3770       return false;
 3771   }
 3772 }
 3773 
 3774 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3775   return false;
 3776 }
 3777 
 3778 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3779 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3780   switch (elem_bt) {
 3781     case T_BYTE:  return false;
 3782     case T_SHORT: return !VM_Version::supports_avx512bw();
 3783     case T_INT:   return !VM_Version::supports_avx();
 3784     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3785     default:
 3786       ShouldNotReachHere();
 3787       return false;
 3788   }
 3789 }
 3790 
 3791 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3792   // Prefer predicate if the mask type is "TypeVectMask".
 3793   return vt->isa_vectmask() != nullptr;
 3794 }
 3795 
 3796 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3797   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3798   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3799   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3800       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3801     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3802     return new legVecZOper();
 3803   }
 3804   if (legacy) {
 3805     switch (ideal_reg) {
 3806       case Op_VecS: return new legVecSOper();
 3807       case Op_VecD: return new legVecDOper();
 3808       case Op_VecX: return new legVecXOper();
 3809       case Op_VecY: return new legVecYOper();
 3810       case Op_VecZ: return new legVecZOper();
 3811     }
 3812   } else {
 3813     switch (ideal_reg) {
 3814       case Op_VecS: return new vecSOper();
 3815       case Op_VecD: return new vecDOper();
 3816       case Op_VecX: return new vecXOper();
 3817       case Op_VecY: return new vecYOper();
 3818       case Op_VecZ: return new vecZOper();
 3819     }
 3820   }
 3821   ShouldNotReachHere();
 3822   return nullptr;
 3823 }
 3824 
 3825 bool Matcher::is_reg2reg_move(MachNode* m) {
 3826   switch (m->rule()) {
 3827     case MoveVec2Leg_rule:
 3828     case MoveLeg2Vec_rule:
 3829     case MoveF2VL_rule:
 3830     case MoveF2LEG_rule:
 3831     case MoveVL2F_rule:
 3832     case MoveLEG2F_rule:
 3833     case MoveD2VL_rule:
 3834     case MoveD2LEG_rule:
 3835     case MoveVL2D_rule:
 3836     case MoveLEG2D_rule:
 3837       return true;
 3838     default:
 3839       return false;
 3840   }
 3841 }
 3842 
 3843 bool Matcher::is_generic_vector(MachOper* opnd) {
 3844   switch (opnd->opcode()) {
 3845     case VEC:
 3846     case LEGVEC:
 3847       return true;
 3848     default:
 3849       return false;
 3850   }
 3851 }
 3852 
 3853 //------------------------------------------------------------------------
 3854 
 3855 const RegMask* Matcher::predicate_reg_mask(void) {
 3856   return &_VECTMASK_REG_mask;
 3857 }
 3858 
 3859 // Max vector size in bytes. 0 if not supported.
 3860 int Matcher::vector_width_in_bytes(BasicType bt) {
 3861   assert(is_java_primitive(bt), "only primitive type vectors");
 3862   // SSE2 supports 128bit vectors for all types.
 3863   // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types.
 3865   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
 3866   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3867   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3868     size = (UseAVX > 2) ? 64 : 32;
 3869   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3870     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3871   // Use flag to limit vector size.
 3872   size = MIN2(size,(int)MaxVectorSize);
 3873   // Minimum 2 values in vector (or 4 for bytes).
 3874   switch (bt) {
 3875   case T_DOUBLE:
 3876   case T_LONG:
 3877     if (size < 16) return 0;
 3878     break;
 3879   case T_FLOAT:
 3880   case T_INT:
 3881     if (size < 8) return 0;
 3882     break;
 3883   case T_BOOLEAN:
 3884     if (size < 4) return 0;
 3885     break;
 3886   case T_CHAR:
 3887     if (size < 4) return 0;
 3888     break;
 3889   case T_BYTE:
 3890     if (size < 4) return 0;
 3891     break;
 3892   case T_SHORT:
 3893     if (size < 4) return 0;
 3894     break;
 3895   default:
 3896     ShouldNotReachHere();
 3897   }
 3898   return size;
 3899 }
 3900 
 3901 // Limits on vector size (number of elements) loaded into vector.
 3902 int Matcher::max_vector_size(const BasicType bt) {
 3903   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3904 }
 3905 int Matcher::min_vector_size(const BasicType bt) {
 3906   int max_size = max_vector_size(bt);
 3907   // Min size which can be loaded into vector is 4 bytes.
 3908   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Support calls into SVML stubs that take single-element (Double64) vectors.
 3910   if (bt == T_DOUBLE) {
 3911     size = 1;
 3912   }
 3913   return MIN2(size,max_size);
 3914 }
 3915 
 3916 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3917   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3918   // by default on Cascade Lake
 3919   if (VM_Version::is_default_intel_cascade_lake()) {
 3920     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3921   }
 3922   return Matcher::max_vector_size(bt);
 3923 }
 3924 
 3925 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3926   return -1;
 3927 }
 3928 
 3929 // Vector ideal reg corresponding to specified size in bytes
 3930 uint Matcher::vector_ideal_reg(int size) {
 3931   assert(MaxVectorSize >= size, "");
 3932   switch(size) {
 3933     case  4: return Op_VecS;
 3934     case  8: return Op_VecD;
 3935     case 16: return Op_VecX;
 3936     case 32: return Op_VecY;
 3937     case 64: return Op_VecZ;
 3938   }
 3939   ShouldNotReachHere();
 3940   return 0;
 3941 }
 3942 
 3943 // Check for shift by small constant as well
 3944 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3945   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3946       shift->in(2)->get_int() <= 3 &&
 3947       // Are there other uses besides address expressions?
 3948       !matcher->is_visited(shift)) {
 3949     address_visited.set(shift->_idx); // Flag as address_visited
 3950     mstack.push(shift->in(2), Matcher::Visit);
 3951     Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses
    // the ConvI2L operation for an array index on LP64
    // if the index value is positive.
 3955     if (conv->Opcode() == Op_ConvI2L &&
 3956         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3957         // Are there other uses besides address expressions?
 3958         !matcher->is_visited(conv)) {
 3959       address_visited.set(conv->_idx); // Flag as address_visited
 3960       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3961     } else {
 3962       mstack.push(conv, Matcher::Pre_Visit);
 3963     }
 3964     return true;
 3965   }
 3966   return false;
 3967 }
 3968 
// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, such that the pair can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to 'blsi r32, m32'.
 3973 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3974 // refers to the same node.
 3975 //
 3976 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3977 // This is a temporary solution until we make DAGs expressible in ADL.
 3978 template<typename ConType>
 3979 class FusedPatternMatcher {
 3980   Node* _op1_node;
 3981   Node* _mop_node;
 3982   int _con_op;
 3983 
 3984   static int match_next(Node* n, int next_op, int next_op_idx) {
 3985     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3986       return -1;
 3987     }
 3988 
 3989     if (next_op_idx == -1) { // n is commutative, try rotations
 3990       if (n->in(1)->Opcode() == next_op) {
 3991         return 1;
 3992       } else if (n->in(2)->Opcode() == next_op) {
 3993         return 2;
 3994       }
 3995     } else {
 3996       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 3997       if (n->in(next_op_idx)->Opcode() == next_op) {
 3998         return next_op_idx;
 3999       }
 4000     }
 4001     return -1;
 4002   }
 4003 
 4004  public:
 4005   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 4006     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 4007 
 4008   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 4009              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4010              typename ConType::NativeType con_value) {
 4011     if (_op1_node->Opcode() != op1) {
 4012       return false;
 4013     }
 4014     if (_mop_node->outcnt() > 2) {
 4015       return false;
 4016     }
 4017     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4018     if (op1_op2_idx == -1) {
 4019       return false;
 4020     }
 4021     // Memory operation must be the other edge
 4022     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 4023 
 4024     // Check that the mop node is really what we want
 4025     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4026       Node* op2_node = _op1_node->in(op1_op2_idx);
 4027       if (op2_node->outcnt() > 1) {
 4028         return false;
 4029       }
 4030       assert(op2_node->Opcode() == op2, "Should be");
 4031       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4032       if (op2_con_idx == -1) {
 4033         return false;
 4034       }
 4035       // Memory operation must be the other edge
 4036       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4037       // Check that the memory operation is the same node
 4038       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4039         // Now check the constant
 4040         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4041         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4042           return true;
 4043         }
 4044       }
 4045     }
 4046     return false;
 4047   }
 4048 };
 4049 
 4050 static bool is_bmi_pattern(Node* n, Node* m) {
 4051   assert(UseBMI1Instructions, "sanity");
 4052   if (n != nullptr && m != nullptr) {
 4053     if (m->Opcode() == Op_LoadI) {
 4054       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4055       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4056              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4057              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4058     } else if (m->Opcode() == Op_LoadL) {
 4059       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4060       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4061              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4062              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4063     }
 4064   }
 4065   return false;
 4066 }
 4067 
 4068 // Should the matcher clone input 'm' of node 'n'?
 4069 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4070   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4071   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
 4072     mstack.push(m, Visit);
 4073     return true;
 4074   }
 4075   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4076     mstack.push(m, Visit);           // m = ShiftCntV
 4077     return true;
 4078   }
 4079   if (is_encode_and_store_pattern(n, m)) {
 4080     mstack.push(m, Visit);
 4081     return true;
 4082   }
 4083   return false;
 4084 }
 4085 
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions, or should it
// compute them into registers?
 4089 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4090   Node *off = m->in(AddPNode::Offset);
 4091   if (off->is_Con()) {
 4092     address_visited.test_set(m->_idx); // Flag as address_visited
 4093     Node *adr = m->in(AddPNode::Address);
 4094 
 4095     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4096     // AtomicAdd is not an addressing expression.
 4097     // Cheap to find it by looking for screwy base.
 4098     if (adr->is_AddP() &&
 4099         !adr->in(AddPNode::Base)->is_top() &&
 4100         !adr->in(AddPNode::Offset)->is_Con() &&
 4101         off->get_long() == (int) (off->get_long()) && // immL32
 4102         // Are there other uses besides address expressions?
 4103         !is_visited(adr)) {
 4104       address_visited.set(adr->_idx); // Flag as address_visited
 4105       Node *shift = adr->in(AddPNode::Offset);
 4106       if (!clone_shift(shift, this, mstack, address_visited)) {
 4107         mstack.push(shift, Pre_Visit);
 4108       }
 4109       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4110       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4111     } else {
 4112       mstack.push(adr, Pre_Visit);
 4113     }
 4114 
 4115     // Clone X+offset as it also folds into most addressing expressions
 4116     mstack.push(off, Visit);
 4117     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4118     return true;
 4119   } else if (clone_shift(off, this, mstack, address_visited)) {
 4120     address_visited.test_set(m->_idx); // Flag as address_visited
 4121     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4122     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4123     return true;
 4124   }
 4125   return false;
 4126 }
 4127 
 4128 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4129   switch (bt) {
 4130     case BoolTest::eq:
 4131       return Assembler::eq;
 4132     case BoolTest::ne:
 4133       return Assembler::neq;
 4134     case BoolTest::le:
 4135     case BoolTest::ule:
 4136       return Assembler::le;
 4137     case BoolTest::ge:
 4138     case BoolTest::uge:
 4139       return Assembler::nlt;
 4140     case BoolTest::lt:
 4141     case BoolTest::ult:
 4142       return Assembler::lt;
 4143     case BoolTest::gt:
 4144     case BoolTest::ugt:
 4145       return Assembler::nle;
 4146     default : ShouldNotReachHere(); return Assembler::_false;
 4147   }
 4148 }
 4149 
 4150 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4151   switch (bt) {
 4152   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4153   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4154   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4155   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4156   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4157   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4158   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4159   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4160   }
 4161 }
 4162 
 4163 // Helper methods for MachSpillCopyNode::implementation().
 4164 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4165                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4166   assert(ireg == Op_VecS || // 32bit vector
 4167          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4168           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4169          "no non-adjacent vector moves" );
 4170   if (masm) {
 4171     switch (ireg) {
 4172     case Op_VecS: // copy whole register
 4173     case Op_VecD:
 4174     case Op_VecX:
 4175       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4176         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4177       } else {
 4178         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4179      }
 4180       break;
 4181     case Op_VecY:
 4182       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4183         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4184       } else {
 4185         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4186      }
 4187       break;
 4188     case Op_VecZ:
 4189       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4190       break;
 4191     default:
 4192       ShouldNotReachHere();
 4193     }
 4194 #ifndef PRODUCT
 4195   } else {
 4196     switch (ireg) {
 4197     case Op_VecS:
 4198     case Op_VecD:
 4199     case Op_VecX:
 4200       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4201       break;
 4202     case Op_VecY:
 4203     case Op_VecZ:
 4204       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4205       break;
 4206     default:
 4207       ShouldNotReachHere();
 4208     }
 4209 #endif
 4210   }
 4211 }
 4212 
 4213 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4214                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4215   if (masm) {
 4216     if (is_load) {
 4217       switch (ireg) {
 4218       case Op_VecS:
 4219         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4220         break;
 4221       case Op_VecD:
 4222         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4223         break;
 4224       case Op_VecX:
 4225         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4226           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4227         } else {
 4228           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4230         }
 4231         break;
 4232       case Op_VecY:
 4233         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4234           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4235         } else {
 4236           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4238         }
 4239         break;
 4240       case Op_VecZ:
 4241         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4242         break;
 4243       default:
 4244         ShouldNotReachHere();
 4245       }
 4246     } else { // store
 4247       switch (ireg) {
 4248       case Op_VecS:
 4249         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4250         break;
 4251       case Op_VecD:
 4252         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4253         break;
 4254       case Op_VecX:
 4255         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4256           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4257         }
 4258         else {
 4259           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4260         }
 4261         break;
 4262       case Op_VecY:
 4263         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4264           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4265         }
 4266         else {
 4267           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4268         }
 4269         break;
 4270       case Op_VecZ:
 4271         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4272         break;
 4273       default:
 4274         ShouldNotReachHere();
 4275       }
 4276     }
 4277 #ifndef PRODUCT
 4278   } else {
 4279     if (is_load) {
 4280       switch (ireg) {
 4281       case Op_VecS:
 4282         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4283         break;
 4284       case Op_VecD:
 4285         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4286         break;
 4287        case Op_VecX:
 4288         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4289         break;
 4290       case Op_VecY:
 4291       case Op_VecZ:
 4292         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4293         break;
 4294       default:
 4295         ShouldNotReachHere();
 4296       }
 4297     } else { // store
 4298       switch (ireg) {
 4299       case Op_VecS:
 4300         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4301         break;
 4302       case Op_VecD:
 4303         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4304         break;
 4305        case Op_VecX:
 4306         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4307         break;
 4308       case Op_VecY:
 4309       case Op_VecZ:
 4310         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4311         break;
 4312       default:
 4313         ShouldNotReachHere();
 4314       }
 4315     }
 4316 #endif
 4317   }
 4318 }
 4319 
 4320 template <class T>
 4321 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4322   int size = type2aelembytes(bt) * len;
 4323   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4324   for (int i = 0; i < len; i++) {
 4325     int offset = i * type2aelembytes(bt);
 4326     switch (bt) {
 4327       case T_BYTE: val->at(i) = con; break;
 4328       case T_SHORT: {
 4329         jshort c = con;
 4330         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4331         break;
 4332       }
 4333       case T_INT: {
 4334         jint c = con;
 4335         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4336         break;
 4337       }
 4338       case T_LONG: {
 4339         jlong c = con;
 4340         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4341         break;
 4342       }
 4343       case T_FLOAT: {
 4344         jfloat c = con;
 4345         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4346         break;
 4347       }
 4348       case T_DOUBLE: {
 4349         jdouble c = con;
 4350         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4351         break;
 4352       }
 4353       default: assert(false, "%s", type2name(bt));
 4354     }
 4355   }
 4356   return val;
 4357 }
 4358 
 4359 static inline jlong high_bit_set(BasicType bt) {
 4360   switch (bt) {
 4361     case T_BYTE:  return 0x8080808080808080;
 4362     case T_SHORT: return 0x8000800080008000;
 4363     case T_INT:   return 0x8000000080000000;
 4364     case T_LONG:  return 0x8000000000000000;
 4365     default:
 4366       ShouldNotReachHere();
 4367       return 0;
 4368   }
 4369 }
 4370 
 4371 #ifndef PRODUCT
 4372   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4373     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4374   }
 4375 #endif
 4376 
 4377   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4378     __ nop(_count);
 4379   }
 4380 
 4381   uint MachNopNode::size(PhaseRegAlloc*) const {
 4382     return _count;
 4383   }
 4384 
 4385 #ifndef PRODUCT
 4386   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4387     st->print("# breakpoint");
 4388   }
 4389 #endif
 4390 
 4391   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4392     __ int3();
 4393   }
 4394 
 4395   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4396     return MachNode::size(ra_);
 4397   }
 4398 
 4399 %}
 4400 
 4401 //----------ENCODING BLOCK-----------------------------------------------------
 4402 // This block specifies the encoding classes used by the compiler to
 4403 // output byte streams.  Encoding classes are parameterized macros
 4404 // used by Machine Instruction Nodes in order to generate the bit
 4405 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
 4408 // COND_INTER.  REG_INTER causes an operand to generate a function
 4409 // which returns its register number when queried.  CONST_INTER causes
 4410 // an operand to generate a function which returns the value of the
 4411 // constant when queried.  MEMORY_INTER causes an operand to generate
 4412 // four functions which return the Base Register, the Index Register,
 4413 // the Scale Value, and the Offset Value of the operand when queried.
 4414 // COND_INTER causes an operand to generate six functions which return
 4415 // the encoding code (ie - encoding bits for the instruction)
 4416 // associated with each basic boolean condition for a conditional
 4417 // instruction.
 4418 //
// Instructions specify two basic values for encoding.  A function is
// also available to check whether a constant displacement is an
// oop. They use the ins_encode keyword to specify their encoding
 4422 // classes (which must be a sequence of enc_class names, and their
 4423 // parameters, specified in the encoding block), and they use the
 4424 // opcode keyword to specify, in order, their primary, secondary, and
 4425 // tertiary opcode.  Only the opcode sections which a particular
 4426 // instruction needs for encoding need to be specified.
 4427 encode %{
 4428   enc_class cdql_enc(no_rax_rdx_RegI div)
 4429   %{
 4430     // Full implementation of Java idiv and irem; checks for
 4431     // special case as described in JVM spec., p.243 & p.271.
 4432     //
 4433     //         normal case                           special case
 4434     //
 4435     // input : rax: dividend                         min_int
 4436     //         reg: divisor                          -1
 4437     //
 4438     // output: rax: quotient  (= rax idiv reg)       min_int
 4439     //         rdx: remainder (= rax irem reg)       0
 4440     //
    //  Code sequence:
 4442     //
 4443     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4444     //    5:   75 07/08                jne    e <normal>
 4445     //    7:   33 d2                   xor    %edx,%edx
 4446     //  [div >= 8 -> offset + 1]
 4447     //  [REX_B]
 4448     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4449     //    c:   74 03/04                je     11 <done>
 4450     // 000000000000000e <normal>:
 4451     //    e:   99                      cltd
 4452     //  [div >= 8 -> offset + 1]
 4453     //  [REX_B]
 4454     //    f:   f7 f9                   idiv   $div
 4455     // 0000000000000011 <done>:
 4456     Label normal;
 4457     Label done;
 4458 
 4459     // cmp    $0x80000000,%eax
 4460     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4461 
 4462     // jne    e <normal>
 4463     __ jccb(Assembler::notEqual, normal);
 4464 
 4465     // xor    %edx,%edx
 4466     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4467 
    // cmp    $0xffffffffffffffff,$div
 4469     __ cmpl($div$$Register, -1);
 4470 
 4471     // je     11 <done>
 4472     __ jccb(Assembler::equal, done);
 4473 
 4474     // <normal>
 4475     // cltd
 4476     __ bind(normal);
 4477     __ cdql();
 4478 
 4479     // idivl
 4480     // <done>
 4481     __ idivl($div$$Register);
 4482     __ bind(done);
 4483   %}
 4484 
 4485   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4486   %{
 4487     // Full implementation of Java ldiv and lrem; checks for
 4488     // special case as described in JVM spec., p.243 & p.271.
 4489     //
 4490     //         normal case                           special case
 4491     //
 4492     // input : rax: dividend                         min_long
 4493     //         reg: divisor                          -1
 4494     //
 4495     // output: rax: quotient  (= rax idiv reg)       min_long
 4496     //         rdx: remainder (= rax irem reg)       0
 4497     //
    //  Code sequence:
 4499     //
 4500     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4501     //    7:   00 00 80
 4502     //    a:   48 39 d0                cmp    %rdx,%rax
 4503     //    d:   75 08                   jne    17 <normal>
 4504     //    f:   33 d2                   xor    %edx,%edx
 4505     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4506     //   15:   74 05                   je     1c <done>
 4507     // 0000000000000017 <normal>:
 4508     //   17:   48 99                   cqto
 4509     //   19:   48 f7 f9                idiv   $div
 4510     // 000000000000001c <done>:
 4511     Label normal;
 4512     Label done;
 4513 
 4514     // mov    $0x8000000000000000,%rdx
 4515     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4516 
 4517     // cmp    %rdx,%rax
 4518     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4519 
 4520     // jne    17 <normal>
 4521     __ jccb(Assembler::notEqual, normal);
 4522 
 4523     // xor    %edx,%edx
 4524     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4525 
 4526     // cmp    $0xffffffffffffffff,$div
 4527     __ cmpq($div$$Register, -1);
 4528 
    // je     1c <done>
 4530     __ jccb(Assembler::equal, done);
 4531 
 4532     // <normal>
 4533     // cqto
 4534     __ bind(normal);
 4535     __ cdqq();
 4536 
    // idivq
 4538     // <done>
 4539     __ idivq($div$$Register);
 4540     __ bind(done);
 4541   %}
 4542 
 4543   enc_class clear_avx %{
 4544     DEBUG_ONLY(int off0 = __ offset());
 4545     if (generate_vzeroupper(Compile::current())) {
      // Clear the upper bits of the YMM registers when the current compiled code
      // uses wide vectors, to avoid the AVX <-> SSE transition penalty during the call.
 4549       __ vzeroupper();
 4550     }
 4551     DEBUG_ONLY(int off1 = __ offset());
 4552     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4553   %}
 4554 
 4555   enc_class Java_To_Runtime(method meth) %{
 4556     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4557     __ call(r10);
 4558     __ post_call_nop();
 4559   %}
 4560 
 4561   enc_class Java_Static_Call(method meth)
 4562   %{
 4563     // JAVA STATIC CALL
 4564     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4565     // determine who we intended to call.
 4566     if (!_method) {
 4567       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4568     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4569       // The NOP here is purely to ensure that eliding a call to
 4570       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4571       __ nop(5);
 4572       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4573     } else {
 4574       int method_index = resolved_method_index(masm);
 4575       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4576                                                   : static_call_Relocation::spec(method_index);
 4577       address mark = __ pc();
 4578       int call_offset = __ offset();
 4579       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4580       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4581         // Calls of the same statically bound method can share
 4582         // a stub to the interpreter.
 4583         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4584       } else {
 4585         // Emit stubs for static call.
 4586         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4587         __ clear_inst_mark();
 4588         if (stub == nullptr) {
 4589           ciEnv::current()->record_failure("CodeCache is full");
 4590           return;
 4591         }
 4592       }
 4593     }
 4594     __ post_call_nop();
 4595   %}
 4596 
 4597   enc_class Java_Dynamic_Call(method meth) %{
 4598     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4599     __ post_call_nop();
 4600   %}
 4601 
 4602   enc_class call_epilog %{
 4603     if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find magic cookie on stack
 4605       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4606       Label L;
 4607       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4608       __ jccb(Assembler::equal, L);
 4609       // Die if stack mismatch
 4610       __ int3();
 4611       __ bind(L);
 4612     }
 4613   %}
 4614 
 4615 %}
 4616 
 4617 //----------FRAME--------------------------------------------------------------
 4618 // Definition of frame structure and management information.
 4619 //
 4620 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4621 //                             |   (to get allocators register number
 4622 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4623 //  r   CALLER     |        |
 4624 //  o     |        +--------+      pad to even-align allocators stack-slot
 4625 //  w     V        |  pad0  |        numbers; owned by CALLER
 4626 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4627 //  h     ^        |   in   |  5
 4628 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4629 //  |     |        |        |  3
 4630 //  |     |        +--------+
 4631 //  V     |        | old out|      Empty on Intel, window on Sparc
 4632 //        |    old |preserve|      Must be even aligned.
 4633 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4634 //        |        |   in   |  3   area for Intel ret address
 4635 //     Owned by    |preserve|      Empty on Sparc.
 4636 //       SELF      +--------+
 4637 //        |        |  pad2  |  2   pad to align old SP
 4638 //        |        +--------+  1
 4639 //        |        | locks  |  0
 4640 //        |        +--------+----> OptoReg::stack0(), even aligned
 4641 //        |        |  pad1  | 11   pad to align new SP
 4642 //        |        +--------+
 4643 //        |        |        | 10
 4644 //        |        | spills |  9   spills
 4645 //        V        |        |  8   (pad0 slot for callee)
 4646 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4647 //        ^        |  out   |  7
 4648 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4649 //     Owned by    +--------+
 4650 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4651 //        |    new |preserve|      Must be even-aligned.
 4652 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4653 //        |        |        |
 4654 //
 4655 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4656 //         known from SELF's arguments and the Java calling convention.
 4657 //         Region 6-7 is determined per call site.
 4658 // Note 2: If the calling convention leaves holes in the incoming argument
 4659 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4660 //         are owned by the CALLEE.  Holes should not be necessary in the
 4661 //         incoming area, as the Java calling convention is completely under
 4662 //         the control of the AD file.  Doubles can be sorted and packed to
 4663 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4664 //         varargs C calling conventions.
 4665 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4666 //         even aligned with pad0 as needed.
 4667 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4668 //         region 6-11 is even aligned; it may be padded out more so that
 4669 //         the region from SP to FP meets the minimum stack alignment.
 4670 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4671 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4672 //         SP meets the minimum alignment.
 4673 
 4674 frame
 4675 %{
  // The inline cache register defines part of the calling convention
  // between compiled code and the interpreter.
 4678   inline_cache_reg(RAX);                // Inline Cache Register
 4679 
 4680   // Optional: name the operand used by cisc-spilling to access
 4681   // [stack_pointer + offset]
 4682   cisc_spilling_operand_name(indOffset32);
 4683 
 4684   // Number of stack slots consumed by locking an object
 4685   sync_stack_slots(2);
 4686 
 4687   // Compiled code's Frame Pointer
 4688   frame_pointer(RSP);
 4689 
 4690   // Stack alignment requirement
 4691   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4692 
 4693   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4694   // for calls to C.  Supports the var-args backing area for register parms.
 4695   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4696 
 4697   // The after-PROLOG location of the return address.  Location of
 4698   // return address specifies a type (REG or STACK) and a number
 4699   // representing the register number (i.e. - use a register name) or
 4700   // stack slot.
 4701   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4702   // Otherwise, it is above the locks and verification slot and alignment word
 4703   return_addr(STACK - 2 +
 4704               align_up((Compile::current()->in_preserve_stack_slots() +
 4705                         Compile::current()->fixed_slots()),
 4706                        stack_alignment_in_slots()));
 4707 
 4708   // Location of compiled Java return values.  Same as C for now.
 4709   return_value
 4710   %{
 4711     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4712            "only return normal values");
 4713 
 4714     static const int lo[Op_RegL + 1] = {
 4715       0,
 4716       0,
 4717       RAX_num,  // Op_RegN
 4718       RAX_num,  // Op_RegI
 4719       RAX_num,  // Op_RegP
 4720       XMM0_num, // Op_RegF
 4721       XMM0_num, // Op_RegD
 4722       RAX_num   // Op_RegL
 4723     };
 4724     static const int hi[Op_RegL + 1] = {
 4725       0,
 4726       0,
 4727       OptoReg::Bad, // Op_RegN
 4728       OptoReg::Bad, // Op_RegI
 4729       RAX_H_num,    // Op_RegP
 4730       OptoReg::Bad, // Op_RegF
 4731       XMM0b_num,    // Op_RegD
 4732       RAX_H_num     // Op_RegL
 4733     };
 4734     // Excluded flags and vector registers.
 4735     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4736     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4737   %}
 4738 %}
 4739 
 4740 //----------ATTRIBUTES---------------------------------------------------------
 4741 //----------Operand Attributes-------------------------------------------------
 4742 op_attrib op_cost(0);        // Required cost attribute
 4743 
 4744 //----------Instruction Attributes---------------------------------------------
 4745 ins_attrib ins_cost(100);       // Required cost attribute
 4746 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4747 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4748                                 // a non-matching short branch variant
 4749                                 // of some long branch?
 4750 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4751                                 // be a power of 2) specifies the
 4752                                 // alignment that some part of the
 4753                                 // instruction (not necessarily the
 4754                                 // start) requires.  If > 1, a
 4755                                 // compute_padding() function must be
 4756                                 // provided for the instruction
 4757 
 4758 // Whether this node is expanded during code emission into a sequence of
 4759 // instructions and the first instruction can perform an implicit null check.
 4760 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4761 
 4762 //----------OPERANDS-----------------------------------------------------------
 4763 // Operand definitions must precede instruction definitions for correct parsing
 4764 // in the ADLC because operands constitute user defined types which are used in
 4765 // instruction definitions.
 4766 
 4767 //----------Simple Operands----------------------------------------------------
 4768 // Immediate Operands
 4769 // Integer Immediate
 4770 operand immI()
 4771 %{
 4772   match(ConI);
 4773 
 4774   op_cost(10);
 4775   format %{ %}
 4776   interface(CONST_INTER);
 4777 %}
 4778 
 4779 // Constant for test vs zero
 4780 operand immI_0()
 4781 %{
 4782   predicate(n->get_int() == 0);
 4783   match(ConI);
 4784 
 4785   op_cost(0);
 4786   format %{ %}
 4787   interface(CONST_INTER);
 4788 %}
 4789 
 4790 // Constant for increment
 4791 operand immI_1()
 4792 %{
 4793   predicate(n->get_int() == 1);
 4794   match(ConI);
 4795 
 4796   op_cost(0);
 4797   format %{ %}
 4798   interface(CONST_INTER);
 4799 %}
 4800 
 4801 // Constant for decrement
 4802 operand immI_M1()
 4803 %{
 4804   predicate(n->get_int() == -1);
 4805   match(ConI);
 4806 
 4807   op_cost(0);
 4808   format %{ %}
 4809   interface(CONST_INTER);
 4810 %}
 4811 
 4812 operand immI_2()
 4813 %{
 4814   predicate(n->get_int() == 2);
 4815   match(ConI);
 4816 
 4817   op_cost(0);
 4818   format %{ %}
 4819   interface(CONST_INTER);
 4820 %}
 4821 
 4822 operand immI_4()
 4823 %{
 4824   predicate(n->get_int() == 4);
 4825   match(ConI);
 4826 
 4827   op_cost(0);
 4828   format %{ %}
 4829   interface(CONST_INTER);
 4830 %}
 4831 
 4832 operand immI_8()
 4833 %{
 4834   predicate(n->get_int() == 8);
 4835   match(ConI);
 4836 
 4837   op_cost(0);
 4838   format %{ %}
 4839   interface(CONST_INTER);
 4840 %}
 4841 
 4842 // Valid scale values for addressing modes
 4843 operand immI2()
 4844 %{
 4845   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4846   match(ConI);
 4847 
 4848   format %{ %}
 4849   interface(CONST_INTER);
 4850 %}
 4851 
 4852 operand immU7()
 4853 %{
 4854   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4855   match(ConI);
 4856 
 4857   op_cost(5);
 4858   format %{ %}
 4859   interface(CONST_INTER);
 4860 %}
 4861 
 4862 operand immI8()
 4863 %{
 4864   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4865   match(ConI);
 4866 
 4867   op_cost(5);
 4868   format %{ %}
 4869   interface(CONST_INTER);
 4870 %}
 4871 
 4872 operand immU8()
 4873 %{
 4874   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4875   match(ConI);
 4876 
 4877   op_cost(5);
 4878   format %{ %}
 4879   interface(CONST_INTER);
 4880 %}
 4881 
 4882 operand immI16()
 4883 %{
 4884   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4885   match(ConI);
 4886 
 4887   op_cost(10);
 4888   format %{ %}
 4889   interface(CONST_INTER);
 4890 %}
 4891 
 4892 // Int Immediate non-negative
 4893 operand immU31()
 4894 %{
 4895   predicate(n->get_int() >= 0);
 4896   match(ConI);
 4897 
 4898   op_cost(0);
 4899   format %{ %}
 4900   interface(CONST_INTER);
 4901 %}
 4902 
 4903 // Pointer Immediate
 4904 operand immP()
 4905 %{
 4906   match(ConP);
 4907 
 4908   op_cost(10);
 4909   format %{ %}
 4910   interface(CONST_INTER);
 4911 %}
 4912 
 4913 // Null Pointer Immediate
 4914 operand immP0()
 4915 %{
 4916   predicate(n->get_ptr() == 0);
 4917   match(ConP);
 4918 
 4919   op_cost(5);
 4920   format %{ %}
 4921   interface(CONST_INTER);
 4922 %}
 4923 
 4924 // Pointer Immediate
 4925 operand immN() %{
 4926   match(ConN);
 4927 
 4928   op_cost(10);
 4929   format %{ %}
 4930   interface(CONST_INTER);
 4931 %}
 4932 
 4933 operand immNKlass() %{
 4934   match(ConNKlass);
 4935 
 4936   op_cost(10);
 4937   format %{ %}
 4938   interface(CONST_INTER);
 4939 %}
 4940 
 4941 // Null Pointer Immediate
 4942 operand immN0() %{
 4943   predicate(n->get_narrowcon() == 0);
 4944   match(ConN);
 4945 
 4946   op_cost(5);
 4947   format %{ %}
 4948   interface(CONST_INTER);
 4949 %}
 4950 
 4951 operand immP31()
 4952 %{
 4953   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 4954             && (n->get_ptr() >> 31) == 0);
 4955   match(ConP);
 4956 
 4957   op_cost(5);
 4958   format %{ %}
 4959   interface(CONST_INTER);
 4960 %}
 4961 
 4962 
 4963 // Long Immediate
 4964 operand immL()
 4965 %{
 4966   match(ConL);
 4967 
 4968   op_cost(20);
 4969   format %{ %}
 4970   interface(CONST_INTER);
 4971 %}
 4972 
 4973 // Long Immediate 8-bit
 4974 operand immL8()
 4975 %{
 4976   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 4977   match(ConL);
 4978 
 4979   op_cost(5);
 4980   format %{ %}
 4981   interface(CONST_INTER);
 4982 %}
 4983 
 4984 // Long Immediate 32-bit unsigned
 4985 operand immUL32()
 4986 %{
 4987   predicate(n->get_long() == (unsigned int) (n->get_long()));
 4988   match(ConL);
 4989 
 4990   op_cost(10);
 4991   format %{ %}
 4992   interface(CONST_INTER);
 4993 %}
 4994 
 4995 // Long Immediate 32-bit signed
 4996 operand immL32()
 4997 %{
 4998   predicate(n->get_long() == (int) (n->get_long()));
 4999   match(ConL);
 5000 
 5001   op_cost(15);
 5002   format %{ %}
 5003   interface(CONST_INTER);
 5004 %}
 5005 
 5006 operand immL_Pow2()
 5007 %{
 5008   predicate(is_power_of_2((julong)n->get_long()));
 5009   match(ConL);
 5010 
 5011   op_cost(15);
 5012   format %{ %}
 5013   interface(CONST_INTER);
 5014 %}
 5015 
 5016 operand immL_NotPow2()
 5017 %{
 5018   predicate(is_power_of_2((julong)~n->get_long()));
 5019   match(ConL);
 5020 
 5021   op_cost(15);
 5022   format %{ %}
 5023   interface(CONST_INTER);
 5024 %}
 5025 
 5026 // Long Immediate zero
 5027 operand immL0()
 5028 %{
 5029   predicate(n->get_long() == 0L);
 5030   match(ConL);
 5031 
 5032   op_cost(10);
 5033   format %{ %}
 5034   interface(CONST_INTER);
 5035 %}
 5036 
 5037 // Constant for increment
 5038 operand immL1()
 5039 %{
 5040   predicate(n->get_long() == 1);
 5041   match(ConL);
 5042 
 5043   format %{ %}
 5044   interface(CONST_INTER);
 5045 %}
 5046 
 5047 // Constant for decrement
 5048 operand immL_M1()
 5049 %{
 5050   predicate(n->get_long() == -1);
 5051   match(ConL);
 5052 
 5053   format %{ %}
 5054   interface(CONST_INTER);
 5055 %}
 5056 
 5057 // Long Immediate: low 32-bit mask
 5058 operand immL_32bits()
 5059 %{
 5060   predicate(n->get_long() == 0xFFFFFFFFL);
 5061   match(ConL);
 5062   op_cost(20);
 5063 
 5064   format %{ %}
 5065   interface(CONST_INTER);
 5066 %}
 5067 
 5068 // Int Immediate: 2^n-1, positive
 5069 operand immI_Pow2M1()
 5070 %{
 5071   predicate((n->get_int() > 0)
 5072             && is_power_of_2((juint)n->get_int() + 1));
 5073   match(ConI);
 5074 
 5075   op_cost(20);
 5076   format %{ %}
 5077   interface(CONST_INTER);
 5078 %}
 5079 
 5080 // Float Immediate zero
 5081 operand immF0()
 5082 %{
 5083   predicate(jint_cast(n->getf()) == 0);
 5084   match(ConF);
 5085 
 5086   op_cost(5);
 5087   format %{ %}
 5088   interface(CONST_INTER);
 5089 %}
 5090 
 5091 // Float Immediate
 5092 operand immF()
 5093 %{
 5094   match(ConF);
 5095 
 5096   op_cost(15);
 5097   format %{ %}
 5098   interface(CONST_INTER);
 5099 %}
 5100 
 5101 // Half Float Immediate
 5102 operand immH()
 5103 %{
 5104   match(ConH);
 5105 
 5106   op_cost(15);
 5107   format %{ %}
 5108   interface(CONST_INTER);
 5109 %}
 5110 
 5111 // Double Immediate zero
 5112 operand immD0()
 5113 %{
 5114   predicate(jlong_cast(n->getd()) == 0);
 5115   match(ConD);
 5116 
 5117   op_cost(5);
 5118   format %{ %}
 5119   interface(CONST_INTER);
 5120 %}
 5121 
 5122 // Double Immediate
 5123 operand immD()
 5124 %{
 5125   match(ConD);
 5126 
 5127   op_cost(15);
 5128   format %{ %}
 5129   interface(CONST_INTER);
 5130 %}
 5131 
 5132 // Immediates for special shifts (sign extend)
 5133 
// Shift counts used for sign extension
 5135 operand immI_16()
 5136 %{
 5137   predicate(n->get_int() == 16);
 5138   match(ConI);
 5139 
 5140   format %{ %}
 5141   interface(CONST_INTER);
 5142 %}
 5143 
 5144 operand immI_24()
 5145 %{
 5146   predicate(n->get_int() == 24);
 5147   match(ConI);
 5148 
 5149   format %{ %}
 5150   interface(CONST_INTER);
 5151 %}
 5152 
 5153 // Constant for byte-wide masking
 5154 operand immI_255()
 5155 %{
 5156   predicate(n->get_int() == 255);
 5157   match(ConI);
 5158 
 5159   format %{ %}
 5160   interface(CONST_INTER);
 5161 %}
 5162 
 5163 // Constant for short-wide masking
 5164 operand immI_65535()
 5165 %{
 5166   predicate(n->get_int() == 65535);
 5167   match(ConI);
 5168 
 5169   format %{ %}
 5170   interface(CONST_INTER);
 5171 %}
 5172 
 5173 // Constant for byte-wide masking
 5174 operand immL_255()
 5175 %{
 5176   predicate(n->get_long() == 255);
 5177   match(ConL);
 5178 
 5179   format %{ %}
 5180   interface(CONST_INTER);
 5181 %}
 5182 
 5183 // Constant for short-wide masking
 5184 operand immL_65535()
 5185 %{
 5186   predicate(n->get_long() == 65535);
 5187   match(ConL);
 5188 
 5189   format %{ %}
 5190   interface(CONST_INTER);
 5191 %}
 5192 
 5193 // AOT Runtime Constants Address
 5194 operand immAOTRuntimeConstantsAddress()
 5195 %{
 5196   // Check if the address is in the range of AOT Runtime Constants
 5197   predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
 5198   match(ConP);
 5199 
 5200   op_cost(0);
 5201   format %{ %}
 5202   interface(CONST_INTER);
 5203 %}
 5204 
 5205 operand kReg()
 5206 %{
 5207   constraint(ALLOC_IN_RC(vectmask_reg));
 5208   match(RegVectMask);
 5209   format %{%}
 5210   interface(REG_INTER);
 5211 %}
 5212 
 5213 // Register Operands
 5214 // Integer Register
 5215 operand rRegI()
 5216 %{
 5217   constraint(ALLOC_IN_RC(int_reg));
 5218   match(RegI);
 5219 
 5220   match(rax_RegI);
 5221   match(rbx_RegI);
 5222   match(rcx_RegI);
 5223   match(rdx_RegI);
 5224   match(rdi_RegI);
 5225 
 5226   format %{ %}
 5227   interface(REG_INTER);
 5228 %}
 5229 
 5230 // Special Registers
 5231 operand rax_RegI()
 5232 %{
 5233   constraint(ALLOC_IN_RC(int_rax_reg));
 5234   match(RegI);
 5235   match(rRegI);
 5236 
 5237   format %{ "RAX" %}
 5238   interface(REG_INTER);
 5239 %}
 5240 
 5241 // Special Registers
 5242 operand rbx_RegI()
 5243 %{
 5244   constraint(ALLOC_IN_RC(int_rbx_reg));
 5245   match(RegI);
 5246   match(rRegI);
 5247 
 5248   format %{ "RBX" %}
 5249   interface(REG_INTER);
 5250 %}
 5251 
 5252 operand rcx_RegI()
 5253 %{
 5254   constraint(ALLOC_IN_RC(int_rcx_reg));
 5255   match(RegI);
 5256   match(rRegI);
 5257 
 5258   format %{ "RCX" %}
 5259   interface(REG_INTER);
 5260 %}
 5261 
 5262 operand rdx_RegI()
 5263 %{
 5264   constraint(ALLOC_IN_RC(int_rdx_reg));
 5265   match(RegI);
 5266   match(rRegI);
 5267 
 5268   format %{ "RDX" %}
 5269   interface(REG_INTER);
 5270 %}
 5271 
 5272 operand rdi_RegI()
 5273 %{
 5274   constraint(ALLOC_IN_RC(int_rdi_reg));
 5275   match(RegI);
 5276   match(rRegI);
 5277 
 5278   format %{ "RDI" %}
 5279   interface(REG_INTER);
 5280 %}
 5281 
 5282 operand no_rax_rdx_RegI()
 5283 %{
 5284   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5285   match(RegI);
 5286   match(rbx_RegI);
 5287   match(rcx_RegI);
 5288   match(rdi_RegI);
 5289 
 5290   format %{ %}
 5291   interface(REG_INTER);
 5292 %}
 5293 
 5294 operand no_rbp_r13_RegI()
 5295 %{
 5296   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5297   match(RegI);
 5298   match(rRegI);
 5299   match(rax_RegI);
 5300   match(rbx_RegI);
 5301   match(rcx_RegI);
 5302   match(rdx_RegI);
 5303   match(rdi_RegI);
 5304 
 5305   format %{ %}
 5306   interface(REG_INTER);
 5307 %}
 5308 
 5309 // Pointer Register
 5310 operand any_RegP()
 5311 %{
 5312   constraint(ALLOC_IN_RC(any_reg));
 5313   match(RegP);
 5314   match(rax_RegP);
 5315   match(rbx_RegP);
 5316   match(rdi_RegP);
 5317   match(rsi_RegP);
 5318   match(rbp_RegP);
 5319   match(r15_RegP);
 5320   match(rRegP);
 5321 
 5322   format %{ %}
 5323   interface(REG_INTER);
 5324 %}
 5325 
 5326 operand rRegP()
 5327 %{
 5328   constraint(ALLOC_IN_RC(ptr_reg));
 5329   match(RegP);
 5330   match(rax_RegP);
 5331   match(rbx_RegP);
 5332   match(rdi_RegP);
 5333   match(rsi_RegP);
 5334   match(rbp_RegP);  // See Q&A below about
 5335   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5336 
 5337   format %{ %}
 5338   interface(REG_INTER);
 5339 %}
 5340 
 5341 operand rRegN() %{
 5342   constraint(ALLOC_IN_RC(int_reg));
 5343   match(RegN);
 5344 
 5345   format %{ %}
 5346   interface(REG_INTER);
 5347 %}
 5348 
 5349 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 5350 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 5351 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
 5352 // The output of an instruction is controlled by the allocator, which respects
 5353 // register class masks, not match rules.  Unless an instruction mentions
 5354 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
 5355 // by the allocator when assigning the instruction's output (see the sketch below).
 5356 // The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
 5357 // RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 5358 // result, RBP is never chosen for the output of an instruction either.
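//
// Illustrative sketch only (hypothetical name, commented out; modeled on the
// usual ThreadLocal rule): r15 can only show up as an instruction result when
// the rule names r15_RegP explicitly as its output operand.
//
// instruct tlsLoadP_sketch(r15_RegP dst)
// %{
//   match(Set dst (ThreadLocal));
//   effect(DEF dst);
//   size(0);
//   format %{ "# TLS is in R15" %}
//   ins_encode( /* empty encoding */ );
//   ins_pipe(ialu_reg_reg);
// %}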
 5359 
 5360 // This operand is not allowed to use RBP even if
 5361 // RBP is not used to hold the frame pointer.
 5362 operand no_rbp_RegP()
 5363 %{
 5364   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5365   match(RegP);
 5366   match(rbx_RegP);
 5367   match(rsi_RegP);
 5368   match(rdi_RegP);
 5369 
 5370   format %{ %}
 5371   interface(REG_INTER);
 5372 %}
 5373 
 5374 // Special Registers
 5375 // Return a pointer value
 5376 operand rax_RegP()
 5377 %{
 5378   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5379   match(RegP);
 5380   match(rRegP);
 5381 
 5382   format %{ %}
 5383   interface(REG_INTER);
 5384 %}
 5385 
 5386 // Special Registers
 5387 // Return a compressed pointer value
 5388 operand rax_RegN()
 5389 %{
 5390   constraint(ALLOC_IN_RC(int_rax_reg));
 5391   match(RegN);
 5392   match(rRegN);
 5393 
 5394   format %{ %}
 5395   interface(REG_INTER);
 5396 %}
 5397 
 5398 // Used in AtomicAdd
 5399 operand rbx_RegP()
 5400 %{
 5401   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5402   match(RegP);
 5403   match(rRegP);
 5404 
 5405   format %{ %}
 5406   interface(REG_INTER);
 5407 %}
 5408 
 5409 operand rsi_RegP()
 5410 %{
 5411   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5412   match(RegP);
 5413   match(rRegP);
 5414 
 5415   format %{ %}
 5416   interface(REG_INTER);
 5417 %}
 5418 
 5419 operand rbp_RegP()
 5420 %{
 5421   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5422   match(RegP);
 5423   match(rRegP);
 5424 
 5425   format %{ %}
 5426   interface(REG_INTER);
 5427 %}
 5428 
 5429 // Used in rep stosq
 5430 operand rdi_RegP()
 5431 %{
 5432   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5433   match(RegP);
 5434   match(rRegP);
 5435 
 5436   format %{ %}
 5437   interface(REG_INTER);
 5438 %}
 5439 
 5440 operand r15_RegP()
 5441 %{
 5442   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5443   match(RegP);
 5444   match(rRegP);
 5445 
 5446   format %{ %}
 5447   interface(REG_INTER);
 5448 %}
 5449 
 5450 operand rRegL()
 5451 %{
 5452   constraint(ALLOC_IN_RC(long_reg));
 5453   match(RegL);
 5454   match(rax_RegL);
 5455   match(rdx_RegL);
 5456 
 5457   format %{ %}
 5458   interface(REG_INTER);
 5459 %}
 5460 
 5461 // Special Registers
 5462 operand no_rax_rdx_RegL()
 5463 %{
 5464   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5465   match(RegL);
 5466   match(rRegL);
 5467 
 5468   format %{ %}
 5469   interface(REG_INTER);
 5470 %}
 5471 
 5472 operand rax_RegL()
 5473 %{
 5474   constraint(ALLOC_IN_RC(long_rax_reg));
 5475   match(RegL);
 5476   match(rRegL);
 5477 
 5478   format %{ "RAX" %}
 5479   interface(REG_INTER);
 5480 %}
 5481 
 5482 operand rcx_RegL()
 5483 %{
 5484   constraint(ALLOC_IN_RC(long_rcx_reg));
 5485   match(RegL);
 5486   match(rRegL);
 5487 
 5488   format %{ %}
 5489   interface(REG_INTER);
 5490 %}
 5491 
 5492 operand rdx_RegL()
 5493 %{
 5494   constraint(ALLOC_IN_RC(long_rdx_reg));
 5495   match(RegL);
 5496   match(rRegL);
 5497 
 5498   format %{ %}
 5499   interface(REG_INTER);
 5500 %}
 5501 
 5502 operand r11_RegL()
 5503 %{
 5504   constraint(ALLOC_IN_RC(long_r11_reg));
 5505   match(RegL);
 5506   match(rRegL);
 5507 
 5508   format %{ %}
 5509   interface(REG_INTER);
 5510 %}
 5511 
 5512 operand no_rbp_r13_RegL()
 5513 %{
 5514   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5515   match(RegL);
 5516   match(rRegL);
 5517   match(rax_RegL);
 5518   match(rcx_RegL);
 5519   match(rdx_RegL);
 5520 
 5521   format %{ %}
 5522   interface(REG_INTER);
 5523 %}
 5524 
 5525 // Flags register, used as output of compare instructions
 5526 operand rFlagsReg()
 5527 %{
 5528   constraint(ALLOC_IN_RC(int_flags));
 5529   match(RegFlags);
 5530 
 5531   format %{ "RFLAGS" %}
 5532   interface(REG_INTER);
 5533 %}
 5534 
 5535 // Flags register, used as output of FLOATING POINT compare instructions
 5536 operand rFlagsRegU()
 5537 %{
 5538   constraint(ALLOC_IN_RC(int_flags));
 5539   match(RegFlags);
 5540 
 5541   format %{ "RFLAGS_U" %}
 5542   interface(REG_INTER);
 5543 %}
 5544 
 5545 operand rFlagsRegUCF() %{
 5546   constraint(ALLOC_IN_RC(int_flags));
 5547   match(RegFlags);
 5548   predicate(!UseAPX || !VM_Version::supports_avx10_2());
 5549 
 5550   format %{ "RFLAGS_U_CF" %}
 5551   interface(REG_INTER);
 5552 %}
 5553 
 5554 operand rFlagsRegUCFE() %{
 5555   constraint(ALLOC_IN_RC(int_flags));
 5556   match(RegFlags);
 5557   predicate(UseAPX && VM_Version::supports_avx10_2());
 5558 
 5559   format %{ "RFLAGS_U_CFE" %}
 5560   interface(REG_INTER);
 5561 %}
 5562 
 5563 // Float register operands
 5564 operand regF() %{
 5565    constraint(ALLOC_IN_RC(float_reg));
 5566    match(RegF);
 5567 
 5568    format %{ %}
 5569    interface(REG_INTER);
 5570 %}
 5571 
 5572 // Float register operands
 5573 operand legRegF() %{
 5574    constraint(ALLOC_IN_RC(float_reg_legacy));
 5575    match(RegF);
 5576 
 5577    format %{ %}
 5578    interface(REG_INTER);
 5579 %}
 5580 
 5581 // Float register operands
 5582 operand vlRegF() %{
 5583    constraint(ALLOC_IN_RC(float_reg_vl));
 5584    match(RegF);
 5585 
 5586    format %{ %}
 5587    interface(REG_INTER);
 5588 %}
 5589 
 5590 // Double register operands
 5591 operand regD() %{
 5592    constraint(ALLOC_IN_RC(double_reg));
 5593    match(RegD);
 5594 
 5595    format %{ %}
 5596    interface(REG_INTER);
 5597 %}
 5598 
 5599 // Double register operands
 5600 operand legRegD() %{
 5601    constraint(ALLOC_IN_RC(double_reg_legacy));
 5602    match(RegD);
 5603 
 5604    format %{ %}
 5605    interface(REG_INTER);
 5606 %}
 5607 
 5608 // Double register operands
 5609 operand vlRegD() %{
 5610    constraint(ALLOC_IN_RC(double_reg_vl));
 5611    match(RegD);
 5612 
 5613    format %{ %}
 5614    interface(REG_INTER);
 5615 %}
 5616 
 5617 //----------Memory Operands----------------------------------------------------
 5618 // Direct Memory Operand
 5619 // operand direct(immP addr)
 5620 // %{
 5621 //   match(addr);
 5622 
 5623 //   format %{ "[$addr]" %}
 5624 //   interface(MEMORY_INTER) %{
 5625 //     base(0xFFFFFFFF);
 5626 //     index(0x4);
 5627 //     scale(0x0);
 5628 //     disp($addr);
 5629 //   %}
 5630 // %}
 5631 
 5632 // Indirect Memory Operand
 5633 operand indirect(any_RegP reg)
 5634 %{
 5635   constraint(ALLOC_IN_RC(ptr_reg));
 5636   match(reg);
 5637 
 5638   format %{ "[$reg]" %}
 5639   interface(MEMORY_INTER) %{
 5640     base($reg);
 5641     index(0x4);
 5642     scale(0x0);
 5643     disp(0x0);
 5644   %}
 5645 %}
 5646 
 5647 // Indirect Memory Plus Short Offset Operand
 5648 operand indOffset8(any_RegP reg, immL8 off)
 5649 %{
 5650   constraint(ALLOC_IN_RC(ptr_reg));
 5651   match(AddP reg off);
 5652 
 5653   format %{ "[$reg + $off (8-bit)]" %}
 5654   interface(MEMORY_INTER) %{
 5655     base($reg);
 5656     index(0x4);
 5657     scale(0x0);
 5658     disp($off);
 5659   %}
 5660 %}
 5661 
 5662 // Indirect Memory Plus Long Offset Operand
 5663 operand indOffset32(any_RegP reg, immL32 off)
 5664 %{
 5665   constraint(ALLOC_IN_RC(ptr_reg));
 5666   match(AddP reg off);
 5667 
 5668   format %{ "[$reg + $off (32-bit)]" %}
 5669   interface(MEMORY_INTER) %{
 5670     base($reg);
 5671     index(0x4);
 5672     scale(0x0);
 5673     disp($off);
 5674   %}
 5675 %}
 5676 
 5677 // Indirect Memory Plus Index Register Plus Offset Operand
 5678 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5679 %{
 5680   constraint(ALLOC_IN_RC(ptr_reg));
 5681   match(AddP (AddP reg lreg) off);
 5682 
 5683   op_cost(10);
 5684   format %{"[$reg + $off + $lreg]" %}
 5685   interface(MEMORY_INTER) %{
 5686     base($reg);
 5687     index($lreg);
 5688     scale(0x0);
 5689     disp($off);
 5690   %}
 5691 %}
 5692 
 5693 // Indirect Memory Plus Index Register Plus Offset Operand
 5694 operand indIndex(any_RegP reg, rRegL lreg)
 5695 %{
 5696   constraint(ALLOC_IN_RC(ptr_reg));
 5697   match(AddP reg lreg);
 5698 
 5699   op_cost(10);
 5700   format %{"[$reg + $lreg]" %}
 5701   interface(MEMORY_INTER) %{
 5702     base($reg);
 5703     index($lreg);
 5704     scale(0x0);
 5705     disp(0x0);
 5706   %}
 5707 %}
 5708 
 5709 // Indirect Memory Times Scale Plus Index Register
 5710 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5711 %{
 5712   constraint(ALLOC_IN_RC(ptr_reg));
 5713   match(AddP reg (LShiftL lreg scale));
 5714 
 5715   op_cost(10);
 5716   format %{"[$reg + $lreg << $scale]" %}
 5717   interface(MEMORY_INTER) %{
 5718     base($reg);
 5719     index($lreg);
 5720     scale($scale);
 5721     disp(0x0);
 5722   %}
 5723 %}
 5724 
 5725 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5726 %{
 5727   constraint(ALLOC_IN_RC(ptr_reg));
 5728   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5729   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5730 
 5731   op_cost(10);
 5732   format %{"[$reg + pos $idx << $scale]" %}
 5733   interface(MEMORY_INTER) %{
 5734     base($reg);
 5735     index($idx);
 5736     scale($scale);
 5737     disp(0x0);
 5738   %}
 5739 %}
 5740 
 5741 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5742 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5743 %{
 5744   constraint(ALLOC_IN_RC(ptr_reg));
 5745   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5746 
 5747   op_cost(10);
 5748   format %{"[$reg + $off + $lreg << $scale]" %}
 5749   interface(MEMORY_INTER) %{
 5750     base($reg);
 5751     index($lreg);
 5752     scale($scale);
 5753     disp($off);
 5754   %}
 5755 %}
 5756 
 5757 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5758 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5759 %{
 5760   constraint(ALLOC_IN_RC(ptr_reg));
 5761   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5762   match(AddP (AddP reg (ConvI2L idx)) off);
 5763 
 5764   op_cost(10);
 5765   format %{"[$reg + $off + $idx]" %}
 5766   interface(MEMORY_INTER) %{
 5767     base($reg);
 5768     index($idx);
 5769     scale(0x0);
 5770     disp($off);
 5771   %}
 5772 %}
 5773 
 5774 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5775 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5776 %{
 5777   constraint(ALLOC_IN_RC(ptr_reg));
 5778   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5779   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5780 
 5781   op_cost(10);
 5782   format %{"[$reg + $off + $idx << $scale]" %}
 5783   interface(MEMORY_INTER) %{
 5784     base($reg);
 5785     index($idx);
 5786     scale($scale);
 5787     disp($off);
 5788   %}
 5789 %}
 5790 
 5791 // Indirect Narrow Oop Plus Offset Operand
 5792 // Note: the x86 architecture doesn't support "scale * index + offset" without a base
 5793 // register, so we can't free r12 even when CompressedOops::base() == nullptr.
 5794 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5795   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5796   constraint(ALLOC_IN_RC(ptr_reg));
 5797   match(AddP (DecodeN reg) off);
 5798 
 5799   op_cost(10);
 5800   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5801   interface(MEMORY_INTER) %{
 5802     base(0xc); // R12
 5803     index($reg);
 5804     scale(0x3);
 5805     disp($off);
 5806   %}
 5807 %}
 5808 
 5809 // Indirect Memory Operand
 5810 operand indirectNarrow(rRegN reg)
 5811 %{
 5812   predicate(CompressedOops::shift() == 0);
 5813   constraint(ALLOC_IN_RC(ptr_reg));
 5814   match(DecodeN reg);
 5815 
 5816   format %{ "[$reg]" %}
 5817   interface(MEMORY_INTER) %{
 5818     base($reg);
 5819     index(0x4);
 5820     scale(0x0);
 5821     disp(0x0);
 5822   %}
 5823 %}
 5824 
 5825 // Indirect Memory Plus Short Offset Operand
 5826 operand indOffset8Narrow(rRegN reg, immL8 off)
 5827 %{
 5828   predicate(CompressedOops::shift() == 0);
 5829   constraint(ALLOC_IN_RC(ptr_reg));
 5830   match(AddP (DecodeN reg) off);
 5831 
 5832   format %{ "[$reg + $off (8-bit)]" %}
 5833   interface(MEMORY_INTER) %{
 5834     base($reg);
 5835     index(0x4);
 5836     scale(0x0);
 5837     disp($off);
 5838   %}
 5839 %}
 5840 
 5841 // Indirect Memory Plus Long Offset Operand
 5842 operand indOffset32Narrow(rRegN reg, immL32 off)
 5843 %{
 5844   predicate(CompressedOops::shift() == 0);
 5845   constraint(ALLOC_IN_RC(ptr_reg));
 5846   match(AddP (DecodeN reg) off);
 5847 
 5848   format %{ "[$reg + $off (32-bit)]" %}
 5849   interface(MEMORY_INTER) %{
 5850     base($reg);
 5851     index(0x4);
 5852     scale(0x0);
 5853     disp($off);
 5854   %}
 5855 %}
 5856 
 5857 // Indirect Memory Plus Index Register Plus Offset Operand
 5858 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5859 %{
 5860   predicate(CompressedOops::shift() == 0);
 5861   constraint(ALLOC_IN_RC(ptr_reg));
 5862   match(AddP (AddP (DecodeN reg) lreg) off);
 5863 
 5864   op_cost(10);
 5865   format %{"[$reg + $off + $lreg]" %}
 5866   interface(MEMORY_INTER) %{
 5867     base($reg);
 5868     index($lreg);
 5869     scale(0x0);
 5870     disp($off);
 5871   %}
 5872 %}
 5873 
 5874 // Indirect Memory Plus Index Register Plus Offset Operand
 5875 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5876 %{
 5877   predicate(CompressedOops::shift() == 0);
 5878   constraint(ALLOC_IN_RC(ptr_reg));
 5879   match(AddP (DecodeN reg) lreg);
 5880 
 5881   op_cost(10);
 5882   format %{"[$reg + $lreg]" %}
 5883   interface(MEMORY_INTER) %{
 5884     base($reg);
 5885     index($lreg);
 5886     scale(0x0);
 5887     disp(0x0);
 5888   %}
 5889 %}
 5890 
 5891 // Indirect Memory Times Scale Plus Index Register
 5892 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5893 %{
 5894   predicate(CompressedOops::shift() == 0);
 5895   constraint(ALLOC_IN_RC(ptr_reg));
 5896   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5897 
 5898   op_cost(10);
 5899   format %{"[$reg + $lreg << $scale]" %}
 5900   interface(MEMORY_INTER) %{
 5901     base($reg);
 5902     index($lreg);
 5903     scale($scale);
 5904     disp(0x0);
 5905   %}
 5906 %}
 5907 
 5908 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5909 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5910 %{
 5911   predicate(CompressedOops::shift() == 0);
 5912   constraint(ALLOC_IN_RC(ptr_reg));
 5913   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5914 
 5915   op_cost(10);
 5916   format %{"[$reg + $off + $lreg << $scale]" %}
 5917   interface(MEMORY_INTER) %{
 5918     base($reg);
 5919     index($lreg);
 5920     scale($scale);
 5921     disp($off);
 5922   %}
 5923 %}
 5924 
 5925 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5926 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5927 %{
 5928   constraint(ALLOC_IN_RC(ptr_reg));
 5929   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5930   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5931 
 5932   op_cost(10);
 5933   format %{"[$reg + $off + $idx]" %}
 5934   interface(MEMORY_INTER) %{
 5935     base($reg);
 5936     index($idx);
 5937     scale(0x0);
 5938     disp($off);
 5939   %}
 5940 %}
 5941 
 5942 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5943 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5944 %{
 5945   constraint(ALLOC_IN_RC(ptr_reg));
 5946   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5947   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 5948 
 5949   op_cost(10);
 5950   format %{"[$reg + $off + $idx << $scale]" %}
 5951   interface(MEMORY_INTER) %{
 5952     base($reg);
 5953     index($idx);
 5954     scale($scale);
 5955     disp($off);
 5956   %}
 5957 %}
 5958 
 5959 //----------Special Memory Operands--------------------------------------------
 5960 // Stack Slot Operand - This operand is used for loading and storing temporary
 5961 //                      values on the stack where a match requires a value to
 5962 //                      flow through memory.
 5963 operand stackSlotP(sRegP reg)
 5964 %{
 5965   constraint(ALLOC_IN_RC(stack_slots));
 5966   // No match rule because this operand is only generated in matching
 5967 
 5968   format %{ "[$reg]" %}
 5969   interface(MEMORY_INTER) %{
 5970     base(0x4);   // RSP
 5971     index(0x4);  // No Index
 5972     scale(0x0);  // No Scale
 5973     disp($reg);  // Stack Offset
 5974   %}
 5975 %}
 5976 
 5977 operand stackSlotI(sRegI reg)
 5978 %{
 5979   constraint(ALLOC_IN_RC(stack_slots));
 5980   // No match rule because this operand is only generated in matching
 5981 
 5982   format %{ "[$reg]" %}
 5983   interface(MEMORY_INTER) %{
 5984     base(0x4);   // RSP
 5985     index(0x4);  // No Index
 5986     scale(0x0);  // No Scale
 5987     disp($reg);  // Stack Offset
 5988   %}
 5989 %}
 5990 
 5991 operand stackSlotF(sRegF reg)
 5992 %{
 5993   constraint(ALLOC_IN_RC(stack_slots));
 5994   // No match rule because this operand is only generated in matching
 5995 
 5996   format %{ "[$reg]" %}
 5997   interface(MEMORY_INTER) %{
 5998     base(0x4);   // RSP
 5999     index(0x4);  // No Index
 6000     scale(0x0);  // No Scale
 6001     disp($reg);  // Stack Offset
 6002   %}
 6003 %}
 6004 
 6005 operand stackSlotD(sRegD reg)
 6006 %{
 6007   constraint(ALLOC_IN_RC(stack_slots));
 6008   // No match rule because this operand is only generated in matching
 6009 
 6010   format %{ "[$reg]" %}
 6011   interface(MEMORY_INTER) %{
 6012     base(0x4);   // RSP
 6013     index(0x4);  // No Index
 6014     scale(0x0);  // No Scale
 6015     disp($reg);  // Stack Offset
 6016   %}
 6017 %}
 6018 operand stackSlotL(sRegL reg)
 6019 %{
 6020   constraint(ALLOC_IN_RC(stack_slots));
 6021   // No match rule because this operand is only generated in matching
 6022 
 6023   format %{ "[$reg]" %}
 6024   interface(MEMORY_INTER) %{
 6025     base(0x4);   // RSP
 6026     index(0x4);  // No Index
 6027     scale(0x0);  // No Scale
 6028     disp($reg);  // Stack Offset
 6029   %}
 6030 %}
 6031 
 6032 //----------Conditional Branch Operands----------------------------------------
 6033 // Comparison Op  - This is the operation of the comparison, and is limited to
 6034 //                  the following set of codes:
 6035 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6036 //
 6037 // Other attributes of the comparison, such as unsignedness, are specified
 6038 // by the comparison instruction that sets a condition code flags register.
 6039 // That result is represented by a flags operand whose subtype is appropriate
 6040 // to the unsignedness (etc.) of the comparison.
 6041 //
 6042 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6043 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6044 // by matching a specific subtype of Bool operand below, such as cmpOpU.
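//
// Illustrative sketch only (hypothetical rule name, commented out; the real
// branch rules live in the instruction section below): a conditional jump
// consumes the Bool through a cmpOp-style operand plus the flags produced by
// the compare, and the operand subtype supplies the condition encoding via
// $cop$$cmpcode.
//
// instruct jmpCon_sketch(cmpOp cop, rFlagsReg cr, label labl)
// %{
//   match(If cop cr);
//   effect(USE labl);
//   format %{ "j$cop     $labl" %}
//   ins_encode %{
//     Label* L = $labl$$label;
//     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // always long form
//   %}
//   ins_pipe(pipe_jcc);
// %}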
 6045 
 6046 // Comparison Code
 6047 operand cmpOp()
 6048 %{
 6049   match(Bool);
 6050 
 6051   format %{ "" %}
 6052   interface(COND_INTER) %{
 6053     equal(0x4, "e");
 6054     not_equal(0x5, "ne");
 6055     less(0xc, "l");
 6056     greater_equal(0xd, "ge");
 6057     less_equal(0xe, "le");
 6058     greater(0xf, "g");
 6059     overflow(0x0, "o");
 6060     no_overflow(0x1, "no");
 6061   %}
 6062 %}
 6063 
 6064 // Comparison Code, unsigned compare.  Used by FP also, with
 6065 // C2 (unordered) turned into GT or LT already.  The other bits
 6066 // C0 and C3 are turned into Carry & Zero flags.
 6067 operand cmpOpU()
 6068 %{
 6069   match(Bool);
 6070 
 6071   format %{ "" %}
 6072   interface(COND_INTER) %{
 6073     equal(0x4, "e");
 6074     not_equal(0x5, "ne");
 6075     less(0x2, "b");
 6076     greater_equal(0x3, "ae");
 6077     less_equal(0x6, "be");
 6078     greater(0x7, "a");
 6079     overflow(0x0, "o");
 6080     no_overflow(0x1, "no");
 6081   %}
 6082 %}
 6083 
 6084 
 6085 // Floating comparisons that don't require any fixup for the unordered case.
 6086 // If both inputs of the comparison are the same, ZF is always set, so we
 6087 // don't need to use cmpOpUCF2 for eq/ne.
 6088 operand cmpOpUCF() %{
 6089   match(Bool);
 6090   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6091             (n->as_Bool()->_test._test == BoolTest::lt ||
 6092              n->as_Bool()->_test._test == BoolTest::ge ||
 6093              n->as_Bool()->_test._test == BoolTest::le ||
 6094              n->as_Bool()->_test._test == BoolTest::gt ||
 6095              n->in(1)->in(1) == n->in(1)->in(2)));
 6096   format %{ "" %}
 6097   interface(COND_INTER) %{
 6098     equal(0xb, "np");
 6099     not_equal(0xa, "p");
 6100     less(0x2, "b");
 6101     greater_equal(0x3, "ae");
 6102     less_equal(0x6, "be");
 6103     greater(0x7, "a");
 6104     overflow(0x0, "o");
 6105     no_overflow(0x1, "no");
 6106   %}
 6107 %}
 6108 
 6109 
 6110 // Floating comparisons that can be fixed up with extra conditional jumps
 6111 operand cmpOpUCF2() %{
 6112   match(Bool);
 6113   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6114             (n->as_Bool()->_test._test == BoolTest::ne ||
 6115              n->as_Bool()->_test._test == BoolTest::eq) &&
 6116             n->in(1)->in(1) != n->in(1)->in(2));
 6117   format %{ "" %}
 6118   interface(COND_INTER) %{
 6119     equal(0x4, "e");
 6120     not_equal(0x5, "ne");
 6121     less(0x2, "b");
 6122     greater_equal(0x3, "ae");
 6123     less_equal(0x6, "be");
 6124     greater(0x7, "a");
 6125     overflow(0x0, "o");
 6126     no_overflow(0x1, "no");
 6127   %}
 6128 %}
 6129 
 6130 
 6131 // Floating point comparisons that set condition flags to test more directly.
 6132 // Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
 6133 // are used for L (<) and LE (<=) conditions. It's important to convert these
 6134 // latter conditions to ones that use unsigned tests before passing them into an
 6135 // instruction, because the preceding comparison might be based on a three-way
 6136 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
 6137 operand cmpOpUCFE()
 6138 %{
 6139   match(Bool);
 6140   predicate((UseAPX && VM_Version::supports_avx10_2()) &&
 6141             (n->as_Bool()->_test._test == BoolTest::ne ||
 6142              n->as_Bool()->_test._test == BoolTest::eq ||
 6143              n->as_Bool()->_test._test == BoolTest::lt ||
 6144              n->as_Bool()->_test._test == BoolTest::ge ||
 6145              n->as_Bool()->_test._test == BoolTest::le ||
 6146              n->as_Bool()->_test._test == BoolTest::gt));
 6147 
 6148   format %{ "" %}
 6149   interface(COND_INTER) %{
 6150     equal(0x4, "e");
 6151     not_equal(0x5, "ne");
 6152     less(0x2, "b");
 6153     greater_equal(0x3, "ae");
 6154     less_equal(0x6, "be");
 6155     greater(0x7, "a");
 6156     overflow(0x0, "o");
 6157     no_overflow(0x1, "no");
 6158   %}
 6159 %}
 6160 
 6161 // Operands for bound floating point register arguments
 6162 operand rxmm0() %{
 6163   constraint(ALLOC_IN_RC(xmm0_reg));
 6164   match(VecX);
 6165   format%{%}
 6166   interface(REG_INTER);
 6167 %}
 6168 
 6169 // Vectors
 6170 
 6171 // Dummy generic vector class. Should be used for all vector operands.
 6172 // Replaced with vec[SDXYZ] during post-selection pass.
 6173 operand vec() %{
 6174   constraint(ALLOC_IN_RC(dynamic));
 6175   match(VecX);
 6176   match(VecY);
 6177   match(VecZ);
 6178   match(VecS);
 6179   match(VecD);
 6180 
 6181   format %{ %}
 6182   interface(REG_INTER);
 6183 %}
 6184 
 6185 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6186 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6187 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6188 // runtime code generation via reg_class_dynamic.
 6189 operand legVec() %{
 6190   constraint(ALLOC_IN_RC(dynamic));
 6191   match(VecX);
 6192   match(VecY);
 6193   match(VecZ);
 6194   match(VecS);
 6195   match(VecD);
 6196 
 6197   format %{ %}
 6198   interface(REG_INTER);
 6199 %}
 6200 
 6201 // Replaces vec during post-selection cleanup. See above.
 6202 operand vecS() %{
 6203   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6204   match(VecS);
 6205 
 6206   format %{ %}
 6207   interface(REG_INTER);
 6208 %}
 6209 
 6210 // Replaces legVec during post-selection cleanup. See above.
 6211 operand legVecS() %{
 6212   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6213   match(VecS);
 6214 
 6215   format %{ %}
 6216   interface(REG_INTER);
 6217 %}
 6218 
 6219 // Replaces vec during post-selection cleanup. See above.
 6220 operand vecD() %{
 6221   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6222   match(VecD);
 6223 
 6224   format %{ %}
 6225   interface(REG_INTER);
 6226 %}
 6227 
 6228 // Replaces legVec during post-selection cleanup. See above.
 6229 operand legVecD() %{
 6230   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6231   match(VecD);
 6232 
 6233   format %{ %}
 6234   interface(REG_INTER);
 6235 %}
 6236 
 6237 // Replaces vec during post-selection cleanup. See above.
 6238 operand vecX() %{
 6239   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6240   match(VecX);
 6241 
 6242   format %{ %}
 6243   interface(REG_INTER);
 6244 %}
 6245 
 6246 // Replaces legVec during post-selection cleanup. See above.
 6247 operand legVecX() %{
 6248   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6249   match(VecX);
 6250 
 6251   format %{ %}
 6252   interface(REG_INTER);
 6253 %}
 6254 
 6255 // Replaces vec during post-selection cleanup. See above.
 6256 operand vecY() %{
 6257   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6258   match(VecY);
 6259 
 6260   format %{ %}
 6261   interface(REG_INTER);
 6262 %}
 6263 
 6264 // Replaces legVec during post-selection cleanup. See above.
 6265 operand legVecY() %{
 6266   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6267   match(VecY);
 6268 
 6269   format %{ %}
 6270   interface(REG_INTER);
 6271 %}
 6272 
 6273 // Replaces vec during post-selection cleanup. See above.
 6274 operand vecZ() %{
 6275   constraint(ALLOC_IN_RC(vectorz_reg));
 6276   match(VecZ);
 6277 
 6278   format %{ %}
 6279   interface(REG_INTER);
 6280 %}
 6281 
 6282 // Replaces legVec during post-selection cleanup. See above.
 6283 operand legVecZ() %{
 6284   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6285   match(VecZ);
 6286 
 6287   format %{ %}
 6288   interface(REG_INTER);
 6289 %}
 6290 
 6291 //----------OPERAND CLASSES----------------------------------------------------
 6292 // Operand Classes are groups of operands that are used to simplify
 6293 // instruction definitions by not requiring the AD writer to specify separate
 6294 // instructions for every form of operand when the instruction accepts
 6295 // multiple operand types with the same basic encoding and format.  The classic
 6296 // case of this is memory operands.
 6297 
 6298 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6299                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6300                indCompressedOopOffset,
 6301                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6302                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6303                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
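//
// For example, the loadB rule further down in this file takes a "memory"
// operand, so a single definition (mirrored here as an illustrative,
// commented-out sketch) matches every addressing form in the class above,
// from [$reg] up to [$reg + $off + $idx << $scale]:
//
// instruct loadB_sketch(rRegI dst, memory mem)
// %{
//   match(Set dst (LoadB mem));
//   ins_cost(125);
//   format %{ "movsbl  $dst, $mem\t# byte" %}
//   ins_encode %{
//     __ movsbl($dst$$Register, $mem$$Address);
//   %}
//   ins_pipe(ialu_reg_mem);
// %}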
 6304 
 6305 //----------PIPELINE-----------------------------------------------------------
 6306 // Rules which define the behavior of the target architecture's pipeline.
 6307 pipeline %{
 6308 
 6309 //----------ATTRIBUTES---------------------------------------------------------
 6310 attributes %{
 6311   variable_size_instructions;        // Instructions are of variable size
 6312   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6313   instruction_unit_size = 1;         // An instruction is 1 byte long
 6314   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6315   instruction_fetch_units = 1;       // of 16 bytes
 6316 %}
 6317 
 6318 //----------RESOURCES----------------------------------------------------------
 6319 // Resources are the functional units available to the machine
 6320 
 6321 // Generic P2/P3 pipeline
 6322 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6323 // 3 instructions decoded per cycle.
 6324 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 6325 // 3 ALU ops, only ALU0 handles mul instructions.
 6326 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6327            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6328            BR, FPU,
 6329            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6330 
 6331 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6332 // Pipeline Description specifies the stages in the machine's pipeline
 6333 
 6334 // Generic P2/P3 pipeline
 6335 pipe_desc(S0, S1, S2, S3, S4, S5);
 6336 
 6337 //----------PIPELINE CLASSES---------------------------------------------------
 6338 // Pipeline Classes describe the stages in which input and output are
 6339 // referenced by the hardware pipeline.
 6340 
 6341 // Naming convention: ialu or fpu
 6342 // Then: _reg
 6343 // Then: _reg if there is a 2nd register
 6344 // Then: _long if it's a pair of instructions implementing a long
 6345 // Then: _fat if it requires the big decoder
 6346 //   Or: _mem if it requires the big decoder and a memory unit.
 6347 
 6348 // Integer ALU reg operation
 6349 pipe_class ialu_reg(rRegI dst)
 6350 %{
 6351     single_instruction;
 6352     dst    : S4(write);
 6353     dst    : S3(read);
 6354     DECODE : S0;        // any decoder
 6355     ALU    : S3;        // any alu
 6356 %}
 6357 
 6358 // Long ALU reg operation
 6359 pipe_class ialu_reg_long(rRegL dst)
 6360 %{
 6361     instruction_count(2);
 6362     dst    : S4(write);
 6363     dst    : S3(read);
 6364     DECODE : S0(2);     // any 2 decoders
 6365     ALU    : S3(2);     // both alus
 6366 %}
 6367 
 6368 // Integer ALU reg operation using big decoder
 6369 pipe_class ialu_reg_fat(rRegI dst)
 6370 %{
 6371     single_instruction;
 6372     dst    : S4(write);
 6373     dst    : S3(read);
 6374     D0     : S0;        // big decoder only
 6375     ALU    : S3;        // any alu
 6376 %}
 6377 
 6378 // Integer ALU reg-reg operation
 6379 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6380 %{
 6381     single_instruction;
 6382     dst    : S4(write);
 6383     src    : S3(read);
 6384     DECODE : S0;        // any decoder
 6385     ALU    : S3;        // any alu
 6386 %}
 6387 
 6388 // Integer ALU reg-reg operation
 6389 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6390 %{
 6391     single_instruction;
 6392     dst    : S4(write);
 6393     src    : S3(read);
 6394     D0     : S0;        // big decoder only
 6395     ALU    : S3;        // any alu
 6396 %}
 6397 
 6398 // Integer ALU reg-mem operation
 6399 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6400 %{
 6401     single_instruction;
 6402     dst    : S5(write);
 6403     mem    : S3(read);
 6404     D0     : S0;        // big decoder only
 6405     ALU    : S4;        // any alu
 6406     MEM    : S3;        // any mem
 6407 %}
 6408 
 6409 // Integer mem operation (prefetch)
 6410 pipe_class ialu_mem(memory mem)
 6411 %{
 6412     single_instruction;
 6413     mem    : S3(read);
 6414     D0     : S0;        // big decoder only
 6415     MEM    : S3;        // any mem
 6416 %}
 6417 
 6418 // Integer Store to Memory
 6419 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6420 %{
 6421     single_instruction;
 6422     mem    : S3(read);
 6423     src    : S5(read);
 6424     D0     : S0;        // big decoder only
 6425     ALU    : S4;        // any alu
 6426     MEM    : S3;
 6427 %}
 6428 
 6429 // // Long Store to Memory
 6430 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6431 // %{
 6432 //     instruction_count(2);
 6433 //     mem    : S3(read);
 6434 //     src    : S5(read);
 6435 //     D0     : S0(2);          // big decoder only; twice
 6436 //     ALU    : S4(2);     // any 2 alus
 6437 //     MEM    : S3(2);  // Both mems
 6438 // %}
 6439 
 6440 // Integer Store to Memory
 6441 pipe_class ialu_mem_imm(memory mem)
 6442 %{
 6443     single_instruction;
 6444     mem    : S3(read);
 6445     D0     : S0;        // big decoder only
 6446     ALU    : S4;        // any alu
 6447     MEM    : S3;
 6448 %}
 6449 
 6450 // Integer ALU0 reg-reg operation
 6451 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6452 %{
 6453     single_instruction;
 6454     dst    : S4(write);
 6455     src    : S3(read);
 6456     D0     : S0;        // Big decoder only
 6457     ALU0   : S3;        // only alu0
 6458 %}
 6459 
 6460 // Integer ALU0 reg-mem operation
 6461 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6462 %{
 6463     single_instruction;
 6464     dst    : S5(write);
 6465     mem    : S3(read);
 6466     D0     : S0;        // big decoder only
 6467     ALU0   : S4;        // ALU0 only
 6468     MEM    : S3;        // any mem
 6469 %}
 6470 
 6471 // Integer ALU reg-reg operation
 6472 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6473 %{
 6474     single_instruction;
 6475     cr     : S4(write);
 6476     src1   : S3(read);
 6477     src2   : S3(read);
 6478     DECODE : S0;        // any decoder
 6479     ALU    : S3;        // any alu
 6480 %}
 6481 
 6482 // Integer ALU reg-imm operation
 6483 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6484 %{
 6485     single_instruction;
 6486     cr     : S4(write);
 6487     src1   : S3(read);
 6488     DECODE : S0;        // any decoder
 6489     ALU    : S3;        // any alu
 6490 %}
 6491 
 6492 // Integer ALU reg-mem operation
 6493 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6494 %{
 6495     single_instruction;
 6496     cr     : S4(write);
 6497     src1   : S3(read);
 6498     src2   : S3(read);
 6499     D0     : S0;        // big decoder only
 6500     ALU    : S4;        // any alu
 6501     MEM    : S3;
 6502 %}
 6503 
 6504 // Conditional move reg-reg
 6505 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6506 %{
 6507     instruction_count(4);
 6508     y      : S4(read);
 6509     q      : S3(read);
 6510     p      : S3(read);
 6511     DECODE : S0(4);     // any decoder
 6512 %}
 6513 
 6514 // Conditional move reg-reg
 6515 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6516 %{
 6517     single_instruction;
 6518     dst    : S4(write);
 6519     src    : S3(read);
 6520     cr     : S3(read);
 6521     DECODE : S0;        // any decoder
 6522 %}
 6523 
 6524 // Conditional move reg-mem
 6525 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6526 %{
 6527     single_instruction;
 6528     dst    : S4(write);
 6529     src    : S3(read);
 6530     cr     : S3(read);
 6531     DECODE : S0;        // any decoder
 6532     MEM    : S3;
 6533 %}
 6534 
 6535 // Conditional move reg-reg long
 6536 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6537 %{
 6538     single_instruction;
 6539     dst    : S4(write);
 6540     src    : S3(read);
 6541     cr     : S3(read);
 6542     DECODE : S0(2);     // any 2 decoders
 6543 %}
 6544 
 6545 // Float reg-reg operation
 6546 pipe_class fpu_reg(regD dst)
 6547 %{
 6548     instruction_count(2);
 6549     dst    : S3(read);
 6550     DECODE : S0(2);     // any 2 decoders
 6551     FPU    : S3;
 6552 %}
 6553 
 6554 // Float reg-reg operation
 6555 pipe_class fpu_reg_reg(regD dst, regD src)
 6556 %{
 6557     instruction_count(2);
 6558     dst    : S4(write);
 6559     src    : S3(read);
 6560     DECODE : S0(2);     // any 2 decoders
 6561     FPU    : S3;
 6562 %}
 6563 
 6564 // Float reg-reg operation
 6565 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6566 %{
 6567     instruction_count(3);
 6568     dst    : S4(write);
 6569     src1   : S3(read);
 6570     src2   : S3(read);
 6571     DECODE : S0(3);     // any 3 decoders
 6572     FPU    : S3(2);
 6573 %}
 6574 
 6575 // Float reg-reg operation
 6576 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6577 %{
 6578     instruction_count(4);
 6579     dst    : S4(write);
 6580     src1   : S3(read);
 6581     src2   : S3(read);
 6582     src3   : S3(read);
 6583     DECODE : S0(4);     // any 4 decoder slots
 6584     FPU    : S3(2);
 6585 %}
 6586 
 6587 // Float reg-reg operation
 6588 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6589 %{
 6590     instruction_count(4);
 6591     dst    : S4(write);
 6592     src1   : S3(read);
 6593     src2   : S3(read);
 6594     src3   : S3(read);
 6595     DECODE : S1(3);     // any 3 decoders
 6596     D0     : S0;        // Big decoder only
 6597     FPU    : S3(2);
 6598     MEM    : S3;
 6599 %}
 6600 
 6601 // Float reg-mem operation
 6602 pipe_class fpu_reg_mem(regD dst, memory mem)
 6603 %{
 6604     instruction_count(2);
 6605     dst    : S5(write);
 6606     mem    : S3(read);
 6607     D0     : S0;        // big decoder only
 6608     DECODE : S1;        // any decoder for FPU POP
 6609     FPU    : S4;
 6610     MEM    : S3;        // any mem
 6611 %}
 6612 
 6613 // Float reg-mem operation
 6614 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6615 %{
 6616     instruction_count(3);
 6617     dst    : S5(write);
 6618     src1   : S3(read);
 6619     mem    : S3(read);
 6620     D0     : S0;        // big decoder only
 6621     DECODE : S1(2);     // any decoder for FPU POP
 6622     FPU    : S4;
 6623     MEM    : S3;        // any mem
 6624 %}
 6625 
 6626 // Float mem-reg operation
 6627 pipe_class fpu_mem_reg(memory mem, regD src)
 6628 %{
 6629     instruction_count(2);
 6630     src    : S5(read);
 6631     mem    : S3(read);
 6632     DECODE : S0;        // any decoder for FPU PUSH
 6633     D0     : S1;        // big decoder only
 6634     FPU    : S4;
 6635     MEM    : S3;        // any mem
 6636 %}
 6637 
 6638 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6639 %{
 6640     instruction_count(3);
 6641     src1   : S3(read);
 6642     src2   : S3(read);
 6643     mem    : S3(read);
 6644     DECODE : S0(2);     // any decoder for FPU PUSH
 6645     D0     : S1;        // big decoder only
 6646     FPU    : S4;
 6647     MEM    : S3;        // any mem
 6648 %}
 6649 
 6650 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6651 %{
 6652     instruction_count(3);
 6653     src1   : S3(read);
 6654     src2   : S3(read);
 6655     mem    : S4(read);
 6656     DECODE : S0;        // any decoder for FPU PUSH
 6657     D0     : S0(2);     // big decoder only
 6658     FPU    : S4;
 6659     MEM    : S3(2);     // any mem
 6660 %}
 6661 
 6662 pipe_class fpu_mem_mem(memory dst, memory src1)
 6663 %{
 6664     instruction_count(2);
 6665     src1   : S3(read);
 6666     dst    : S4(read);
 6667     D0     : S0(2);     // big decoder only
 6668     MEM    : S3(2);     // any mem
 6669 %}
 6670 
 6671 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6672 %{
 6673     instruction_count(3);
 6674     src1   : S3(read);
 6675     src2   : S3(read);
 6676     dst    : S4(read);
 6677     D0     : S0(3);     // big decoder only
 6678     FPU    : S4;
 6679     MEM    : S3(3);     // any mem
 6680 %}
 6681 
 6682 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6683 %{
 6684     instruction_count(3);
 6685     src1   : S4(read);
 6686     mem    : S4(read);
 6687     DECODE : S0;        // any decoder for FPU PUSH
 6688     D0     : S0(2);     // big decoder only
 6689     FPU    : S4;
 6690     MEM    : S3(2);     // any mem
 6691 %}
 6692 
 6693 // Float load constant
 6694 pipe_class fpu_reg_con(regD dst)
 6695 %{
 6696     instruction_count(2);
 6697     dst    : S5(write);
 6698     D0     : S0;        // big decoder only for the load
 6699     DECODE : S1;        // any decoder for FPU POP
 6700     FPU    : S4;
 6701     MEM    : S3;        // any mem
 6702 %}
 6703 
 6704 // Float load constant
 6705 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6706 %{
 6707     instruction_count(3);
 6708     dst    : S5(write);
 6709     src    : S3(read);
 6710     D0     : S0;        // big decoder only for the load
 6711     DECODE : S1(2);     // any decoder for FPU POP
 6712     FPU    : S4;
 6713     MEM    : S3;        // any mem
 6714 %}
 6715 
 6716 // Unconditional branch
 6717 pipe_class pipe_jmp(label labl)
 6718 %{
 6719     single_instruction;
 6720     BR   : S3;
 6721 %}
 6722 
 6723 // Conditional branch
 6724 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6725 %{
 6726     single_instruction;
 6727     cr    : S1(read);
 6728     BR    : S3;
 6729 %}
 6730 
 6731 // Allocation idiom
 6732 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6733 %{
 6734     instruction_count(1); force_serialization;
 6735     fixed_latency(6);
 6736     heap_ptr : S3(read);
 6737     DECODE   : S0(3);
 6738     D0       : S2;
 6739     MEM      : S3;
 6740     ALU      : S3(2);
 6741     dst      : S5(write);
 6742     BR       : S5;
 6743 %}
 6744 
 6745 // Generic big/slow expanded idiom
 6746 pipe_class pipe_slow()
 6747 %{
 6748     instruction_count(10); multiple_bundles; force_serialization;
 6749     fixed_latency(100);
 6750     D0  : S0(2);
 6751     MEM : S3(2);
 6752 %}
 6753 
 6754 // The real do-nothing guy
 6755 pipe_class empty()
 6756 %{
 6757     instruction_count(0);
 6758 %}
 6759 
 6760 // Define the class for the Nop node
 6761 define
 6762 %{
 6763    MachNop = empty;
 6764 %}
 6765 
 6766 %}
 6767 
 6768 //----------INSTRUCTIONS-------------------------------------------------------
 6769 //
 6770 // match      -- States which machine-independent subtree may be replaced
 6771 //               by this instruction.
 6772 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6773 //               selection to identify a minimum cost tree of machine
 6774 //               instructions that matches a tree of machine-independent
 6775 //               instructions.
 6776 // format     -- A string providing the disassembly for this instruction.
 6777 //               The value of an instruction's operand may be inserted
 6778 //               by referring to it with a '$' prefix.
 6779 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6780 //               to within an encode class as $primary, $secondary, and $tertiary
 6781 //               respectively.  The primary opcode is commonly used to
 6782 //               indicate the type of machine instruction, while secondary
 6783 //               and tertiary are often used for prefix options or addressing
 6784 //               modes.
 6785 // ins_encode -- A list of encode classes with parameters. The encode class
 6786 //               name must have been defined in an 'enc_class' specification
 6787 //               in the encode section of the architecture description.
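//
// Illustrative sketch only (hypothetical rule name, commented out): a minimal
// integer-add rule showing how match, effect, format, ins_encode and ins_pipe
// fit together; operand values are substituted via the '$' prefix.
//
// instruct addI_rReg_sketch(rRegI dst, rRegI src, rFlagsReg cr)
// %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
//   format %{ "addl    $dst, $src\t# int" %}
//   ins_encode %{
//     __ addl($dst$$Register, $src$$Register);
//   %}
//   ins_pipe(ialu_reg_reg);
// %}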
 6788 
 6789 // ============================================================================
 6790 
 6791 instruct ShouldNotReachHere() %{
 6792   match(Halt);
 6793   format %{ "stop\t# ShouldNotReachHere" %}
 6794   ins_encode %{
 6795     if (is_reachable()) {
 6796       const char* str = __ code_string(_halt_reason);
 6797       __ stop(str);
 6798     }
 6799   %}
 6800   ins_pipe(pipe_slow);
 6801 %}
 6802 
 6803 // ============================================================================
 6804 
 6805 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6806 // Load Float
 6807 instruct MoveF2VL(vlRegF dst, regF src) %{
 6808   match(Set dst src);
 6809   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6810   ins_encode %{
 6811     ShouldNotReachHere();
 6812   %}
 6813   ins_pipe( fpu_reg_reg );
 6814 %}
 6815 
 6816 // Load Float
 6817 instruct MoveF2LEG(legRegF dst, regF src) %{
 6818   match(Set dst src);
 6819   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6820   ins_encode %{
 6821     ShouldNotReachHere();
 6822   %}
 6823   ins_pipe( fpu_reg_reg );
 6824 %}
 6825 
 6826 // Load Float
 6827 instruct MoveVL2F(regF dst, vlRegF src) %{
 6828   match(Set dst src);
 6829   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6830   ins_encode %{
 6831     ShouldNotReachHere();
 6832   %}
 6833   ins_pipe( fpu_reg_reg );
 6834 %}
 6835 
 6836 // Load Float
 6837 instruct MoveLEG2F(regF dst, legRegF src) %{
 6838   match(Set dst src);
 6839   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6840   ins_encode %{
 6841     ShouldNotReachHere();
 6842   %}
 6843   ins_pipe( fpu_reg_reg );
 6844 %}
 6845 
 6846 // Load Double
 6847 instruct MoveD2VL(vlRegD dst, regD src) %{
 6848   match(Set dst src);
 6849   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6850   ins_encode %{
 6851     ShouldNotReachHere();
 6852   %}
 6853   ins_pipe( fpu_reg_reg );
 6854 %}
 6855 
 6856 // Load Double
 6857 instruct MoveD2LEG(legRegD dst, regD src) %{
 6858   match(Set dst src);
 6859   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6860   ins_encode %{
 6861     ShouldNotReachHere();
 6862   %}
 6863   ins_pipe( fpu_reg_reg );
 6864 %}
 6865 
 6866 // Load Double
 6867 instruct MoveVL2D(regD dst, vlRegD src) %{
 6868   match(Set dst src);
 6869   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6870   ins_encode %{
 6871     ShouldNotReachHere();
 6872   %}
 6873   ins_pipe( fpu_reg_reg );
 6874 %}
 6875 
 6876 // Load Double
 6877 instruct MoveLEG2D(regD dst, legRegD src) %{
 6878   match(Set dst src);
 6879   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6880   ins_encode %{
 6881     ShouldNotReachHere();
 6882   %}
 6883   ins_pipe( fpu_reg_reg );
 6884 %}
 6885 
 6886 //----------Load/Store/Move Instructions---------------------------------------
 6887 //----------Load Instructions--------------------------------------------------
 6888 
 6889 // Load Byte (8 bit signed)
 6890 instruct loadB(rRegI dst, memory mem)
 6891 %{
 6892   match(Set dst (LoadB mem));
 6893 
 6894   ins_cost(125);
 6895   format %{ "movsbl  $dst, $mem\t# byte" %}
 6896 
 6897   ins_encode %{
 6898     __ movsbl($dst$$Register, $mem$$Address);
 6899   %}
 6900 
 6901   ins_pipe(ialu_reg_mem);
 6902 %}
 6903 
 6904 // Load Byte (8 bit signed) into Long Register
 6905 instruct loadB2L(rRegL dst, memory mem)
 6906 %{
 6907   match(Set dst (ConvI2L (LoadB mem)));
 6908 
 6909   ins_cost(125);
 6910   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6911 
 6912   ins_encode %{
 6913     __ movsbq($dst$$Register, $mem$$Address);
 6914   %}
 6915 
 6916   ins_pipe(ialu_reg_mem);
 6917 %}
 6918 
 6919 // Load Unsigned Byte (8 bit UNsigned)
 6920 instruct loadUB(rRegI dst, memory mem)
 6921 %{
 6922   match(Set dst (LoadUB mem));
 6923 
 6924   ins_cost(125);
 6925   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6926 
 6927   ins_encode %{
 6928     __ movzbl($dst$$Register, $mem$$Address);
 6929   %}
 6930 
 6931   ins_pipe(ialu_reg_mem);
 6932 %}
 6933 
 6934 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6935 instruct loadUB2L(rRegL dst, memory mem)
 6936 %{
 6937   match(Set dst (ConvI2L (LoadUB mem)));
 6938 
 6939   ins_cost(125);
 6940   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6941 
 6942   ins_encode %{
 6943     __ movzbq($dst$$Register, $mem$$Address);
 6944   %}
 6945 
 6946   ins_pipe(ialu_reg_mem);
 6947 %}
 6948 
 6949 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 6950 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6951   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6952   effect(KILL cr);
 6953 
 6954   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6955             "andl    $dst, right_n_bits($mask, 8)" %}
 6956   ins_encode %{
 6957     Register Rdst = $dst$$Register;
 6958     __ movzbq(Rdst, $mem$$Address);
 6959     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 6960   %}
 6961   ins_pipe(ialu_reg_mem);
 6962 %}
 6963 
 6964 // Load Short (16 bit signed)
 6965 instruct loadS(rRegI dst, memory mem)
 6966 %{
 6967   match(Set dst (LoadS mem));
 6968 
 6969   ins_cost(125);
 6970   format %{ "movswl $dst, $mem\t# short" %}
 6971 
 6972   ins_encode %{
 6973     __ movswl($dst$$Register, $mem$$Address);
 6974   %}
 6975 
 6976   ins_pipe(ialu_reg_mem);
 6977 %}
 6978 
 6979 // Load Short (16 bit signed) to Byte (8 bit signed)
 6980 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6981   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 6982 
 6983   ins_cost(125);
 6984   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6985   ins_encode %{
 6986     __ movsbl($dst$$Register, $mem$$Address);
 6987   %}
 6988   ins_pipe(ialu_reg_mem);
 6989 %}
 6990 
 6991 // Load Short (16 bit signed) into Long Register
 6992 instruct loadS2L(rRegL dst, memory mem)
 6993 %{
 6994   match(Set dst (ConvI2L (LoadS mem)));
 6995 
 6996   ins_cost(125);
 6997   format %{ "movswq $dst, $mem\t# short -> long" %}
 6998 
 6999   ins_encode %{
 7000     __ movswq($dst$$Register, $mem$$Address);
 7001   %}
 7002 
 7003   ins_pipe(ialu_reg_mem);
 7004 %}
 7005 
 7006 // Load Unsigned Short/Char (16 bit UNsigned)
 7007 instruct loadUS(rRegI dst, memory mem)
 7008 %{
 7009   match(Set dst (LoadUS mem));
 7010 
 7011   ins_cost(125);
 7012   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 7013 
 7014   ins_encode %{
 7015     __ movzwl($dst$$Register, $mem$$Address);
 7016   %}
 7017 
 7018   ins_pipe(ialu_reg_mem);
 7019 %}
 7020 
 7021 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 7022 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7023   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 7024 
 7025   ins_cost(125);
 7026   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 7027   ins_encode %{
 7028     __ movsbl($dst$$Register, $mem$$Address);
 7029   %}
 7030   ins_pipe(ialu_reg_mem);
 7031 %}
 7032 
 7033 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 7034 instruct loadUS2L(rRegL dst, memory mem)
 7035 %{
 7036   match(Set dst (ConvI2L (LoadUS mem)));
 7037 
 7038   ins_cost(125);
 7039   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 7040 
 7041   ins_encode %{
 7042     __ movzwq($dst$$Register, $mem$$Address);
 7043   %}
 7044 
 7045   ins_pipe(ialu_reg_mem);
 7046 %}
 7047 
 7048 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 7049 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7050   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7051 
 7052   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7053   ins_encode %{
 7054     __ movzbq($dst$$Register, $mem$$Address);
 7055   %}
 7056   ins_pipe(ialu_reg_mem);
 7057 %}
 7058 
 7059 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 7060 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7061   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7062   effect(KILL cr);
 7063 
 7064   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7065             "andl    $dst, right_n_bits($mask, 16)" %}
 7066   ins_encode %{
 7067     Register Rdst = $dst$$Register;
 7068     __ movzwq(Rdst, $mem$$Address);
 7069     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7070   %}
 7071   ins_pipe(ialu_reg_mem);
 7072 %}
 7073 
 7074 // Load Integer
 7075 instruct loadI(rRegI dst, memory mem)
 7076 %{
 7077   match(Set dst (LoadI mem));
 7078 
 7079   ins_cost(125);
 7080   format %{ "movl    $dst, $mem\t# int" %}
 7081 
 7082   ins_encode %{
 7083     __ movl($dst$$Register, $mem$$Address);
 7084   %}
 7085 
 7086   ins_pipe(ialu_reg_mem);
 7087 %}
 7088 
 7089 // Load Integer (32 bit signed) to Byte (8 bit signed)
 7090 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7091   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7092 
 7093   ins_cost(125);
 7094   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7095   ins_encode %{
 7096     __ movsbl($dst$$Register, $mem$$Address);
 7097   %}
 7098   ins_pipe(ialu_reg_mem);
 7099 %}
 7100 
 7101 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 7102 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7103   match(Set dst (AndI (LoadI mem) mask));
 7104 
 7105   ins_cost(125);
 7106   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7107   ins_encode %{
 7108     __ movzbl($dst$$Register, $mem$$Address);
 7109   %}
 7110   ins_pipe(ialu_reg_mem);
 7111 %}
 7112 
 7113 // Load Integer (32 bit signed) to Short (16 bit signed)
 7114 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7115   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7116 
 7117   ins_cost(125);
 7118   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7119   ins_encode %{
 7120     __ movswl($dst$$Register, $mem$$Address);
 7121   %}
 7122   ins_pipe(ialu_reg_mem);
 7123 %}
 7124 
 7125 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 7126 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7127   match(Set dst (AndI (LoadI mem) mask));
 7128 
 7129   ins_cost(125);
 7130   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7131   ins_encode %{
 7132     __ movzwl($dst$$Register, $mem$$Address);
 7133   %}
 7134   ins_pipe(ialu_reg_mem);
 7135 %}
 7136 
 7137 // Load Integer into Long Register
 7138 instruct loadI2L(rRegL dst, memory mem)
 7139 %{
 7140   match(Set dst (ConvI2L (LoadI mem)));
 7141 
 7142   ins_cost(125);
 7143   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7144 
 7145   ins_encode %{
 7146     __ movslq($dst$$Register, $mem$$Address);
 7147   %}
 7148 
 7149   ins_pipe(ialu_reg_mem);
 7150 %}
 7151 
 7152 // Load Integer with mask 0xFF into Long Register
 7153 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7154   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7155 
 7156   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7157   ins_encode %{
 7158     __ movzbq($dst$$Register, $mem$$Address);
 7159   %}
 7160   ins_pipe(ialu_reg_mem);
 7161 %}
 7162 
 7163 // Load Integer with mask 0xFFFF into Long Register
 7164 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7165   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7166 
 7167   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7168   ins_encode %{
 7169     __ movzwq($dst$$Register, $mem$$Address);
 7170   %}
 7171   ins_pipe(ialu_reg_mem);
 7172 %}
 7173 
 7174 // Load Integer with a 31-bit mask into Long Register
 7175 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7176   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7177   effect(KILL cr);
 7178 
 7179   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7180             "andl    $dst, $mask" %}
 7181   ins_encode %{
 7182     Register Rdst = $dst$$Register;
 7183     __ movl(Rdst, $mem$$Address);
 7184     __ andl(Rdst, $mask$$constant);
 7185   %}
 7186   ins_pipe(ialu_reg_mem);
 7187 %}
 7188 
 7189 // Load Unsigned Integer into Long Register
 7190 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7191 %{
 7192   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7193 
 7194   ins_cost(125);
 7195   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7196 
 7197   ins_encode %{
 7198     __ movl($dst$$Register, $mem$$Address);
 7199   %}
 7200 
 7201   ins_pipe(ialu_reg_mem);
 7202 %}
 7203 
 7204 // Load Long
 7205 instruct loadL(rRegL dst, memory mem)
 7206 %{
 7207   match(Set dst (LoadL mem));
 7208 
 7209   ins_cost(125);
 7210   format %{ "movq    $dst, $mem\t# long" %}
 7211 
 7212   ins_encode %{
 7213     __ movq($dst$$Register, $mem$$Address);
 7214   %}
 7215 
 7216   ins_pipe(ialu_reg_mem); // XXX
 7217 %}
 7218 
 7219 // Load Range
 7220 instruct loadRange(rRegI dst, memory mem)
 7221 %{
 7222   match(Set dst (LoadRange mem));
 7223 
 7224   ins_cost(125); // XXX
 7225   format %{ "movl    $dst, $mem\t# range" %}
 7226   ins_encode %{
 7227     __ movl($dst$$Register, $mem$$Address);
 7228   %}
 7229   ins_pipe(ialu_reg_mem);
 7230 %}
 7231 
 7232 // Load Pointer
 7233 instruct loadP(rRegP dst, memory mem)
 7234 %{
 7235   match(Set dst (LoadP mem));
 7236   predicate(n->as_Load()->barrier_data() == 0);
 7237 
 7238   ins_cost(125); // XXX
 7239   format %{ "movq    $dst, $mem\t# ptr" %}
 7240   ins_encode %{
 7241     __ movq($dst$$Register, $mem$$Address);
 7242   %}
 7243   ins_pipe(ialu_reg_mem); // XXX
 7244 %}
 7245 
 7246 // Load Compressed Pointer
 7247 instruct loadN(rRegN dst, memory mem)
 7248 %{
 7249    predicate(n->as_Load()->barrier_data() == 0);
 7250    match(Set dst (LoadN mem));
 7251 
 7252    ins_cost(125); // XXX
 7253    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7254    ins_encode %{
 7255      __ movl($dst$$Register, $mem$$Address);
 7256    %}
 7257    ins_pipe(ialu_reg_mem); // XXX
 7258 %}
 7259 
 7260 
 7261 // Load Klass Pointer
 7262 instruct loadKlass(rRegP dst, memory mem)
 7263 %{
 7264   match(Set dst (LoadKlass mem));
 7265 
 7266   ins_cost(125); // XXX
 7267   format %{ "movq    $dst, $mem\t# class" %}
 7268   ins_encode %{
 7269     __ movq($dst$$Register, $mem$$Address);
 7270   %}
 7271   ins_pipe(ialu_reg_mem); // XXX
 7272 %}
 7273 
 7274 // Load narrow Klass Pointer
 7275 instruct loadNKlass(rRegN dst, memory mem)
 7276 %{
 7277   predicate(!UseCompactObjectHeaders);
 7278   match(Set dst (LoadNKlass mem));
 7279 
 7280   ins_cost(125); // XXX
 7281   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7282   ins_encode %{
 7283     __ movl($dst$$Register, $mem$$Address);
 7284   %}
 7285   ins_pipe(ialu_reg_mem); // XXX
 7286 %}
 7287 
 7288 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7289 %{
 7290   predicate(UseCompactObjectHeaders);
 7291   match(Set dst (LoadNKlass mem));
 7292   effect(KILL cr);
 7293   ins_cost(125);
 7294   format %{
 7295     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7296     "shrl    $dst, markWord::klass_shift_at_offset"
 7297   %}
 7298   ins_encode %{
 7299     if (UseAPX) {
 7300       __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
 7301     }
 7302     else {
 7303       __ movl($dst$$Register, $mem$$Address);
 7304       __ shrl($dst$$Register, markWord::klass_shift_at_offset);
 7305     }
 7306   %}
 7307   ins_pipe(ialu_reg_mem);
 7308 %}
 7309 
 7310 // Load Float
 7311 instruct loadF(regF dst, memory mem)
 7312 %{
 7313   match(Set dst (LoadF mem));
 7314 
 7315   ins_cost(145); // XXX
 7316   format %{ "movss   $dst, $mem\t# float" %}
 7317   ins_encode %{
 7318     __ movflt($dst$$XMMRegister, $mem$$Address);
 7319   %}
 7320   ins_pipe(pipe_slow); // XXX
 7321 %}
 7322 
 7323 // Load Double
 7324 instruct loadD_partial(regD dst, memory mem)
 7325 %{
 7326   predicate(!UseXmmLoadAndClearUpper);
 7327   match(Set dst (LoadD mem));
 7328 
 7329   ins_cost(145); // XXX
 7330   format %{ "movlpd  $dst, $mem\t# double" %}
 7331   ins_encode %{
 7332     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7333   %}
 7334   ins_pipe(pipe_slow); // XXX
 7335 %}
 7336 
 7337 instruct loadD(regD dst, memory mem)
 7338 %{
 7339   predicate(UseXmmLoadAndClearUpper);
 7340   match(Set dst (LoadD mem));
 7341 
 7342   ins_cost(145); // XXX
 7343   format %{ "movsd   $dst, $mem\t# double" %}
 7344   ins_encode %{
 7345     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7346   %}
 7347   ins_pipe(pipe_slow); // XXX
 7348 %}
 7349 
 7350 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
 7351 %{
 7352   match(Set dst con);
 7353 
 7354   format %{ "leaq  $dst, $con\t# AOT Runtime Constants Address" %}
 7355 
 7356   ins_encode %{
 7357     __ load_aotrc_address($dst$$Register, (address)$con$$constant);
 7358   %}
 7359 
 7360   ins_pipe(ialu_reg_fat);
 7361 %}
 7362 
 7363 // min = java.lang.Math.min(float a, float b)
 7364 // max = java.lang.Math.max(float a, float b)
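      // Note: Math.min/max must follow the Java rules for NaN and for the sign
      // of zero, which is why the pre-AVX10.2 variants further below need extra
      // temporaries and the vminmax_fp / emit_fp_min_max helper sequences rather
      // than a bare minss/maxss.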
 7365 instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
 7366 %{
 7367   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7368   match(Set dst (MaxF a b));
 7369   match(Set dst (MinF a b));
 7370 
 7371   format %{ "minmaxF $dst, $a, $b" %}
 7372   ins_encode %{
 7373     int opcode = this->ideal_Opcode();
 7374     __ sminmax_fp_avx10_2(opcode, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
 7375   %}
 7376   ins_pipe( pipe_slow );
 7377 %}
 7378 
 7379 instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, regF xtmp, rRegI rtmp, rFlagsReg cr)
 7380 %{
 7381   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7382   match(Set dst (MaxF a b));
 7383   match(Set dst (MinF a b));
 7384   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7385 
 7386   format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7387   ins_encode %{
 7388     int opcode = this->ideal_Opcode();
 7389     bool min = (opcode == Op_MinF);
 7390     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7391                     min, fp_prec_flt /*pt*/);
 7392   %}
 7393   ins_pipe( pipe_slow );
 7394 %}
 7395 
 7396 // min = java.lang.Math.min(float a, float b)
 7397 // max = java.lang.Math.max(float a, float b)
 7398 instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
 7399 %{
 7400   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7401   match(Set dst (MaxF a b));
 7402   match(Set dst (MinF a b));
 7403   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7404 
 7405   format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7406   ins_encode %{
 7407     int opcode = this->ideal_Opcode();
 7408     int param_opcode = (opcode == Op_MinF) ? Op_MinV : Op_MaxV;
 7409     __ vminmax_fp(param_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
 7410                   $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7411   %}
 7412   ins_pipe( pipe_slow );
 7413 %}
 7414 
 7415 instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr)
 7416 %{
 7417   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7418   match(Set dst (MaxF a b));
 7419   match(Set dst (MinF a b));
 7420   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7421 
 7422   format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7423   ins_encode %{
 7424     int opcode = this->ideal_Opcode();
 7425     bool min = (opcode == Op_MinF);
 7426     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7427                     min, fp_prec_flt /*pt*/);
 7428   %}
 7429   ins_pipe( pipe_slow );
 7430 %}
 7431 
 7432 // min = java.lang.Math.min(double a, double b)
 7433 // max = java.lang.Math.max(double a, double b)
 7434 instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
 7435 %{
 7436   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7437   match(Set dst (MaxD a b));
 7438   match(Set dst (MinD a b));
 7439 
 7440   format %{ "minmaxD $dst, $a, $b" %}
 7441   ins_encode %{
 7442     int opcode = this->ideal_Opcode();
 7443     __ sminmax_fp_avx10_2(opcode, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
 7444   %}
 7445   ins_pipe( pipe_slow );
 7446 %}
 7447 
 7448 instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, regD xtmp, rRegI rtmp, rFlagsReg cr)
 7449 %{
 7450   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7451   match(Set dst (MaxD a b));
 7452   match(Set dst (MinD a b));
 7453   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7454 
 7455   format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7456   ins_encode %{
 7457     int opcode = this->ideal_Opcode();
 7458     bool min = (opcode == Op_MinD);
 7459     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7460                     min, fp_prec_dbl /*pt*/);
 7461   %}
 7462   ins_pipe( pipe_slow );
 7463 %}
 7464 
 7465 // min = java.lang.Math.min(double a, double b)
 7466 // max = java.lang.Math.max(double a, double b)
 7467 instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
 7468 %{
 7469   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7470   match(Set dst (MaxD a b));
 7471   match(Set dst (MinD a b));
 7472   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7473 
 7474   format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7475   ins_encode %{
 7476     int opcode = this->ideal_Opcode();
 7477     int param_opcode = (opcode == Op_MinD) ? Op_MinV : Op_MaxV;
 7478     __ vminmax_fp(param_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
 7479                   $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7480   %}
 7481   ins_pipe( pipe_slow );
 7482 %}
 7483 
 7484 instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr)
 7485 %{
 7486   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7487   match(Set dst (MaxD a b));
 7488   match(Set dst (MinD a b));
 7489   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7490 
 7491   format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7492   ins_encode %{
 7493     int opcode = this->ideal_Opcode();
 7494     bool min = (opcode == Op_MinD);
 7495     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7496                     min, fp_prec_dbl /*pt*/);
 7497   %}
 7498   ins_pipe( pipe_slow );
 7499 %}
 7500 
 7501 // Load Effective Address
 7502 instruct leaP8(rRegP dst, indOffset8 mem)
 7503 %{
 7504   match(Set dst mem);
 7505 
 7506   ins_cost(110); // XXX
 7507   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7508   ins_encode %{
 7509     __ leaq($dst$$Register, $mem$$Address);
 7510   %}
 7511   ins_pipe(ialu_reg_reg_fat);
 7512 %}
 7513 
 7514 instruct leaP32(rRegP dst, indOffset32 mem)
 7515 %{
 7516   match(Set dst mem);
 7517 
 7518   ins_cost(110);
 7519   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7520   ins_encode %{
 7521     __ leaq($dst$$Register, $mem$$Address);
 7522   %}
 7523   ins_pipe(ialu_reg_reg_fat);
 7524 %}
 7525 
 7526 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7527 %{
 7528   match(Set dst mem);
 7529 
 7530   ins_cost(110);
 7531   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7532   ins_encode %{
 7533     __ leaq($dst$$Register, $mem$$Address);
 7534   %}
 7535   ins_pipe(ialu_reg_reg_fat);
 7536 %}
 7537 
 7538 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7539 %{
 7540   match(Set dst mem);
 7541 
 7542   ins_cost(110);
 7543   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7544   ins_encode %{
 7545     __ leaq($dst$$Register, $mem$$Address);
 7546   %}
 7547   ins_pipe(ialu_reg_reg_fat);
 7548 %}
 7549 
 7550 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7551 %{
 7552   match(Set dst mem);
 7553 
 7554   ins_cost(110);
 7555   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7556   ins_encode %{
 7557     __ leaq($dst$$Register, $mem$$Address);
 7558   %}
 7559   ins_pipe(ialu_reg_reg_fat);
 7560 %}
 7561 
 7562 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7563 %{
 7564   match(Set dst mem);
 7565 
 7566   ins_cost(110);
 7567   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7568   ins_encode %{
 7569     __ leaq($dst$$Register, $mem$$Address);
 7570   %}
 7571   ins_pipe(ialu_reg_reg_fat);
 7572 %}
 7573 
 7574 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7575 %{
 7576   match(Set dst mem);
 7577 
 7578   ins_cost(110);
 7579   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7580   ins_encode %{
 7581     __ leaq($dst$$Register, $mem$$Address);
 7582   %}
 7583   ins_pipe(ialu_reg_reg_fat);
 7584 %}
 7585 
 7586 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7587 %{
 7588   match(Set dst mem);
 7589 
 7590   ins_cost(110);
 7591   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7592   ins_encode %{
 7593     __ leaq($dst$$Register, $mem$$Address);
 7594   %}
 7595   ins_pipe(ialu_reg_reg_fat);
 7596 %}
 7597 
 7598 // Load Effective Address which uses Narrow (32-bits) oop
 7599 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7600 %{
 7601   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7602   match(Set dst mem);
 7603 
 7604   ins_cost(110);
 7605   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7606   ins_encode %{
 7607     __ leaq($dst$$Register, $mem$$Address);
 7608   %}
 7609   ins_pipe(ialu_reg_reg_fat);
 7610 %}
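      // The *Narrow forms below are matched only when compressed oops are
      // unscaled (CompressedOops::shift() == 0), so a narrow oop's 32-bit value
      // can be used directly in the addressing mode.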
 7611 
 7612 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7613 %{
 7614   predicate(CompressedOops::shift() == 0);
 7615   match(Set dst mem);
 7616 
 7617   ins_cost(110); // XXX
 7618   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7619   ins_encode %{
 7620     __ leaq($dst$$Register, $mem$$Address);
 7621   %}
 7622   ins_pipe(ialu_reg_reg_fat);
 7623 %}
 7624 
 7625 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7626 %{
 7627   predicate(CompressedOops::shift() == 0);
 7628   match(Set dst mem);
 7629 
 7630   ins_cost(110);
 7631   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7632   ins_encode %{
 7633     __ leaq($dst$$Register, $mem$$Address);
 7634   %}
 7635   ins_pipe(ialu_reg_reg_fat);
 7636 %}
 7637 
 7638 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7639 %{
 7640   predicate(CompressedOops::shift() == 0);
 7641   match(Set dst mem);
 7642 
 7643   ins_cost(110);
 7644   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7645   ins_encode %{
 7646     __ leaq($dst$$Register, $mem$$Address);
 7647   %}
 7648   ins_pipe(ialu_reg_reg_fat);
 7649 %}
 7650 
 7651 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7652 %{
 7653   predicate(CompressedOops::shift() == 0);
 7654   match(Set dst mem);
 7655 
 7656   ins_cost(110);
 7657   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7658   ins_encode %{
 7659     __ leaq($dst$$Register, $mem$$Address);
 7660   %}
 7661   ins_pipe(ialu_reg_reg_fat);
 7662 %}
 7663 
 7664 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7665 %{
 7666   predicate(CompressedOops::shift() == 0);
 7667   match(Set dst mem);
 7668 
 7669   ins_cost(110);
 7670   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7671   ins_encode %{
 7672     __ leaq($dst$$Register, $mem$$Address);
 7673   %}
 7674   ins_pipe(ialu_reg_reg_fat);
 7675 %}
 7676 
 7677 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7678 %{
 7679   predicate(CompressedOops::shift() == 0);
 7680   match(Set dst mem);
 7681 
 7682   ins_cost(110);
 7683   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7684   ins_encode %{
 7685     __ leaq($dst$$Register, $mem$$Address);
 7686   %}
 7687   ins_pipe(ialu_reg_reg_fat);
 7688 %}
 7689 
 7690 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7691 %{
 7692   predicate(CompressedOops::shift() == 0);
 7693   match(Set dst mem);
 7694 
 7695   ins_cost(110);
 7696   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7697   ins_encode %{
 7698     __ leaq($dst$$Register, $mem$$Address);
 7699   %}
 7700   ins_pipe(ialu_reg_reg_fat);
 7701 %}
 7702 
 7703 instruct loadConI(rRegI dst, immI src)
 7704 %{
 7705   match(Set dst src);
 7706 
 7707   format %{ "movl    $dst, $src\t# int" %}
 7708   ins_encode %{
 7709     __ movl($dst$$Register, $src$$constant);
 7710   %}
 7711   ins_pipe(ialu_reg_fat); // XXX
 7712 %}
 7713 
 7714 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7715 %{
 7716   match(Set dst src);
 7717   effect(KILL cr);
 7718 
 7719   ins_cost(50);
 7720   format %{ "xorl    $dst, $dst\t# int" %}
 7721   ins_encode %{
 7722     __ xorl($dst$$Register, $dst$$Register);
 7723   %}
 7724   ins_pipe(ialu_reg);
 7725 %}
 7726 
 7727 instruct loadConL(rRegL dst, immL src)
 7728 %{
 7729   match(Set dst src);
 7730 
 7731   ins_cost(150);
 7732   format %{ "movq    $dst, $src\t# long" %}
 7733   ins_encode %{
 7734     __ mov64($dst$$Register, $src$$constant);
 7735   %}
 7736   ins_pipe(ialu_reg);
 7737 %}
 7738 
 7739 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7740 %{
 7741   match(Set dst src);
 7742   effect(KILL cr);
 7743 
 7744   ins_cost(50);
 7745   format %{ "xorl    $dst, $dst\t# long" %}
 7746   ins_encode %{
 7747     __ xorl($dst$$Register, $dst$$Register);
 7748   %}
 7749   ins_pipe(ialu_reg); // XXX
 7750 %}
 7751 
 7752 instruct loadConUL32(rRegL dst, immUL32 src)
 7753 %{
 7754   match(Set dst src);
 7755 
 7756   ins_cost(60);
 7757   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7758   ins_encode %{
 7759     __ movl($dst$$Register, $src$$constant);
 7760   %}
 7761   ins_pipe(ialu_reg);
 7762 %}
 7763 
 7764 instruct loadConL32(rRegL dst, immL32 src)
 7765 %{
 7766   match(Set dst src);
 7767 
 7768   ins_cost(70);
 7769   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7770   ins_encode %{
 7771     __ movq($dst$$Register, $src$$constant);
 7772   %}
 7773   ins_pipe(ialu_reg);
 7774 %}
 7775 
 7776 instruct loadConP(rRegP dst, immP con) %{
 7777   match(Set dst con);
 7778 
 7779   format %{ "movq    $dst, $con\t# ptr" %}
 7780   ins_encode %{
 7781     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7782   %}
 7783   ins_pipe(ialu_reg_fat); // XXX
 7784 %}
 7785 
 7786 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7787 %{
 7788   match(Set dst src);
 7789   effect(KILL cr);
 7790 
 7791   ins_cost(50);
 7792   format %{ "xorl    $dst, $dst\t# ptr" %}
 7793   ins_encode %{
 7794     __ xorl($dst$$Register, $dst$$Register);
 7795   %}
 7796   ins_pipe(ialu_reg);
 7797 %}
 7798 
 7799 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7800 %{
 7801   match(Set dst src);
 7802   effect(KILL cr);
 7803 
 7804   ins_cost(60);
 7805   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7806   ins_encode %{
 7807     __ movl($dst$$Register, $src$$constant);
 7808   %}
 7809   ins_pipe(ialu_reg);
 7810 %}
 7811 
 7812 instruct loadConF(regF dst, immF con) %{
 7813   match(Set dst con);
 7814   ins_cost(125);
 7815   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7816   ins_encode %{
 7817     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7818   %}
 7819   ins_pipe(pipe_slow);
 7820 %}
 7821 
 7822 instruct loadConH(regF dst, immH con) %{
 7823   match(Set dst con);
 7824   ins_cost(125);
 7825   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7826   ins_encode %{
 7827     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7828   %}
 7829   ins_pipe(pipe_slow);
 7830 %}
 7831 
 7832 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7833   match(Set dst src);
 7834   effect(KILL cr);
 7835   format %{ "xorq    $dst, $src\t# compressed null pointer" %}
 7836   ins_encode %{
 7837     __ xorq($dst$$Register, $dst$$Register);
 7838   %}
 7839   ins_pipe(ialu_reg);
 7840 %}
 7841 
 7842 instruct loadConN(rRegN dst, immN src) %{
 7843   match(Set dst src);
 7844 
 7845   ins_cost(125);
 7846   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7847   ins_encode %{
 7848     address con = (address)$src$$constant;
 7849     if (con == nullptr) {
 7850       ShouldNotReachHere();
 7851     } else {
 7852       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7853     }
 7854   %}
 7855   ins_pipe(ialu_reg_fat); // XXX
 7856 %}
 7857 
 7858 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7859   match(Set dst src);
 7860 
 7861   ins_cost(125);
 7862   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7863   ins_encode %{
 7864     address con = (address)$src$$constant;
 7865     if (con == nullptr) {
 7866       ShouldNotReachHere();
 7867     } else {
 7868       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7869     }
 7870   %}
 7871   ins_pipe(ialu_reg_fat); // XXX
 7872 %}
 7873 
 7874 instruct loadConF0(regF dst, immF0 src)
 7875 %{
 7876   match(Set dst src);
 7877   ins_cost(100);
 7878 
 7879   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7880   ins_encode %{
 7881     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7882   %}
 7883   ins_pipe(pipe_slow);
 7884 %}
 7885 
 7886 // Use the same format since predicate() cannot be used here.
 7887 instruct loadConD(regD dst, immD con) %{
 7888   match(Set dst con);
 7889   ins_cost(125);
 7890   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7891   ins_encode %{
 7892     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7893   %}
 7894   ins_pipe(pipe_slow);
 7895 %}
 7896 
 7897 instruct loadConD0(regD dst, immD0 src)
 7898 %{
 7899   match(Set dst src);
 7900   ins_cost(100);
 7901 
 7902   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7903   ins_encode %{
 7904     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7905   %}
 7906   ins_pipe(pipe_slow);
 7907 %}
 7908 
 7909 instruct loadSSI(rRegI dst, stackSlotI src)
 7910 %{
 7911   match(Set dst src);
 7912 
 7913   ins_cost(125);
 7914   format %{ "movl    $dst, $src\t# int stk" %}
 7915   ins_encode %{
 7916     __ movl($dst$$Register, $src$$Address);
 7917   %}
 7918   ins_pipe(ialu_reg_mem);
 7919 %}
 7920 
 7921 instruct loadSSL(rRegL dst, stackSlotL src)
 7922 %{
 7923   match(Set dst src);
 7924 
 7925   ins_cost(125);
 7926   format %{ "movq    $dst, $src\t# long stk" %}
 7927   ins_encode %{
 7928     __ movq($dst$$Register, $src$$Address);
 7929   %}
 7930   ins_pipe(ialu_reg_mem);
 7931 %}
 7932 
 7933 instruct loadSSP(rRegP dst, stackSlotP src)
 7934 %{
 7935   match(Set dst src);
 7936 
 7937   ins_cost(125);
 7938   format %{ "movq    $dst, $src\t# ptr stk" %}
 7939   ins_encode %{
 7940     __ movq($dst$$Register, $src$$Address);
 7941   %}
 7942   ins_pipe(ialu_reg_mem);
 7943 %}
 7944 
 7945 instruct loadSSF(regF dst, stackSlotF src)
 7946 %{
 7947   match(Set dst src);
 7948 
 7949   ins_cost(125);
 7950   format %{ "movss   $dst, $src\t# float stk" %}
 7951   ins_encode %{
 7952     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 7953   %}
 7954   ins_pipe(pipe_slow); // XXX
 7955 %}
 7956 
 7957 // Use the same format since predicate() cannot be used here.
 7958 instruct loadSSD(regD dst, stackSlotD src)
 7959 %{
 7960   match(Set dst src);
 7961 
 7962   ins_cost(125);
 7963   format %{ "movsd   $dst, $src\t# double stk" %}
 7964   ins_encode  %{
 7965     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 7966   %}
 7967   ins_pipe(pipe_slow); // XXX
 7968 %}
 7969 
 7970 // Prefetch instructions for allocation.
 7971 // Must be safe to execute with invalid address (cannot fault).
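      // The AllocatePrefetchInstr flag selects which variant is matched:
      // 0 = prefetchnta, 1 = prefetcht0, 2 = prefetcht2, 3 = prefetchw
      // (see the predicate on each instruct below).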
 7972 
 7973 instruct prefetchAlloc( memory mem ) %{
 7974   predicate(AllocatePrefetchInstr==3);
 7975   match(PrefetchAllocation mem);
 7976   ins_cost(125);
 7977 
 7978   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7979   ins_encode %{
 7980     __ prefetchw($mem$$Address);
 7981   %}
 7982   ins_pipe(ialu_mem);
 7983 %}
 7984 
 7985 instruct prefetchAllocNTA( memory mem ) %{
 7986   predicate(AllocatePrefetchInstr==0);
 7987   match(PrefetchAllocation mem);
 7988   ins_cost(125);
 7989 
 7990   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7991   ins_encode %{
 7992     __ prefetchnta($mem$$Address);
 7993   %}
 7994   ins_pipe(ialu_mem);
 7995 %}
 7996 
 7997 instruct prefetchAllocT0( memory mem ) %{
 7998   predicate(AllocatePrefetchInstr==1);
 7999   match(PrefetchAllocation mem);
 8000   ins_cost(125);
 8001 
 8002   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 8003   ins_encode %{
 8004     __ prefetcht0($mem$$Address);
 8005   %}
 8006   ins_pipe(ialu_mem);
 8007 %}
 8008 
 8009 instruct prefetchAllocT2( memory mem ) %{
 8010   predicate(AllocatePrefetchInstr==2);
 8011   match(PrefetchAllocation mem);
 8012   ins_cost(125);
 8013 
 8014   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 8015   ins_encode %{
 8016     __ prefetcht2($mem$$Address);
 8017   %}
 8018   ins_pipe(ialu_mem);
 8019 %}
 8020 
 8021 //----------Store Instructions-------------------------------------------------
 8022 
 8023 // Store Byte
 8024 instruct storeB(memory mem, rRegI src)
 8025 %{
 8026   match(Set mem (StoreB mem src));
 8027 
 8028   ins_cost(125); // XXX
 8029   format %{ "movb    $mem, $src\t# byte" %}
 8030   ins_encode %{
 8031     __ movb($mem$$Address, $src$$Register);
 8032   %}
 8033   ins_pipe(ialu_mem_reg);
 8034 %}
 8035 
 8036 // Store Char/Short
 8037 instruct storeC(memory mem, rRegI src)
 8038 %{
 8039   match(Set mem (StoreC mem src));
 8040 
 8041   ins_cost(125); // XXX
 8042   format %{ "movw    $mem, $src\t# char/short" %}
 8043   ins_encode %{
 8044     __ movw($mem$$Address, $src$$Register);
 8045   %}
 8046   ins_pipe(ialu_mem_reg);
 8047 %}
 8048 
 8049 // Store Integer
 8050 instruct storeI(memory mem, rRegI src)
 8051 %{
 8052   match(Set mem (StoreI mem src));
 8053 
 8054   ins_cost(125); // XXX
 8055   format %{ "movl    $mem, $src\t# int" %}
 8056   ins_encode %{
 8057     __ movl($mem$$Address, $src$$Register);
 8058   %}
 8059   ins_pipe(ialu_mem_reg);
 8060 %}
 8061 
 8062 // Store Long
 8063 instruct storeL(memory mem, rRegL src)
 8064 %{
 8065   match(Set mem (StoreL mem src));
 8066 
 8067   ins_cost(125); // XXX
 8068   format %{ "movq    $mem, $src\t# long" %}
 8069   ins_encode %{
 8070     __ movq($mem$$Address, $src$$Register);
 8071   %}
 8072   ins_pipe(ialu_mem_reg); // XXX
 8073 %}
 8074 
 8075 // Store Pointer
 8076 instruct storeP(memory mem, any_RegP src)
 8077 %{
 8078   predicate(n->as_Store()->barrier_data() == 0);
 8079   match(Set mem (StoreP mem src));
 8080 
 8081   ins_cost(125); // XXX
 8082   format %{ "movq    $mem, $src\t# ptr" %}
 8083   ins_encode %{
 8084     __ movq($mem$$Address, $src$$Register);
 8085   %}
 8086   ins_pipe(ialu_mem_reg);
 8087 %}
 8088 
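      // When compressed oops are enabled with a null heap base, the reserved
      // heap-base register r12 holds zero, so the *0 store variants below reuse
      // it as a zero source instead of encoding an immediate.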
 8089 instruct storeImmP0(memory mem, immP0 zero)
 8090 %{
 8091   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8092   match(Set mem (StoreP mem zero));
 8093 
 8094   ins_cost(125); // XXX
 8095   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8096   ins_encode %{
 8097     __ movq($mem$$Address, r12);
 8098   %}
 8099   ins_pipe(ialu_mem_reg);
 8100 %}
 8101 
 8102 // Store Null Pointer, mark word, or other simple pointer constant.
 8103 instruct storeImmP(memory mem, immP31 src)
 8104 %{
 8105   predicate(n->as_Store()->barrier_data() == 0);
 8106   match(Set mem (StoreP mem src));
 8107 
 8108   ins_cost(150); // XXX
 8109   format %{ "movq    $mem, $src\t# ptr" %}
 8110   ins_encode %{
 8111     __ movq($mem$$Address, $src$$constant);
 8112   %}
 8113   ins_pipe(ialu_mem_imm);
 8114 %}
 8115 
 8116 // Store Compressed Pointer
 8117 instruct storeN(memory mem, rRegN src)
 8118 %{
 8119   predicate(n->as_Store()->barrier_data() == 0);
 8120   match(Set mem (StoreN mem src));
 8121 
 8122   ins_cost(125); // XXX
 8123   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8124   ins_encode %{
 8125     __ movl($mem$$Address, $src$$Register);
 8126   %}
 8127   ins_pipe(ialu_mem_reg);
 8128 %}
 8129 
 8130 instruct storeNKlass(memory mem, rRegN src)
 8131 %{
 8132   match(Set mem (StoreNKlass mem src));
 8133 
 8134   ins_cost(125); // XXX
 8135   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8136   ins_encode %{
 8137     __ movl($mem$$Address, $src$$Register);
 8138   %}
 8139   ins_pipe(ialu_mem_reg);
 8140 %}
 8141 
 8142 instruct storeImmN0(memory mem, immN0 zero)
 8143 %{
 8144   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8145   match(Set mem (StoreN mem zero));
 8146 
 8147   ins_cost(125); // XXX
 8148   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8149   ins_encode %{
 8150     __ movl($mem$$Address, r12);
 8151   %}
 8152   ins_pipe(ialu_mem_reg);
 8153 %}
 8154 
 8155 instruct storeImmN(memory mem, immN src)
 8156 %{
 8157   predicate(n->as_Store()->barrier_data() == 0);
 8158   match(Set mem (StoreN mem src));
 8159 
 8160   ins_cost(150); // XXX
 8161   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8162   ins_encode %{
 8163     address con = (address)$src$$constant;
 8164     if (con == nullptr) {
 8165       __ movl($mem$$Address, 0);
 8166     } else {
 8167       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8168     }
 8169   %}
 8170   ins_pipe(ialu_mem_imm);
 8171 %}
 8172 
 8173 instruct storeImmNKlass(memory mem, immNKlass src)
 8174 %{
 8175   match(Set mem (StoreNKlass mem src));
 8176 
 8177   ins_cost(150); // XXX
 8178   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8179   ins_encode %{
 8180     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8181   %}
 8182   ins_pipe(ialu_mem_imm);
 8183 %}
 8184 
 8185 // Store Integer Immediate
 8186 instruct storeImmI0(memory mem, immI_0 zero)
 8187 %{
 8188   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8189   match(Set mem (StoreI mem zero));
 8190 
 8191   ins_cost(125); // XXX
 8192   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8193   ins_encode %{
 8194     __ movl($mem$$Address, r12);
 8195   %}
 8196   ins_pipe(ialu_mem_reg);
 8197 %}
 8198 
 8199 instruct storeImmI(memory mem, immI src)
 8200 %{
 8201   match(Set mem (StoreI mem src));
 8202 
 8203   ins_cost(150);
 8204   format %{ "movl    $mem, $src\t# int" %}
 8205   ins_encode %{
 8206     __ movl($mem$$Address, $src$$constant);
 8207   %}
 8208   ins_pipe(ialu_mem_imm);
 8209 %}
 8210 
 8211 // Store Long Immediate
 8212 instruct storeImmL0(memory mem, immL0 zero)
 8213 %{
 8214   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8215   match(Set mem (StoreL mem zero));
 8216 
 8217   ins_cost(125); // XXX
 8218   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8219   ins_encode %{
 8220     __ movq($mem$$Address, r12);
 8221   %}
 8222   ins_pipe(ialu_mem_reg);
 8223 %}
 8224 
 8225 instruct storeImmL(memory mem, immL32 src)
 8226 %{
 8227   match(Set mem (StoreL mem src));
 8228 
 8229   ins_cost(150);
 8230   format %{ "movq    $mem, $src\t# long" %}
 8231   ins_encode %{
 8232     __ movq($mem$$Address, $src$$constant);
 8233   %}
 8234   ins_pipe(ialu_mem_imm);
 8235 %}
 8236 
 8237 // Store Short/Char Immediate
 8238 instruct storeImmC0(memory mem, immI_0 zero)
 8239 %{
 8240   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8241   match(Set mem (StoreC mem zero));
 8242 
 8243   ins_cost(125); // XXX
 8244   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8245   ins_encode %{
 8246     __ movw($mem$$Address, r12);
 8247   %}
 8248   ins_pipe(ialu_mem_reg);
 8249 %}
 8250 
 8251 instruct storeImmI16(memory mem, immI16 src)
 8252 %{
 8253   predicate(UseStoreImmI16);
 8254   match(Set mem (StoreC mem src));
 8255 
 8256   ins_cost(150);
 8257   format %{ "movw    $mem, $src\t# short/char" %}
 8258   ins_encode %{
 8259     __ movw($mem$$Address, $src$$constant);
 8260   %}
 8261   ins_pipe(ialu_mem_imm);
 8262 %}
 8263 
 8264 // Store Byte Immediate
 8265 instruct storeImmB0(memory mem, immI_0 zero)
 8266 %{
 8267   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8268   match(Set mem (StoreB mem zero));
 8269 
 8270   ins_cost(125); // XXX
 8271   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 8272   ins_encode %{
 8273     __ movb($mem$$Address, r12);
 8274   %}
 8275   ins_pipe(ialu_mem_reg);
 8276 %}
 8277 
 8278 instruct storeImmB(memory mem, immI8 src)
 8279 %{
 8280   match(Set mem (StoreB mem src));
 8281 
 8282   ins_cost(150); // XXX
 8283   format %{ "movb    $mem, $src\t# byte" %}
 8284   ins_encode %{
 8285     __ movb($mem$$Address, $src$$constant);
 8286   %}
 8287   ins_pipe(ialu_mem_imm);
 8288 %}
 8289 
 8290 // Store Float
 8291 instruct storeF(memory mem, regF src)
 8292 %{
 8293   match(Set mem (StoreF mem src));
 8294 
 8295   ins_cost(95); // XXX
 8296   format %{ "movss   $mem, $src\t# float" %}
 8297   ins_encode %{
 8298     __ movflt($mem$$Address, $src$$XMMRegister);
 8299   %}
 8300   ins_pipe(pipe_slow); // XXX
 8301 %}
 8302 
 8303 // Store immediate Float value (it is faster than store from XMM register)
 8304 instruct storeF0(memory mem, immF0 zero)
 8305 %{
 8306   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8307   match(Set mem (StoreF mem zero));
 8308 
 8309   ins_cost(25); // XXX
 8310   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8311   ins_encode %{
 8312     __ movl($mem$$Address, r12);
 8313   %}
 8314   ins_pipe(ialu_mem_reg);
 8315 %}
 8316 
 8317 instruct storeF_imm(memory mem, immF src)
 8318 %{
 8319   match(Set mem (StoreF mem src));
 8320 
 8321   ins_cost(50);
 8322   format %{ "movl    $mem, $src\t# float" %}
 8323   ins_encode %{
 8324     __ movl($mem$$Address, jint_cast($src$$constant));
 8325   %}
 8326   ins_pipe(ialu_mem_imm);
 8327 %}
 8328 
 8329 // Store Double
 8330 instruct storeD(memory mem, regD src)
 8331 %{
 8332   match(Set mem (StoreD mem src));
 8333 
 8334   ins_cost(95); // XXX
 8335   format %{ "movsd   $mem, $src\t# double" %}
 8336   ins_encode %{
 8337     __ movdbl($mem$$Address, $src$$XMMRegister);
 8338   %}
 8339   ins_pipe(pipe_slow); // XXX
 8340 %}
 8341 
 8342 // Store immediate double 0.0 (it is faster than store from XMM register)
 8343 instruct storeD0_imm(memory mem, immD0 src)
 8344 %{
 8345   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8346   match(Set mem (StoreD mem src));
 8347 
 8348   ins_cost(50);
 8349   format %{ "movq    $mem, $src\t# double 0." %}
 8350   ins_encode %{
 8351     __ movq($mem$$Address, $src$$constant);
 8352   %}
 8353   ins_pipe(ialu_mem_imm);
 8354 %}
 8355 
 8356 instruct storeD0(memory mem, immD0 zero)
 8357 %{
 8358   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8359   match(Set mem (StoreD mem zero));
 8360 
 8361   ins_cost(25); // XXX
 8362   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8363   ins_encode %{
 8364     __ movq($mem$$Address, r12);
 8365   %}
 8366   ins_pipe(ialu_mem_reg);
 8367 %}
 8368 
 8369 instruct storeSSI(stackSlotI dst, rRegI src)
 8370 %{
 8371   match(Set dst src);
 8372 
 8373   ins_cost(100);
 8374   format %{ "movl    $dst, $src\t# int stk" %}
 8375   ins_encode %{
 8376     __ movl($dst$$Address, $src$$Register);
 8377   %}
 8378   ins_pipe( ialu_mem_reg );
 8379 %}
 8380 
 8381 instruct storeSSL(stackSlotL dst, rRegL src)
 8382 %{
 8383   match(Set dst src);
 8384 
 8385   ins_cost(100);
 8386   format %{ "movq    $dst, $src\t# long stk" %}
 8387   ins_encode %{
 8388     __ movq($dst$$Address, $src$$Register);
 8389   %}
 8390   ins_pipe(ialu_mem_reg);
 8391 %}
 8392 
 8393 instruct storeSSP(stackSlotP dst, rRegP src)
 8394 %{
 8395   match(Set dst src);
 8396 
 8397   ins_cost(100);
 8398   format %{ "movq    $dst, $src\t# ptr stk" %}
 8399   ins_encode %{
 8400     __ movq($dst$$Address, $src$$Register);
 8401   %}
 8402   ins_pipe(ialu_mem_reg);
 8403 %}
 8404 
 8405 instruct storeSSF(stackSlotF dst, regF src)
 8406 %{
 8407   match(Set dst src);
 8408 
 8409   ins_cost(95); // XXX
 8410   format %{ "movss   $dst, $src\t# float stk" %}
 8411   ins_encode %{
 8412     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8413   %}
 8414   ins_pipe(pipe_slow); // XXX
 8415 %}
 8416 
 8417 instruct storeSSD(stackSlotD dst, regD src)
 8418 %{
 8419   match(Set dst src);
 8420 
 8421   ins_cost(95); // XXX
 8422   format %{ "movsd   $dst, $src\t# double stk" %}
 8423   ins_encode %{
 8424     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8425   %}
 8426   ins_pipe(pipe_slow); // XXX
 8427 %}
 8428 
 8429 instruct cacheWB(indirect addr)
 8430 %{
 8431   predicate(VM_Version::supports_data_cache_line_flush());
 8432   match(CacheWB addr);
 8433 
 8434   ins_cost(100);
 8435   format %{ "cache wb $addr" %}
 8436   ins_encode %{
 8437     assert($addr->index_position() < 0, "should be");
 8438     assert($addr$$disp == 0, "should be");
 8439     __ cache_wb(Address($addr$$base$$Register, 0));
 8440   %}
 8441   ins_pipe(pipe_slow); // XXX
 8442 %}
 8443 
 8444 instruct cacheWBPreSync()
 8445 %{
 8446   predicate(VM_Version::supports_data_cache_line_flush());
 8447   match(CacheWBPreSync);
 8448 
 8449   ins_cost(100);
 8450   format %{ "cache wb presync" %}
 8451   ins_encode %{
 8452     __ cache_wbsync(true);
 8453   %}
 8454   ins_pipe(pipe_slow); // XXX
 8455 %}
 8456 
 8457 instruct cacheWBPostSync()
 8458 %{
 8459   predicate(VM_Version::supports_data_cache_line_flush());
 8460   match(CacheWBPostSync);
 8461 
 8462   ins_cost(100);
 8463   format %{ "cache wb postsync" %}
 8464   ins_encode %{
 8465     __ cache_wbsync(false);
 8466   %}
 8467   ins_pipe(pipe_slow); // XXX
 8468 %}
 8469 
 8470 //----------BSWAP Instructions-------------------------------------------------
 8471 instruct bytes_reverse_int(rRegI dst) %{
 8472   match(Set dst (ReverseBytesI dst));
 8473 
 8474   format %{ "bswapl  $dst" %}
 8475   ins_encode %{
 8476     __ bswapl($dst$$Register);
 8477   %}
 8478   ins_pipe( ialu_reg );
 8479 %}
 8480 
 8481 instruct bytes_reverse_long(rRegL dst) %{
 8482   match(Set dst (ReverseBytesL dst));
 8483 
 8484   format %{ "bswapq  $dst" %}
 8485   ins_encode %{
 8486     __ bswapq($dst$$Register);
 8487   %}
 8488   ins_pipe( ialu_reg);
 8489 %}
 8490 
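      // For the 16-bit forms below, bswapl reverses all four bytes, leaving the
      // two significant bytes in the upper half; the following shift (logical
      // for the unsigned case, arithmetic for the signed case) moves them back
      // down.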
 8491 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8492   match(Set dst (ReverseBytesUS dst));
 8493   effect(KILL cr);
 8494 
 8495   format %{ "bswapl  $dst\n\t"
 8496             "shrl    $dst, 16" %}
 8497   ins_encode %{
 8498     __ bswapl($dst$$Register);
 8499     __ shrl($dst$$Register, 16);
 8500   %}
 8501   ins_pipe( ialu_reg );
 8502 %}
 8503 
 8504 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8505   match(Set dst (ReverseBytesS dst));
 8506   effect(KILL cr);
 8507 
 8508   format %{ "bswapl  $dst\n\t"
 8509             "sarl    $dst, 16" %}
 8510   ins_encode %{
 8511     __ bswapl($dst$$Register);
 8512     __ sarl($dst$$Register, 16);
 8513   %}
 8514   ins_pipe( ialu_reg );
 8515 %}
 8516 
 8517 //---------- Zeros Count Instructions ------------------------------------------
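      // The lzcnt/tzcnt variants are guarded by UseCountLeadingZerosInstruction
      // and UseCountTrailingZerosInstruction.  The _bsr/_bsf fallbacks need the
      // jnz/mov sequence because bsr/bsf leave the destination undefined when
      // the source is zero; the substituted value makes the final result 32
      // (int) or 64 (long) in that case.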
 8518 
 8519 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8520   predicate(UseCountLeadingZerosInstruction);
 8521   match(Set dst (CountLeadingZerosI src));
 8522   effect(KILL cr);
 8523 
 8524   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8525   ins_encode %{
 8526     __ lzcntl($dst$$Register, $src$$Register);
 8527   %}
 8528   ins_pipe(ialu_reg);
 8529 %}
 8530 
 8531 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8532   predicate(UseCountLeadingZerosInstruction);
 8533   match(Set dst (CountLeadingZerosI (LoadI src)));
 8534   effect(KILL cr);
 8535   ins_cost(175);
 8536   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8537   ins_encode %{
 8538     __ lzcntl($dst$$Register, $src$$Address);
 8539   %}
 8540   ins_pipe(ialu_reg_mem);
 8541 %}
 8542 
 8543 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8544   predicate(!UseCountLeadingZerosInstruction);
 8545   match(Set dst (CountLeadingZerosI src));
 8546   effect(KILL cr);
 8547 
 8548   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8549             "jnz     skip\n\t"
 8550             "movl    $dst, -1\n"
 8551       "skip:\n\t"
 8552             "negl    $dst\n\t"
 8553             "addl    $dst, 31" %}
 8554   ins_encode %{
 8555     Register Rdst = $dst$$Register;
 8556     Register Rsrc = $src$$Register;
 8557     Label skip;
 8558     __ bsrl(Rdst, Rsrc);
 8559     __ jccb(Assembler::notZero, skip);
 8560     __ movl(Rdst, -1);
 8561     __ bind(skip);
 8562     __ negl(Rdst);
 8563     __ addl(Rdst, BitsPerInt - 1);
 8564   %}
 8565   ins_pipe(ialu_reg);
 8566 %}
 8567 
 8568 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8569   predicate(UseCountLeadingZerosInstruction);
 8570   match(Set dst (CountLeadingZerosL src));
 8571   effect(KILL cr);
 8572 
 8573   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8574   ins_encode %{
 8575     __ lzcntq($dst$$Register, $src$$Register);
 8576   %}
 8577   ins_pipe(ialu_reg);
 8578 %}
 8579 
 8580 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8581   predicate(UseCountLeadingZerosInstruction);
 8582   match(Set dst (CountLeadingZerosL (LoadL src)));
 8583   effect(KILL cr);
 8584   ins_cost(175);
 8585   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8586   ins_encode %{
 8587     __ lzcntq($dst$$Register, $src$$Address);
 8588   %}
 8589   ins_pipe(ialu_reg_mem);
 8590 %}
 8591 
 8592 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8593   predicate(!UseCountLeadingZerosInstruction);
 8594   match(Set dst (CountLeadingZerosL src));
 8595   effect(KILL cr);
 8596 
 8597   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8598             "jnz     skip\n\t"
 8599             "movl    $dst, -1\n"
 8600       "skip:\n\t"
 8601             "negl    $dst\n\t"
 8602             "addl    $dst, 63" %}
 8603   ins_encode %{
 8604     Register Rdst = $dst$$Register;
 8605     Register Rsrc = $src$$Register;
 8606     Label skip;
 8607     __ bsrq(Rdst, Rsrc);
 8608     __ jccb(Assembler::notZero, skip);
 8609     __ movl(Rdst, -1);
 8610     __ bind(skip);
 8611     __ negl(Rdst);
 8612     __ addl(Rdst, BitsPerLong - 1);
 8613   %}
 8614   ins_pipe(ialu_reg);
 8615 %}
 8616 
 8617 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8618   predicate(UseCountTrailingZerosInstruction);
 8619   match(Set dst (CountTrailingZerosI src));
 8620   effect(KILL cr);
 8621 
 8622   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8623   ins_encode %{
 8624     __ tzcntl($dst$$Register, $src$$Register);
 8625   %}
 8626   ins_pipe(ialu_reg);
 8627 %}
 8628 
 8629 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8630   predicate(UseCountTrailingZerosInstruction);
 8631   match(Set dst (CountTrailingZerosI (LoadI src)));
 8632   effect(KILL cr);
 8633   ins_cost(175);
 8634   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8635   ins_encode %{
 8636     __ tzcntl($dst$$Register, $src$$Address);
 8637   %}
 8638   ins_pipe(ialu_reg_mem);
 8639 %}
 8640 
 8641 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8642   predicate(!UseCountTrailingZerosInstruction);
 8643   match(Set dst (CountTrailingZerosI src));
 8644   effect(KILL cr);
 8645 
 8646   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8647             "jnz     done\n\t"
 8648             "movl    $dst, 32\n"
 8649       "done:" %}
 8650   ins_encode %{
 8651     Register Rdst = $dst$$Register;
 8652     Label done;
 8653     __ bsfl(Rdst, $src$$Register);
 8654     __ jccb(Assembler::notZero, done);
 8655     __ movl(Rdst, BitsPerInt);
 8656     __ bind(done);
 8657   %}
 8658   ins_pipe(ialu_reg);
 8659 %}
 8660 
 8661 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8662   predicate(UseCountTrailingZerosInstruction);
 8663   match(Set dst (CountTrailingZerosL src));
 8664   effect(KILL cr);
 8665 
 8666   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8667   ins_encode %{
 8668     __ tzcntq($dst$$Register, $src$$Register);
 8669   %}
 8670   ins_pipe(ialu_reg);
 8671 %}
 8672 
 8673 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8674   predicate(UseCountTrailingZerosInstruction);
 8675   match(Set dst (CountTrailingZerosL (LoadL src)));
 8676   effect(KILL cr);
 8677   ins_cost(175);
 8678   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8679   ins_encode %{
 8680     __ tzcntq($dst$$Register, $src$$Address);
 8681   %}
 8682   ins_pipe(ialu_reg_mem);
 8683 %}
 8684 
 8685 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8686   predicate(!UseCountTrailingZerosInstruction);
 8687   match(Set dst (CountTrailingZerosL src));
 8688   effect(KILL cr);
 8689 
 8690   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8691             "jnz     done\n\t"
 8692             "movl    $dst, 64\n"
 8693       "done:" %}
 8694   ins_encode %{
 8695     Register Rdst = $dst$$Register;
 8696     Label done;
 8697     __ bsfq(Rdst, $src$$Register);
 8698     __ jccb(Assembler::notZero, done);
 8699     __ movl(Rdst, BitsPerLong);
 8700     __ bind(done);
 8701   %}
 8702   ins_pipe(ialu_reg);
 8703 %}
 8704 
 8705 //--------------- Reverse Operation Instructions ----------------
 8706 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8707   predicate(!VM_Version::supports_gfni());
 8708   match(Set dst (ReverseI src));
 8709   effect(TEMP dst, TEMP rtmp, KILL cr);
 8710   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8711   ins_encode %{
 8712     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8713   %}
 8714   ins_pipe( ialu_reg );
 8715 %}
 8716 
 8717 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8718   predicate(VM_Version::supports_gfni());
 8719   match(Set dst (ReverseI src));
 8720   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8721   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8722   ins_encode %{
 8723     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8724   %}
 8725   ins_pipe( ialu_reg );
 8726 %}
 8727 
 8728 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8729   predicate(!VM_Version::supports_gfni());
 8730   match(Set dst (ReverseL src));
 8731   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8732   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8733   ins_encode %{
 8734     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8735   %}
 8736   ins_pipe( ialu_reg );
 8737 %}
 8738 
 8739 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8740   predicate(VM_Version::supports_gfni());
 8741   match(Set dst (ReverseL src));
 8742   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8743   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8744   ins_encode %{
 8745     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8746   %}
 8747   ins_pipe( ialu_reg );
 8748 %}
 8749 
 8750 //---------- Population Count Instructions -------------------------------------
 8751 
 8752 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8753   predicate(UsePopCountInstruction);
 8754   match(Set dst (PopCountI src));
 8755   effect(KILL cr);
 8756 
 8757   format %{ "popcnt  $dst, $src" %}
 8758   ins_encode %{
 8759     __ popcntl($dst$$Register, $src$$Register);
 8760   %}
 8761   ins_pipe(ialu_reg);
 8762 %}
 8763 
 8764 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8765   predicate(UsePopCountInstruction);
 8766   match(Set dst (PopCountI (LoadI mem)));
 8767   effect(KILL cr);
 8768 
 8769   format %{ "popcnt  $dst, $mem" %}
 8770   ins_encode %{
 8771     __ popcntl($dst$$Register, $mem$$Address);
 8772   %}
 8773   ins_pipe(ialu_reg);
 8774 %}
 8775 
 8776 // Note: Long.bitCount(long) returns an int.
 8777 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8778   predicate(UsePopCountInstruction);
 8779   match(Set dst (PopCountL src));
 8780   effect(KILL cr);
 8781 
 8782   format %{ "popcnt  $dst, $src" %}
 8783   ins_encode %{
 8784     __ popcntq($dst$$Register, $src$$Register);
 8785   %}
 8786   ins_pipe(ialu_reg);
 8787 %}
 8788 
 8789 // Note: Long.bitCount(long) returns an int.
 8790 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8791   predicate(UsePopCountInstruction);
 8792   match(Set dst (PopCountL (LoadL mem)));
 8793   effect(KILL cr);
 8794 
 8795   format %{ "popcnt  $dst, $mem" %}
 8796   ins_encode %{
 8797     __ popcntq($dst$$Register, $mem$$Address);
 8798   %}
 8799   ins_pipe(ialu_reg);
 8800 %}
 8801 
 8802 
 8803 //----------MemBar Instructions-----------------------------------------------
 8804 // Memory barrier flavors
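      // On x86-64 only StoreLoad ordering requires an actual instruction, so
      // the acquire, release and storestore flavors below match to empty
      // encodings, while the storeload, volatile and full flavors emit
      // "lock addl [rsp + #0], 0" as a full fence.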
 8805 
 8806 instruct membar_acquire()
 8807 %{
 8808   match(MemBarAcquire);
 8809   match(LoadFence);
 8810   ins_cost(0);
 8811 
 8812   size(0);
 8813   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8814   ins_encode();
 8815   ins_pipe(empty);
 8816 %}
 8817 
 8818 instruct membar_acquire_lock()
 8819 %{
 8820   match(MemBarAcquireLock);
 8821   ins_cost(0);
 8822 
 8823   size(0);
 8824   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8825   ins_encode();
 8826   ins_pipe(empty);
 8827 %}
 8828 
 8829 instruct membar_release()
 8830 %{
 8831   match(MemBarRelease);
 8832   match(StoreFence);
 8833   ins_cost(0);
 8834 
 8835   size(0);
 8836   format %{ "MEMBAR-release ! (empty encoding)" %}
 8837   ins_encode();
 8838   ins_pipe(empty);
 8839 %}
 8840 
 8841 instruct membar_release_lock()
 8842 %{
 8843   match(MemBarReleaseLock);
 8844   ins_cost(0);
 8845 
 8846   size(0);
 8847   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8848   ins_encode();
 8849   ins_pipe(empty);
 8850 %}
 8851 
 8852 instruct membar_storeload(rFlagsReg cr) %{
 8853   match(MemBarStoreLoad);
 8854   effect(KILL cr);
 8855   ins_cost(400);
 8856 
 8857   format %{
 8858     $$template
 8859     $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
 8860   %}
 8861   ins_encode %{
 8862     __ membar(Assembler::StoreLoad);
 8863   %}
 8864   ins_pipe(pipe_slow);
 8865 %}
 8866 
 8867 instruct membar_volatile(rFlagsReg cr) %{
 8868   match(MemBarVolatile);
 8869   effect(KILL cr);
 8870   ins_cost(400);
 8871 
 8872   format %{
 8873     $$template
 8874     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8875   %}
 8876   ins_encode %{
 8877     __ membar(Assembler::StoreLoad);
 8878   %}
 8879   ins_pipe(pipe_slow);
 8880 %}
 8881 
 8882 instruct unnecessary_membar_volatile()
 8883 %{
 8884   match(MemBarVolatile);
 8885   predicate(Matcher::post_store_load_barrier(n));
 8886   ins_cost(0);
 8887 
 8888   size(0);
 8889   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8890   ins_encode();
 8891   ins_pipe(empty);
 8892 %}
 8893 
 8894 instruct membar_full(rFlagsReg cr) %{
 8895   match(MemBarFull);
 8896   effect(KILL cr);
 8897   ins_cost(400);
 8898 
 8899   format %{
 8900     $$template
 8901     $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
 8902   %}
 8903   ins_encode %{
 8904     __ membar(Assembler::StoreLoad);
 8905   %}
 8906   ins_pipe(pipe_slow);
 8907 %}
 8908 
 8909 instruct membar_storestore() %{
 8910   match(MemBarStoreStore);
 8911   match(StoreStoreFence);
 8912   ins_cost(0);
 8913 
 8914   size(0);
 8915   format %{ "MEMBAR-storestore (empty encoding)" %}
 8916   ins_encode( );
 8917   ins_pipe(empty);
 8918 %}
 8919 
 8920 //----------Move Instructions--------------------------------------------------
 8921 
 8922 instruct castX2P(rRegP dst, rRegL src)
 8923 %{
 8924   match(Set dst (CastX2P src));
 8925 
 8926   format %{ "movq    $dst, $src\t# long->ptr" %}
 8927   ins_encode %{
 8928     if ($dst$$reg != $src$$reg) {
 8929       __ movptr($dst$$Register, $src$$Register);
 8930     }
 8931   %}
 8932   ins_pipe(ialu_reg_reg); // XXX
 8933 %}
 8934 
 8935 instruct castP2X(rRegL dst, rRegP src)
 8936 %{
 8937   match(Set dst (CastP2X src));
 8938 
 8939   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8940   ins_encode %{
 8941     if ($dst$$reg != $src$$reg) {
 8942       __ movptr($dst$$Register, $src$$Register);
 8943     }
 8944   %}
 8945   ins_pipe(ialu_reg_reg); // XXX
 8946 %}
 8947 
 8948 // Convert oop into int for vectors alignment masking
 8949 instruct convP2I(rRegI dst, rRegP src)
 8950 %{
 8951   match(Set dst (ConvL2I (CastP2X src)));
 8952 
 8953   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8954   ins_encode %{
 8955     __ movl($dst$$Register, $src$$Register);
 8956   %}
 8957   ins_pipe(ialu_reg_reg); // XXX
 8958 %}
 8959 
 8960 // Convert compressed oop into int for vectors alignment masking
 8961 // in case of 32-bit oops (heap < 4GB).
 8962 instruct convN2I(rRegI dst, rRegN src)
 8963 %{
 8964   predicate(CompressedOops::shift() == 0);
 8965   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8966 
 8967   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8968   ins_encode %{
 8969     __ movl($dst$$Register, $src$$Register);
 8970   %}
 8971   ins_pipe(ialu_reg_reg); // XXX
 8972 %}
 8973 
 8974 // Convert oop pointer into compressed form
 8975 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 8976   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8977   match(Set dst (EncodeP src));
 8978   effect(KILL cr);
 8979   format %{ "encode_heap_oop $dst,$src" %}
 8980   ins_encode %{
 8981     Register s = $src$$Register;
 8982     Register d = $dst$$Register;
 8983     if (s != d) {
 8984       __ movq(d, s);
 8985     }
 8986     __ encode_heap_oop(d);
 8987   %}
 8988   ins_pipe(ialu_reg_long);
 8989 %}
 8990 
 8991 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8992   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8993   match(Set dst (EncodeP src));
 8994   effect(KILL cr);
 8995   format %{ "encode_heap_oop_not_null $dst,$src" %}
 8996   ins_encode %{
 8997     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 8998   %}
 8999   ins_pipe(ialu_reg_long);
 9000 %}
 9001 
 9002 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 9003   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 9004             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 9005   match(Set dst (DecodeN src));
 9006   effect(KILL cr);
 9007   format %{ "decode_heap_oop $dst,$src" %}
 9008   ins_encode %{
 9009     Register s = $src$$Register;
 9010     Register d = $dst$$Register;
 9011     if (s != d) {
 9012       __ movq(d, s);
 9013     }
 9014     __ decode_heap_oop(d);
 9015   %}
 9016   ins_pipe(ialu_reg_long);
 9017 %}
 9018 
 9019 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9020   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 9021             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 9022   match(Set dst (DecodeN src));
 9023   effect(KILL cr);
 9024   format %{ "decode_heap_oop_not_null $dst,$src" %}
 9025   ins_encode %{
 9026     Register s = $src$$Register;
 9027     Register d = $dst$$Register;
 9028     if (s != d) {
 9029       __ decode_heap_oop_not_null(d, s);
 9030     } else {
 9031       __ decode_heap_oop_not_null(d);
 9032     }
 9033   %}
 9034   ins_pipe(ialu_reg_long);
 9035 %}
 9036 
 9037 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 9038   match(Set dst (EncodePKlass src));
 9039   effect(TEMP dst, KILL cr);
 9040   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 9041   ins_encode %{
 9042     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9043   %}
 9044   ins_pipe(ialu_reg_long);
 9045 %}
 9046 
 9047 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9048   match(Set dst (DecodeNKlass src));
 9049   effect(TEMP dst, KILL cr);
 9050   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 9051   ins_encode %{
 9052     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9053   %}
 9054   ins_pipe(ialu_reg_long);
 9055 %}
 9056 
 9057 //----------Conditional Move---------------------------------------------------
 9058 // Jump
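      // The jump-table dispatch below loads the address of the constant table with
      // leaq and jumps indirectly through it, indexed by the (optionally scaled and
      // displaced) switch value.  MacroAssembler::jump(ArrayAddress) is not used
      // because it needs r10, which the allocator may already have handed out.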
 9059 // dummy instruction for generating temp registers
 9060 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 9061   match(Jump (LShiftL switch_val shift));
 9062   ins_cost(350);
 9063   predicate(false);
 9064   effect(TEMP dest);
 9065 
 9066   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9067             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9068   ins_encode %{
 9069     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9070     // to do that and the compiler is using that register as one it can allocate.
 9071     // So we build it all by hand.
 9072     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9073     // ArrayAddress dispatch(table, index);
 9074     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9075     __ lea($dest$$Register, $constantaddress);
 9076     __ jmp(dispatch);
 9077   %}
 9078   ins_pipe(pipe_jmp);
 9079 %}
 9080 
 9081 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 9082   match(Jump (AddL (LShiftL switch_val shift) offset));
 9083   ins_cost(350);
 9084   effect(TEMP dest);
 9085 
 9086   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9087             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9088   ins_encode %{
 9089     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9090     // to do that and the compiler is using that register as one it can allocate.
 9091     // So we build it all by hand.
 9092     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9093     // ArrayAddress dispatch(table, index);
 9094     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9095     __ lea($dest$$Register, $constantaddress);
 9096     __ jmp(dispatch);
 9097   %}
 9098   ins_pipe(pipe_jmp);
 9099 %}
 9100 
 9101 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 9102   match(Jump switch_val);
 9103   ins_cost(350);
 9104   effect(TEMP dest);
 9105 
 9106   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9107             "jmp     [$dest + $switch_val]\n\t" %}
 9108   ins_encode %{
 9109     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9110     // to do that and the compiler is using that register as one it can allocate.
 9111     // So we build it all by hand.
 9112     // Address index(noreg, switch_reg, Address::times_1);
 9113     // ArrayAddress dispatch(table, index);
 9114     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9115     __ lea($dest$$Register, $constantaddress);
 9116     __ jmp(dispatch);
 9117   %}
 9118   ins_pipe(pipe_jmp);
 9119 %}
 9120 
 9121 // Conditional move
 9122 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9123 %{
 9124   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9125   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9126 
 9127   ins_cost(100); // XXX
 9128   format %{ "setbn$cop $dst\t# signed, int" %}
 9129   ins_encode %{
 9130     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9131     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9132   %}
 9133   ins_pipe(ialu_reg);
 9134 %}
 9135 
 9136 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9137 %{
 9138   predicate(!UseAPX);
 9139   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9140 
 9141   ins_cost(200); // XXX
 9142   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9143   ins_encode %{
 9144     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9145   %}
 9146   ins_pipe(pipe_cmov_reg);
 9147 %}
 9148 
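      // The *_ndd variants use the APX new-data-destination (NDD) encodings (UseAPX):
      // the destination is an explicit, separate operand, so the allocator is not
      // forced to make dst the same register as the first source, as the two-operand
      // forms above require.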
 9149 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9150 %{
 9151   predicate(UseAPX);
 9152   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9153 
 9154   ins_cost(200);
 9155   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9156   ins_encode %{
 9157     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9158   %}
 9159   ins_pipe(pipe_cmov_reg);
 9160 %}
 9161 
 9162 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9163 %{
 9164   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9165   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9166 
 9167   ins_cost(100); // XXX
 9168   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9169   ins_encode %{
 9170     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9171     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9172   %}
 9173   ins_pipe(ialu_reg);
 9174 %}
 9175 
 9176 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9177   predicate(!UseAPX);
 9178   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9179 
 9180   ins_cost(200); // XXX
 9181   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9182   ins_encode %{
 9183     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9184   %}
 9185   ins_pipe(pipe_cmov_reg);
 9186 %}
 9187 
 9188 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9189   predicate(UseAPX);
 9190   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9191 
 9192   ins_cost(200);
 9193   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9194   ins_encode %{
 9195     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9196   %}
 9197   ins_pipe(pipe_cmov_reg);
 9198 %}
 9199 
 9200 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9201 %{
 9202   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9203   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9204 
 9205   ins_cost(100); // XXX
 9206   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9207   ins_encode %{
 9208     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9209     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9210   %}
 9211   ins_pipe(ialu_reg);
 9212 %}
 9213 
 9214 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9215 %{
 9216   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9217   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9218 
 9219   ins_cost(100); // XXX
 9220   format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
 9221   ins_encode %{
 9222     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9223     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9224   %}
 9225   ins_pipe(ialu_reg);
 9226 %}
 9227 
 9228 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9229   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9230 
 9231   ins_cost(200);
 9232   expand %{
 9233     cmovI_regU(cop, cr, dst, src);
 9234   %}
 9235 %}
 9236 
 9237 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
 9238   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9239 
 9240   ins_cost(200);
 9241   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9242   ins_encode %{
 9243     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9244   %}
 9245   ins_pipe(pipe_cmov_reg);
 9246 %}
 9247 
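      // When the flags come from an unordered floating-point compare, the parity flag
      // is set if either input is NaN.  For a != test the unordered case must also
      // take the move, so the cmove is emitted twice: once on parity and once on
      // not-equal.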
 9248 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9249   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9250   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9251 
 9252   ins_cost(200); // XXX
 9253   format %{ "cmovpl  $dst, $src\n\t"
 9254             "cmovnel $dst, $src" %}
 9255   ins_encode %{
 9256     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9257     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9258   %}
 9259   ins_pipe(pipe_cmov_reg);
 9260 %}
 9261 
 9262 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9263 // inputs of the CMove
 9264 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9265   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9266   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9267   effect(TEMP dst);
 9268 
 9269   ins_cost(200); // XXX
 9270   format %{ "cmovpl  $dst, $src\n\t"
 9271             "cmovnel $dst, $src" %}
 9272   ins_encode %{
 9273     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9274     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9275   %}
 9276   ins_pipe(pipe_cmov_reg);
 9277 %}
 9278 
 9279 // Conditional move
 9280 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9281   predicate(!UseAPX);
 9282   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9283 
 9284   ins_cost(250); // XXX
 9285   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9286   ins_encode %{
 9287     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9288   %}
 9289   ins_pipe(pipe_cmov_mem);
 9290 %}
 9291 
 9292 // Conditional move
 9293 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9294 %{
 9295   predicate(UseAPX);
 9296   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9297 
 9298   ins_cost(250);
 9299   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9300   ins_encode %{
 9301     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9302   %}
 9303   ins_pipe(pipe_cmov_mem);
 9304 %}
 9305 
 9306 // Conditional move
 9307 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9308 %{
 9309   predicate(!UseAPX);
 9310   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9311 
 9312   ins_cost(250); // XXX
 9313   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9314   ins_encode %{
 9315     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9316   %}
 9317   ins_pipe(pipe_cmov_mem);
 9318 %}
 9319 
 9320 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9321   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9322 
 9323   ins_cost(250);
 9324   expand %{
 9325     cmovI_memU(cop, cr, dst, src);
 9326   %}
 9327 %}
 9328 
 9329 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9330 %{
 9331   predicate(UseAPX);
 9332   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9333 
 9334   ins_cost(250);
 9335   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9336   ins_encode %{
 9337     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9338   %}
 9339   ins_pipe(pipe_cmov_mem);
 9340 %}
 9341 
 9342 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
 9343 %{
 9344   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9345 
 9346   ins_cost(250);
 9347   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9348   ins_encode %{
 9349     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9350   %}
 9351   ins_pipe(pipe_cmov_mem);
 9352 %}
 9353 
 9354 // Conditional move
 9355 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9356 %{
 9357   predicate(!UseAPX);
 9358   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9359 
 9360   ins_cost(200); // XXX
 9361   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9362   ins_encode %{
 9363     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9364   %}
 9365   ins_pipe(pipe_cmov_reg);
 9366 %}
 9367 
 9368 // Conditional move ndd
 9369 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9370 %{
 9371   predicate(UseAPX);
 9372   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9373 
 9374   ins_cost(200);
 9375   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9376   ins_encode %{
 9377     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9378   %}
 9379   ins_pipe(pipe_cmov_reg);
 9380 %}
 9381 
 9382 // Conditional move
 9383 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9384 %{
 9385   predicate(!UseAPX);
 9386   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9387 
 9388   ins_cost(200); // XXX
 9389   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9390   ins_encode %{
 9391     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9392   %}
 9393   ins_pipe(pipe_cmov_reg);
 9394 %}
 9395 
 9396 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9397   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9398 
 9399   ins_cost(200);
 9400   expand %{
 9401     cmovN_regU(cop, cr, dst, src);
 9402   %}
 9403 %}
 9404 
 9405 // Conditional move ndd
 9406 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9407 %{
 9408   predicate(UseAPX);
 9409   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9410 
 9411   ins_cost(200);
 9412   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9413   ins_encode %{
 9414     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9415   %}
 9416   ins_pipe(pipe_cmov_reg);
 9417 %}
 9418 
 9419 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
 9420   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9421 
 9422   ins_cost(200);
 9423   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
 9424   ins_encode %{
 9425     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9426   %}
 9427   ins_pipe(pipe_cmov_reg);
 9428 %}
 9429 
 9430 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9431   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9432   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9433 
 9434   ins_cost(200); // XXX
 9435   format %{ "cmovpl  $dst, $src\n\t"
 9436             "cmovnel $dst, $src" %}
 9437   ins_encode %{
 9438     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9439     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9440   %}
 9441   ins_pipe(pipe_cmov_reg);
 9442 %}
 9443 
 9444 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9445 // inputs of the CMove
 9446 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9447   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9448   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9449 
 9450   ins_cost(200); // XXX
 9451   format %{ "cmovpl  $dst, $src\n\t"
 9452             "cmovnel $dst, $src" %}
 9453   ins_encode %{
 9454     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9455     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9456   %}
 9457   ins_pipe(pipe_cmov_reg);
 9458 %}
 9459 
 9460 // Conditional move
 9461 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9462 %{
 9463   predicate(!UseAPX);
 9464   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9465 
 9466   ins_cost(200); // XXX
 9467   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9468   ins_encode %{
 9469     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9470   %}
 9471   ins_pipe(pipe_cmov_reg);  // XXX
 9472 %}
 9473 
 9474 // Conditional move ndd
 9475 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9476 %{
 9477   predicate(UseAPX);
 9478   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9479 
 9480   ins_cost(200);
 9481   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9482   ins_encode %{
 9483     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9484   %}
 9485   ins_pipe(pipe_cmov_reg);
 9486 %}
 9487 
 9488 // Conditional move
 9489 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9490 %{
 9491   predicate(!UseAPX);
 9492   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9493 
 9494   ins_cost(200); // XXX
 9495   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9496   ins_encode %{
 9497     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9498   %}
 9499   ins_pipe(pipe_cmov_reg); // XXX
 9500 %}
 9501 
 9502 // Conditional move ndd
 9503 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9504 %{
 9505   predicate(UseAPX);
 9506   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9507 
 9508   ins_cost(200);
 9509   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9510   ins_encode %{
 9511     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9512   %}
 9513   ins_pipe(pipe_cmov_reg);
 9514 %}
 9515 
 9516 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9517   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9518 
 9519   ins_cost(200);
 9520   expand %{
 9521     cmovP_regU(cop, cr, dst, src);
 9522   %}
 9523 %}
 9524 
 9525 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
 9526   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9527 
 9528   ins_cost(200);
 9529   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
 9530   ins_encode %{
 9531     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9532   %}
 9533   ins_pipe(pipe_cmov_reg);
 9534 %}
 9535 
 9536 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9537   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9538   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9539 
 9540   ins_cost(200); // XXX
 9541   format %{ "cmovpq  $dst, $src\n\t"
 9542             "cmovneq $dst, $src" %}
 9543   ins_encode %{
 9544     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9545     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9546   %}
 9547   ins_pipe(pipe_cmov_reg);
 9548 %}
 9549 
 9550 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9551 // inputs of the CMove
 9552 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9553   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9554   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9555 
 9556   ins_cost(200); // XXX
 9557   format %{ "cmovpq  $dst, $src\n\t"
 9558             "cmovneq $dst, $src" %}
 9559   ins_encode %{
 9560     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9561     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9562   %}
 9563   ins_pipe(pipe_cmov_reg);
 9564 %}
 9565 
 9566 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9567 %{
 9568   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9569   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9570 
 9571   ins_cost(100); // XXX
 9572   format %{ "setbn$cop $dst\t# signed, long" %}
 9573   ins_encode %{
 9574     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9575     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9576   %}
 9577   ins_pipe(ialu_reg);
 9578 %}
 9579 
 9580 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9581 %{
 9582   predicate(!UseAPX);
 9583   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9584 
 9585   ins_cost(200); // XXX
 9586   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9587   ins_encode %{
 9588     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9589   %}
 9590   ins_pipe(pipe_cmov_reg);  // XXX
 9591 %}
 9592 
 9593 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9594 %{
 9595   predicate(UseAPX);
 9596   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9597 
 9598   ins_cost(200);
 9599   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9600   ins_encode %{
 9601     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9602   %}
 9603   ins_pipe(pipe_cmov_reg);
 9604 %}
 9605 
 9606 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9607 %{
 9608   predicate(!UseAPX);
 9609   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9610 
 9611   ins_cost(200); // XXX
 9612   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9613   ins_encode %{
 9614     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9615   %}
 9616   ins_pipe(pipe_cmov_mem);  // XXX
 9617 %}
 9618 
 9619 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9620 %{
 9621   predicate(UseAPX);
 9622   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9623 
 9624   ins_cost(200);
 9625   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9626   ins_encode %{
 9627     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9628   %}
 9629   ins_pipe(pipe_cmov_mem);
 9630 %}
 9631 
 9632 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9633 %{
 9634   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9635   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9636 
 9637   ins_cost(100); // XXX
 9638   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9639   ins_encode %{
 9640     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9641     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9642   %}
 9643   ins_pipe(ialu_reg);
 9644 %}
 9645 
 9646 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9647 %{
 9648   predicate(!UseAPX);
 9649   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9650 
 9651   ins_cost(200); // XXX
 9652   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9653   ins_encode %{
 9654     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9655   %}
 9656   ins_pipe(pipe_cmov_reg); // XXX
 9657 %}
 9658 
 9659 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9660 %{
 9661   predicate(UseAPX);
 9662   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9663 
 9664   ins_cost(200);
 9665   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9666   ins_encode %{
 9667     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9668   %}
 9669   ins_pipe(pipe_cmov_reg);
 9670 %}
 9671 
 9672 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9673 %{
 9674   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9675   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9676 
 9677   ins_cost(100); // XXX
 9678   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9679   ins_encode %{
 9680     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9681     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9682   %}
 9683   ins_pipe(ialu_reg);
 9684 %}
 9685 
 9686 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9687 %{
 9688   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9689   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9690 
 9691   ins_cost(100); // XXX
 9692   format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
 9693   ins_encode %{
 9694     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9695     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9696   %}
 9697   ins_pipe(ialu_reg);
 9698 %}
 9699 
 9700 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9701   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9702 
 9703   ins_cost(200);
 9704   expand %{
 9705     cmovL_regU(cop, cr, dst, src);
 9706   %}
 9707 %}
 9708 
 9709 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
 9710 %{
 9711   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9712 
 9713   ins_cost(200);
 9714   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9715   ins_encode %{
 9716     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9717   %}
 9718   ins_pipe(pipe_cmov_reg);
 9719 %}
 9720 
 9721 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9722   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9723   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9724 
 9725   ins_cost(200); // XXX
 9726   format %{ "cmovpq  $dst, $src\n\t"
 9727             "cmovneq $dst, $src" %}
 9728   ins_encode %{
 9729     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9730     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9731   %}
 9732   ins_pipe(pipe_cmov_reg);
 9733 %}
 9734 
 9735 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9736 // inputs of the CMove
 9737 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9738   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9739   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9740 
 9741   ins_cost(200); // XXX
 9742   format %{ "cmovpq  $dst, $src\n\t"
 9743             "cmovneq $dst, $src" %}
 9744   ins_encode %{
 9745     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9746     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9747   %}
 9748   ins_pipe(pipe_cmov_reg);
 9749 %}
 9750 
 9751 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9752 %{
 9753   predicate(!UseAPX);
 9754   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9755 
 9756   ins_cost(200); // XXX
 9757   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9758   ins_encode %{
 9759     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9760   %}
 9761   ins_pipe(pipe_cmov_mem); // XXX
 9762 %}
 9763 
 9764 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9765   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9766 
 9767   ins_cost(200);
 9768   expand %{
 9769     cmovL_memU(cop, cr, dst, src);
 9770   %}
 9771 %}
 9772 
 9773 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9774 %{
 9775   predicate(UseAPX);
 9776   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9777 
 9778   ins_cost(200);
 9779   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9780   ins_encode %{
 9781     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9782   %}
 9783   ins_pipe(pipe_cmov_mem);
 9784 %}
 9785 
 9786 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
 9787 %{
 9788   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9789 
 9790   ins_cost(200);
 9791   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9792   ins_encode %{
 9793     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9794   %}
 9795   ins_pipe(pipe_cmov_mem);
 9796 %}
 9797 
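      // There is no conditional move for XMM registers, so the float/double cmoves
      // below branch around the move instead, using the inverse of the cmove
      // condition.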
 9798 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9799 %{
 9800   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9801 
 9802   ins_cost(200); // XXX
 9803   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9804             "movss     $dst, $src\n"
 9805     "skip:" %}
 9806   ins_encode %{
 9807     Label Lskip;
 9808     // Invert sense of branch from sense of CMOV
 9809     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9810     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9811     __ bind(Lskip);
 9812   %}
 9813   ins_pipe(pipe_slow);
 9814 %}
 9815 
 9816 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9817 %{
 9818   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9819 
 9820   ins_cost(200); // XXX
 9821   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9822             "movss     $dst, $src\n"
 9823     "skip:" %}
 9824   ins_encode %{
 9825     Label Lskip;
 9826     // Invert sense of branch from sense of CMOV
 9827     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9828     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9829     __ bind(Lskip);
 9830   %}
 9831   ins_pipe(pipe_slow);
 9832 %}
 9833 
 9834 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9835   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9836 
 9837   ins_cost(200);
 9838   expand %{
 9839     cmovF_regU(cop, cr, dst, src);
 9840   %}
 9841 %}
 9842 
 9843 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
 9844 %{
 9845   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9846 
 9847   ins_cost(200); // XXX
 9848   format %{ "jn$cop    skip\t# signed, unsigned cmove float\n\t"
 9849             "movss     $dst, $src\n"
 9850     "skip:" %}
 9851   ins_encode %{
 9852     Label Lskip;
 9853     // Invert sense of branch from sense of CMOV
 9854     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9855     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9856     __ bind(Lskip);
 9857   %}
 9858   ins_pipe(pipe_slow);
 9859 %}
 9860 
 9861 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9862 %{
 9863   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9864 
 9865   ins_cost(200); // XXX
 9866   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9867             "movsd     $dst, $src\n"
 9868     "skip:" %}
 9869   ins_encode %{
 9870     Label Lskip;
 9871     // Invert sense of branch from sense of CMOV
 9872     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9873     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9874     __ bind(Lskip);
 9875   %}
 9876   ins_pipe(pipe_slow);
 9877 %}
 9878 
 9879 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9880 %{
 9881   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9882 
 9883   ins_cost(200); // XXX
 9884   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9885             "movsd     $dst, $src\n"
 9886     "skip:" %}
 9887   ins_encode %{
 9888     Label Lskip;
 9889     // Invert sense of branch from sense of CMOV
 9890     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9891     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9892     __ bind(Lskip);
 9893   %}
 9894   ins_pipe(pipe_slow);
 9895 %}
 9896 
 9897 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9898   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9899 
 9900   ins_cost(200);
 9901   expand %{
 9902     cmovD_regU(cop, cr, dst, src);
 9903   %}
 9904 %}
 9905 
 9906 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
 9907 %{
 9908   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9909 
 9910   ins_cost(200); // XXX
 9911   format %{ "jn$cop    skip\t# signed, unsigned cmove double\n\t"
 9912             "movsd     $dst, $src\n"
 9913     "skip:" %}
 9914   ins_encode %{
 9915     Label Lskip;
 9916     // Invert sense of branch from sense of CMOV
 9917     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9918     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9919     __ bind(Lskip);
 9920   %}
 9921   ins_pipe(pipe_slow);
 9922 %}
 9923 
 9924 //----------Arithmetic Instructions--------------------------------------------
 9925 //----------Addition Instructions----------------------------------------------
 9926 
 9927 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9928 %{
 9929   predicate(!UseAPX);
 9930   match(Set dst (AddI dst src));
 9931   effect(KILL cr);
 9932   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9933   format %{ "addl    $dst, $src\t# int" %}
 9934   ins_encode %{
 9935     __ addl($dst$$Register, $src$$Register);
 9936   %}
 9937   ins_pipe(ialu_reg_reg);
 9938 %}
 9939 
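      // Note (assumption): the Flag_ndd_demotable_opr1/opr2 hints appear to allow an
      // NDD instruction to be demoted to the shorter legacy two-operand encoding when
      // the destination is allocated to that source register.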
 9940 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9941 %{
 9942   predicate(UseAPX);
 9943   match(Set dst (AddI src1 src2));
 9944   effect(KILL cr);
 9945   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
 9946 
 9947   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9948   ins_encode %{
 9949     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9950   %}
 9951   ins_pipe(ialu_reg_reg);
 9952 %}
 9953 
 9954 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9955 %{
 9956   predicate(!UseAPX);
 9957   match(Set dst (AddI dst src));
 9958   effect(KILL cr);
 9959   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9960 
 9961   format %{ "addl    $dst, $src\t# int" %}
 9962   ins_encode %{
 9963     __ addl($dst$$Register, $src$$constant);
 9964   %}
 9965   ins_pipe( ialu_reg );
 9966 %}
 9967 
 9968 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9969 %{
 9970   predicate(UseAPX);
 9971   match(Set dst (AddI src1 src2));
 9972   effect(KILL cr);
 9973   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
 9974 
 9975   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9976   ins_encode %{
 9977     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9978   %}
 9979   ins_pipe( ialu_reg );
 9980 %}
 9981 
 9982 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
 9983 %{
 9984   predicate(UseAPX);
 9985   match(Set dst (AddI (LoadI src1) src2));
 9986   effect(KILL cr);
 9987   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9988 
 9989   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9990   ins_encode %{
 9991     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
 9992   %}
 9993   ins_pipe( ialu_reg );
 9994 %}
 9995 
 9996 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9997 %{
 9998   predicate(!UseAPX);
 9999   match(Set dst (AddI dst (LoadI src)));
10000   effect(KILL cr);
10001   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10002 
10003   ins_cost(150); // XXX
10004   format %{ "addl    $dst, $src\t# int" %}
10005   ins_encode %{
10006     __ addl($dst$$Register, $src$$Address);
10007   %}
10008   ins_pipe(ialu_reg_mem);
10009 %}
10010 
10011 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10012 %{
10013   predicate(UseAPX);
10014   match(Set dst (AddI src1 (LoadI src2)));
10015   effect(KILL cr);
10016   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10017 
10018   ins_cost(150);
10019   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10020   ins_encode %{
10021     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10022   %}
10023   ins_pipe(ialu_reg_mem);
10024 %}
10025 
10026 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10027 %{
10028   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10029   effect(KILL cr);
10030   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10031 
10032   ins_cost(150); // XXX
10033   format %{ "addl    $dst, $src\t# int" %}
10034   ins_encode %{
10035     __ addl($dst$$Address, $src$$Register);
10036   %}
10037   ins_pipe(ialu_mem_reg);
10038 %}
10039 
10040 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10041 %{
10042   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10043   effect(KILL cr);
10044   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10045
10047   ins_cost(125); // XXX
10048   format %{ "addl    $dst, $src\t# int" %}
10049   ins_encode %{
10050     __ addl($dst$$Address, $src$$constant);
10051   %}
10052   ins_pipe(ialu_mem_imm);
10053 %}
10054 
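      // incl/decl encode more compactly than addl/subl with an immediate but leave the
      // carry flag unchanged, so they are only used under UseIncDec, which is turned
      // off on CPUs where the partial flags update is costly.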
10055 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10056 %{
10057   predicate(!UseAPX && UseIncDec);
10058   match(Set dst (AddI dst src));
10059   effect(KILL cr);
10060 
10061   format %{ "incl    $dst\t# int" %}
10062   ins_encode %{
10063     __ incrementl($dst$$Register);
10064   %}
10065   ins_pipe(ialu_reg);
10066 %}
10067 
10068 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10069 %{
10070   predicate(UseAPX && UseIncDec);
10071   match(Set dst (AddI src val));
10072   effect(KILL cr);
10073   flag(PD::Flag_ndd_demotable_opr1);
10074 
10075   format %{ "eincl    $dst, $src\t# int ndd" %}
10076   ins_encode %{
10077     __ eincl($dst$$Register, $src$$Register, false);
10078   %}
10079   ins_pipe(ialu_reg);
10080 %}
10081 
10082 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10083 %{
10084   predicate(UseAPX && UseIncDec);
10085   match(Set dst (AddI (LoadI src) val));
10086   effect(KILL cr);
10087 
10088   format %{ "eincl    $dst, $src\t# int ndd" %}
10089   ins_encode %{
10090     __ eincl($dst$$Register, $src$$Address, false);
10091   %}
10092   ins_pipe(ialu_reg);
10093 %}
10094 
10095 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10096 %{
10097   predicate(UseIncDec);
10098   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10099   effect(KILL cr);
10100 
10101   ins_cost(125); // XXX
10102   format %{ "incl    $dst\t# int" %}
10103   ins_encode %{
10104     __ incrementl($dst$$Address);
10105   %}
10106   ins_pipe(ialu_mem_imm);
10107 %}
10108 
10109 // XXX why does that use AddI
10110 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10111 %{
10112   predicate(!UseAPX && UseIncDec);
10113   match(Set dst (AddI dst src));
10114   effect(KILL cr);
10115 
10116   format %{ "decl    $dst\t# int" %}
10117   ins_encode %{
10118     __ decrementl($dst$$Register);
10119   %}
10120   ins_pipe(ialu_reg);
10121 %}
10122 
10123 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10124 %{
10125   predicate(UseAPX && UseIncDec);
10126   match(Set dst (AddI src val));
10127   effect(KILL cr);
10128   flag(PD::Flag_ndd_demotable_opr1);
10129 
10130   format %{ "edecl    $dst, $src\t# int ndd" %}
10131   ins_encode %{
10132     __ edecl($dst$$Register, $src$$Register, false);
10133   %}
10134   ins_pipe(ialu_reg);
10135 %}
10136 
10137 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10138 %{
10139   predicate(UseAPX && UseIncDec);
10140   match(Set dst (AddI (LoadI src) val));
10141   effect(KILL cr);
10142 
10143   format %{ "edecl    $dst, $src\t# int ndd" %}
10144   ins_encode %{
10145     __ edecl($dst$$Register, $src$$Address, false);
10146   %}
10147   ins_pipe(ialu_reg);
10148 %}
10149 
10150 // XXX why does that use AddI
10151 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10152 %{
10153   predicate(UseIncDec);
10154   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10155   effect(KILL cr);
10156 
10157   ins_cost(125); // XXX
10158   format %{ "decl    $dst\t# int" %}
10159   ins_encode %{
10160     __ decrementl($dst$$Address);
10161   %}
10162   ins_pipe(ialu_mem_imm);
10163 %}
10164 
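      // The lea forms below fold a scaled index and/or displacement into a single
      // address computation; they are only selected on CPUs where two- and
      // three-operand lea is fast (supports_fast_2op_lea / supports_fast_3op_lea).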
10165 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10166 %{
10167   predicate(VM_Version::supports_fast_2op_lea());
10168   match(Set dst (AddI (LShiftI index scale) disp));
10169 
10170   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10171   ins_encode %{
10172     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10173     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10174   %}
10175   ins_pipe(ialu_reg_reg);
10176 %}
10177 
10178 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10179 %{
10180   predicate(VM_Version::supports_fast_3op_lea());
10181   match(Set dst (AddI (AddI base index) disp));
10182 
10183   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10184   ins_encode %{
10185     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10186   %}
10187   ins_pipe(ialu_reg_reg);
10188 %}
10189 
10190 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10191 %{
10192   predicate(VM_Version::supports_fast_2op_lea());
10193   match(Set dst (AddI base (LShiftI index scale)));
10194 
10195   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10196   ins_encode %{
10197     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10198     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10199   %}
10200   ins_pipe(ialu_reg_reg);
10201 %}
10202 
10203 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10204 %{
10205   predicate(VM_Version::supports_fast_3op_lea());
10206   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10207 
10208   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10209   ins_encode %{
10210     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10211     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10212   %}
10213   ins_pipe(ialu_reg_reg);
10214 %}
10215 
10216 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10217 %{
10218   predicate(!UseAPX);
10219   match(Set dst (AddL dst src));
10220   effect(KILL cr);
10221   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10222 
10223   format %{ "addq    $dst, $src\t# long" %}
10224   ins_encode %{
10225     __ addq($dst$$Register, $src$$Register);
10226   %}
10227   ins_pipe(ialu_reg_reg);
10228 %}
10229 
10230 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10231 %{
10232   predicate(UseAPX);
10233   match(Set dst (AddL src1 src2));
10234   effect(KILL cr);
10235   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10236 
10237   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10238   ins_encode %{
10239     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10240   %}
10241   ins_pipe(ialu_reg_reg);
10242 %}
10243 
10244 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10245 %{
10246   predicate(!UseAPX);
10247   match(Set dst (AddL dst src));
10248   effect(KILL cr);
10249   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10250 
10251   format %{ "addq    $dst, $src\t# long" %}
10252   ins_encode %{
10253     __ addq($dst$$Register, $src$$constant);
10254   %}
10255   ins_pipe( ialu_reg );
10256 %}
10257 
10258 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10259 %{
10260   predicate(UseAPX);
10261   match(Set dst (AddL src1 src2));
10262   effect(KILL cr);
10263   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10264 
10265   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10266   ins_encode %{
10267     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10268   %}
10269   ins_pipe( ialu_reg );
10270 %}
10271 
10272 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10273 %{
10274   predicate(UseAPX);
10275   match(Set dst (AddL (LoadL src1) src2));
10276   effect(KILL cr);
10277   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10278 
10279   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10280   ins_encode %{
10281     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10282   %}
10283   ins_pipe( ialu_reg );
10284 %}
10285 
10286 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10287 %{
10288   predicate(!UseAPX);
10289   match(Set dst (AddL dst (LoadL src)));
10290   effect(KILL cr);
10291   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10292 
10293   ins_cost(150); // XXX
10294   format %{ "addq    $dst, $src\t# long" %}
10295   ins_encode %{
10296     __ addq($dst$$Register, $src$$Address);
10297   %}
10298   ins_pipe(ialu_reg_mem);
10299 %}
10300 
10301 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10302 %{
10303   predicate(UseAPX);
10304   match(Set dst (AddL src1 (LoadL src2)));
10305   effect(KILL cr);
10306   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10307 
10308   ins_cost(150);
10309   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10310   ins_encode %{
10311     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10312   %}
10313   ins_pipe(ialu_reg_mem);
10314 %}
10315 
10316 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10317 %{
10318   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10319   effect(KILL cr);
10320   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10321 
10322   ins_cost(150); // XXX
10323   format %{ "addq    $dst, $src\t# long" %}
10324   ins_encode %{
10325     __ addq($dst$$Address, $src$$Register);
10326   %}
10327   ins_pipe(ialu_mem_reg);
10328 %}
10329 
10330 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10331 %{
10332   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10333   effect(KILL cr);
10334   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10335 
10336   ins_cost(125); // XXX
10337   format %{ "addq    $dst, $src\t# long" %}
10338   ins_encode %{
10339     __ addq($dst$$Address, $src$$constant);
10340   %}
10341   ins_pipe(ialu_mem_imm);
10342 %}
10343 
10344 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10345 %{
10346   predicate(!UseAPX && UseIncDec);
10347   match(Set dst (AddL dst src));
10348   effect(KILL cr);
10349 
10350   format %{ "incq    $dst\t# long" %}
10351   ins_encode %{
10352     __ incrementq($dst$$Register);
10353   %}
10354   ins_pipe(ialu_reg);
10355 %}
10356 
10357 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10358 %{
10359   predicate(UseAPX && UseIncDec);
10360   match(Set dst (AddL src val));
10361   effect(KILL cr);
10362   flag(PD::Flag_ndd_demotable_opr1);
10363 
10364   format %{ "eincq    $dst, $src\t# long ndd" %}
10365   ins_encode %{
10366     __ eincq($dst$$Register, $src$$Register, false);
10367   %}
10368   ins_pipe(ialu_reg);
10369 %}
10370 
10371 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10372 %{
10373   predicate(UseAPX && UseIncDec);
10374   match(Set dst (AddL (LoadL src) val));
10375   effect(KILL cr);
10376 
10377   format %{ "eincq    $dst, $src\t# long ndd" %}
10378   ins_encode %{
10379     __ eincq($dst$$Register, $src$$Address, false);
10380   %}
10381   ins_pipe(ialu_reg);
10382 %}
10383 
10384 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10385 %{
10386   predicate(UseIncDec);
10387   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10388   effect(KILL cr);
10389 
10390   ins_cost(125); // XXX
10391   format %{ "incq    $dst\t# long" %}
10392   ins_encode %{
10393     __ incrementq($dst$$Address);
10394   %}
10395   ins_pipe(ialu_mem_imm);
10396 %}
10397 
10398 // XXX why does that use AddL
10399 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10400 %{
10401   predicate(!UseAPX && UseIncDec);
10402   match(Set dst (AddL dst src));
10403   effect(KILL cr);
10404 
10405   format %{ "decq    $dst\t# long" %}
10406   ins_encode %{
10407     __ decrementq($dst$$Register);
10408   %}
10409   ins_pipe(ialu_reg);
10410 %}
10411 
10412 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10413 %{
10414   predicate(UseAPX && UseIncDec);
10415   match(Set dst (AddL src val));
10416   effect(KILL cr);
10417   flag(PD::Flag_ndd_demotable_opr1);
10418 
10419   format %{ "edecq    $dst, $src\t# long ndd" %}
10420   ins_encode %{
10421     __ edecq($dst$$Register, $src$$Register, false);
10422   %}
10423   ins_pipe(ialu_reg);
10424 %}
10425 
10426 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10427 %{
10428   predicate(UseAPX && UseIncDec);
10429   match(Set dst (AddL (LoadL src) val));
10430   effect(KILL cr);
10431 
10432   format %{ "edecq    $dst, $src\t# long ndd" %}
10433   ins_encode %{
10434     __ edecq($dst$$Register, $src$$Address, false);
10435   %}
10436   ins_pipe(ialu_reg);
10437 %}
10438 
10439 // XXX why does that use AddL
10440 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10441 %{
10442   predicate(UseIncDec);
10443   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10444   effect(KILL cr);
10445 
10446   ins_cost(125); // XXX
10447   format %{ "decq    $dst\t# long" %}
10448   ins_encode %{
10449     __ decrementq($dst$$Address);
10450   %}
10451   ins_pipe(ialu_mem_imm);
10452 %}
10453 
10454 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10455 %{
10456   predicate(VM_Version::supports_fast_2op_lea());
10457   match(Set dst (AddL (LShiftL index scale) disp));
10458 
10459   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10460   ins_encode %{
10461     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10462     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10463   %}
10464   ins_pipe(ialu_reg_reg);
10465 %}
10466 
10467 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10468 %{
10469   predicate(VM_Version::supports_fast_3op_lea());
10470   match(Set dst (AddL (AddL base index) disp));
10471 
10472   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10473   ins_encode %{
10474     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10475   %}
10476   ins_pipe(ialu_reg_reg);
10477 %}
10478 
10479 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10480 %{
10481   predicate(VM_Version::supports_fast_2op_lea());
10482   match(Set dst (AddL base (LShiftL index scale)));
10483 
10484   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10485   ins_encode %{
10486     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10487     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10488   %}
10489   ins_pipe(ialu_reg_reg);
10490 %}
10491 
10492 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10493 %{
10494   predicate(VM_Version::supports_fast_3op_lea());
10495   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10496 
10497   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10498   ins_encode %{
10499     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10500     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10501   %}
10502   ins_pipe(ialu_reg_reg);
10503 %}
10504 
10505 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10506 %{
10507   match(Set dst (AddP dst src));
10508   effect(KILL cr);
10509   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10510 
10511   format %{ "addq    $dst, $src\t# ptr" %}
10512   ins_encode %{
10513     __ addq($dst$$Register, $src$$Register);
10514   %}
10515   ins_pipe(ialu_reg_reg);
10516 %}
10517 
10518 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10519 %{
10520   match(Set dst (AddP dst src));
10521   effect(KILL cr);
10522   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10523 
10524   format %{ "addq    $dst, $src\t# ptr" %}
10525   ins_encode %{
10526     __ addq($dst$$Register, $src$$constant);
10527   %}
10528   ins_pipe( ialu_reg );
10529 %}
10530 
10531 // XXX addP mem ops ????
10532 
10533 instruct checkCastPP(rRegP dst)
10534 %{
10535   match(Set dst (CheckCastPP dst));
10536 
10537   size(0);
10538   format %{ "# checkcastPP of $dst" %}
10539   ins_encode(/* empty encoding */);
10540   ins_pipe(empty);
10541 %}
10542 
10543 instruct castPP(rRegP dst)
10544 %{
10545   match(Set dst (CastPP dst));
10546 
10547   size(0);
10548   format %{ "# castPP of $dst" %}
10549   ins_encode(/* empty encoding */);
10550   ins_pipe(empty);
10551 %}
10552 
10553 instruct castII(rRegI dst)
10554 %{
10555   predicate(VerifyConstraintCasts == 0);
10556   match(Set dst (CastII dst));
10557 
10558   size(0);
10559   format %{ "# castII of $dst" %}
10560   ins_encode(/* empty encoding */);
10561   ins_cost(0);
10562   ins_pipe(empty);
10563 %}
10564 
10565 instruct castII_checked(rRegI dst, rFlagsReg cr)
10566 %{
10567   predicate(VerifyConstraintCasts > 0);
10568   match(Set dst (CastII dst));
10569 
10570   effect(KILL cr);
10571   format %{ "# cast_checked_II $dst" %}
10572   ins_encode %{
10573     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10574   %}
10575   ins_pipe(pipe_slow);
10576 %}
10577 
10578 instruct castLL(rRegL dst)
10579 %{
10580   predicate(VerifyConstraintCasts == 0);
10581   match(Set dst (CastLL dst));
10582 
10583   size(0);
10584   format %{ "# castLL of $dst" %}
10585   ins_encode(/* empty encoding */);
10586   ins_cost(0);
10587   ins_pipe(empty);
10588 %}
10589 
10590 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10591 %{
10592   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10593   match(Set dst (CastLL dst));
10594 
10595   effect(KILL cr);
10596   format %{ "# cast_checked_LL $dst" %}
10597   ins_encode %{
10598     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10599   %}
10600   ins_pipe(pipe_slow);
10601 %}
10602 
10603 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10604 %{
10605   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10606   match(Set dst (CastLL dst));
10607 
10608   effect(KILL cr, TEMP tmp);
10609   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10610   ins_encode %{
10611     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10612   %}
10613   ins_pipe(pipe_slow);
10614 %}
10615 
10616 instruct castFF(regF dst)
10617 %{
10618   match(Set dst (CastFF dst));
10619 
10620   size(0);
10621   format %{ "# castFF of $dst" %}
10622   ins_encode(/* empty encoding */);
10623   ins_cost(0);
10624   ins_pipe(empty);
10625 %}
10626 
10627 instruct castHH(regF dst)
10628 %{
10629   match(Set dst (CastHH dst));
10630 
10631   size(0);
10632   format %{ "# castHH of $dst" %}
10633   ins_encode(/* empty encoding */);
10634   ins_cost(0);
10635   ins_pipe(empty);
10636 %}
10637 
10638 instruct castDD(regD dst)
10639 %{
10640   match(Set dst (CastDD dst));
10641 
10642   size(0);
10643   format %{ "# castDD of $dst" %}
10644   ins_encode(/* empty encoding */);
10645   ins_cost(0);
10646   ins_pipe(empty);
10647 %}
10648 
10649 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
10650 instruct compareAndSwapP(rRegI res,
10651                          memory mem_ptr,
10652                          rax_RegP oldval, rRegP newval,
10653                          rFlagsReg cr)
10654 %{
10655   predicate(n->as_LoadStore()->barrier_data() == 0);
10656   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10657   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10658   effect(KILL cr, KILL oldval);
10659 
10660   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10661             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10662             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10663   ins_encode %{
10664     __ lock();
10665     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10666     __ setcc(Assembler::equal, $res$$Register);
10667   %}
10668   ins_pipe( pipe_cmpxchg );
10669 %}
10670 
10671 instruct compareAndSwapL(rRegI res,
10672                          memory mem_ptr,
10673                          rax_RegL oldval, rRegL newval,
10674                          rFlagsReg cr)
10675 %{
10676   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10677   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10678   effect(KILL cr, KILL oldval);
10679 
10680   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10681             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10682             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10683   ins_encode %{
10684     __ lock();
10685     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10686     __ setcc(Assembler::equal, $res$$Register);
10687   %}
10688   ins_pipe( pipe_cmpxchg );
10689 %}
10690 
10691 instruct compareAndSwapI(rRegI res,
10692                          memory mem_ptr,
10693                          rax_RegI oldval, rRegI newval,
10694                          rFlagsReg cr)
10695 %{
10696   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10697   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10698   effect(KILL cr, KILL oldval);
10699 
10700   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10701             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10702             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10703   ins_encode %{
10704     __ lock();
10705     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10706     __ setcc(Assembler::equal, $res$$Register);
10707   %}
10708   ins_pipe( pipe_cmpxchg );
10709 %}
10710 
10711 instruct compareAndSwapB(rRegI res,
10712                          memory mem_ptr,
10713                          rax_RegI oldval, rRegI newval,
10714                          rFlagsReg cr)
10715 %{
10716   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10717   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10718   effect(KILL cr, KILL oldval);
10719 
10720   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10721             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10722             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10723   ins_encode %{
10724     __ lock();
10725     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10726     __ setcc(Assembler::equal, $res$$Register);
10727   %}
10728   ins_pipe( pipe_cmpxchg );
10729 %}
10730 
10731 instruct compareAndSwapS(rRegI res,
10732                          memory mem_ptr,
10733                          rax_RegI oldval, rRegI newval,
10734                          rFlagsReg cr)
10735 %{
10736   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10737   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10738   effect(KILL cr, KILL oldval);
10739 
10740   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10741             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10742             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10743   ins_encode %{
10744     __ lock();
10745     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10746     __ setcc(Assembler::equal, $res$$Register);
10747   %}
10748   ins_pipe( pipe_cmpxchg );
10749 %}
10750 
10751 instruct compareAndSwapN(rRegI res,
10752                           memory mem_ptr,
10753                           rax_RegN oldval, rRegN newval,
10754                           rFlagsReg cr) %{
10755   predicate(n->as_LoadStore()->barrier_data() == 0);
10756   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10757   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10758   effect(KILL cr, KILL oldval);
10759 
10760   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10761             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10762             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10763   ins_encode %{
10764     __ lock();
10765     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10766     __ setcc(Assembler::equal, $res$$Register);
10767   %}
10768   ins_pipe( pipe_cmpxchg );
10769 %}
10770 
10771 instruct compareAndExchangeB(
10772                          memory mem_ptr,
10773                          rax_RegI oldval, rRegI newval,
10774                          rFlagsReg cr)
10775 %{
10776   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10777   effect(KILL cr);
10778 
10779   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10780             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10781   ins_encode %{
10782     __ lock();
10783     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10784   %}
10785   ins_pipe( pipe_cmpxchg );
10786 %}
10787 
10788 instruct compareAndExchangeS(
10789                          memory mem_ptr,
10790                          rax_RegI oldval, rRegI newval,
10791                          rFlagsReg cr)
10792 %{
10793   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10794   effect(KILL cr);
10795 
10796   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10797             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10798   ins_encode %{
10799     __ lock();
10800     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10801   %}
10802   ins_pipe( pipe_cmpxchg );
10803 %}
10804 
10805 instruct compareAndExchangeI(
10806                          memory mem_ptr,
10807                          rax_RegI oldval, rRegI newval,
10808                          rFlagsReg cr)
10809 %{
10810   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10811   effect(KILL cr);
10812 
10813   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10814             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10815   ins_encode %{
10816     __ lock();
10817     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10818   %}
10819   ins_pipe( pipe_cmpxchg );
10820 %}
10821 
10822 instruct compareAndExchangeL(
10823                          memory mem_ptr,
10824                          rax_RegL oldval, rRegL newval,
10825                          rFlagsReg cr)
10826 %{
10827   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10828   effect(KILL cr);
10829 
10830   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10831             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10832   ins_encode %{
10833     __ lock();
10834     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10835   %}
10836   ins_pipe( pipe_cmpxchg );
10837 %}
10838 
10839 instruct compareAndExchangeN(
10840                           memory mem_ptr,
10841                           rax_RegN oldval, rRegN newval,
10842                           rFlagsReg cr) %{
10843   predicate(n->as_LoadStore()->barrier_data() == 0);
10844   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10845   effect(KILL cr);
10846 
10847   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10848             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10849   ins_encode %{
10850     __ lock();
10851     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10852   %}
10853   ins_pipe( pipe_cmpxchg );
10854 %}
10855 
10856 instruct compareAndExchangeP(
10857                          memory mem_ptr,
10858                          rax_RegP oldval, rRegP newval,
10859                          rFlagsReg cr)
10860 %{
10861   predicate(n->as_LoadStore()->barrier_data() == 0);
10862   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10863   effect(KILL cr);
10864 
10865   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10866             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10867   ins_encode %{
10868     __ lock();
10869     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10870   %}
10871   ins_pipe( pipe_cmpxchg );
10872 %}
10873 
10874 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10875   predicate(n->as_LoadStore()->result_not_used());
10876   match(Set dummy (GetAndAddB mem add));
10877   effect(KILL cr);
10878   format %{ "addb_lock   $mem, $add" %}
10879   ins_encode %{
10880     __ lock();
10881     __ addb($mem$$Address, $add$$Register);
10882   %}
10883   ins_pipe(pipe_cmpxchg);
10884 %}
10885 
10886 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10887   predicate(n->as_LoadStore()->result_not_used());
10888   match(Set dummy (GetAndAddB mem add));
10889   effect(KILL cr);
10890   format %{ "addb_lock   $mem, $add" %}
10891   ins_encode %{
10892     __ lock();
10893     __ addb($mem$$Address, $add$$constant);
10894   %}
10895   ins_pipe(pipe_cmpxchg);
10896 %}
10897 
10898 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10899   predicate(!n->as_LoadStore()->result_not_used());
10900   match(Set newval (GetAndAddB mem newval));
10901   effect(KILL cr);
10902   format %{ "xaddb_lock  $mem, $newval\t# $newval -> byte" %}
10903   ins_encode %{
10904     __ lock();
10905     __ xaddb($mem$$Address, $newval$$Register);
10906     __ narrow_subword_type($newval$$Register, T_BYTE);
10907   %}
10908   ins_pipe(pipe_cmpxchg);
10909 %}
10910 
10911 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10912   predicate(n->as_LoadStore()->result_not_used());
10913   match(Set dummy (GetAndAddS mem add));
10914   effect(KILL cr);
10915   format %{ "addw_lock   $mem, $add" %}
10916   ins_encode %{
10917     __ lock();
10918     __ addw($mem$$Address, $add$$Register);
10919   %}
10920   ins_pipe(pipe_cmpxchg);
10921 %}
10922 
10923 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10924   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10925   match(Set dummy (GetAndAddS mem add));
10926   effect(KILL cr);
10927   format %{ "addw_lock   $mem, $add" %}
10928   ins_encode %{
10929     __ lock();
10930     __ addw($mem$$Address, $add$$constant);
10931   %}
10932   ins_pipe(pipe_cmpxchg);
10933 %}
10934 
10935 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10936   predicate(!n->as_LoadStore()->result_not_used());
10937   match(Set newval (GetAndAddS mem newval));
10938   effect(KILL cr);
10939   format %{ "xaddw_lock  $mem, $newval\t# $newval -> short" %}
10940   ins_encode %{
10941     __ lock();
10942     __ xaddw($mem$$Address, $newval$$Register);
10943     __ narrow_subword_type($newval$$Register, T_SHORT);
10944   %}
10945   ins_pipe(pipe_cmpxchg);
10946 %}
10947 
10948 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10949   predicate(n->as_LoadStore()->result_not_used());
10950   match(Set dummy (GetAndAddI mem add));
10951   effect(KILL cr);
10952   format %{ "addl_lock   $mem, $add" %}
10953   ins_encode %{
10954     __ lock();
10955     __ addl($mem$$Address, $add$$Register);
10956   %}
10957   ins_pipe(pipe_cmpxchg);
10958 %}
10959 
10960 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10961   predicate(n->as_LoadStore()->result_not_used());
10962   match(Set dummy (GetAndAddI mem add));
10963   effect(KILL cr);
10964   format %{ "addl_lock   $mem, $add" %}
10965   ins_encode %{
10966     __ lock();
10967     __ addl($mem$$Address, $add$$constant);
10968   %}
10969   ins_pipe(pipe_cmpxchg);
10970 %}
10971 
10972 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10973   predicate(!n->as_LoadStore()->result_not_used());
10974   match(Set newval (GetAndAddI mem newval));
10975   effect(KILL cr);
10976   format %{ "xaddl_lock  $mem, $newval" %}
10977   ins_encode %{
10978     __ lock();
10979     __ xaddl($mem$$Address, $newval$$Register);
10980   %}
10981   ins_pipe(pipe_cmpxchg);
10982 %}
10983 
10984 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10985   predicate(n->as_LoadStore()->result_not_used());
10986   match(Set dummy (GetAndAddL mem add));
10987   effect(KILL cr);
10988   format %{ "addq_lock   $mem, $add" %}
10989   ins_encode %{
10990     __ lock();
10991     __ addq($mem$$Address, $add$$Register);
10992   %}
10993   ins_pipe(pipe_cmpxchg);
10994 %}
10995 
10996 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10997   predicate(n->as_LoadStore()->result_not_used());
10998   match(Set dummy (GetAndAddL mem add));
10999   effect(KILL cr);
11000   format %{ "addq_lock   $mem, $add" %}
11001   ins_encode %{
11002     __ lock();
11003     __ addq($mem$$Address, $add$$constant);
11004   %}
11005   ins_pipe(pipe_cmpxchg);
11006 %}
11007 
11008 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11009   predicate(!n->as_LoadStore()->result_not_used());
11010   match(Set newval (GetAndAddL mem newval));
11011   effect(KILL cr);
11012   format %{ "xaddq_lock  $mem, $newval" %}
11013   ins_encode %{
11014     __ lock();
11015     __ xaddq($mem$$Address, $newval$$Register);
11016   %}
11017   ins_pipe(pipe_cmpxchg);
11018 %}
11019 
11020 instruct xchgB( memory mem, rRegI newval) %{
11021   match(Set newval (GetAndSetB mem newval));
11022   format %{ "XCHGB  $newval,[$mem]\t# $newval -> byte" %}
11023   ins_encode %{
11024     __ xchgb($newval$$Register, $mem$$Address);
11025     __ narrow_subword_type($newval$$Register, T_BYTE);
11026   %}
11027   ins_pipe( pipe_cmpxchg );
11028 %}
11029 
11030 instruct xchgS( memory mem, rRegI newval) %{
11031   match(Set newval (GetAndSetS mem newval));
11032   format %{ "XCHGW  $newval,[$mem]\t# $newval -> short" %}
11033   ins_encode %{
11034     __ xchgw($newval$$Register, $mem$$Address);
11035     __ narrow_subword_type($newval$$Register, T_SHORT);
11036   %}
11037   ins_pipe( pipe_cmpxchg );
11038 %}
11039 
11040 instruct xchgI( memory mem, rRegI newval) %{
11041   match(Set newval (GetAndSetI mem newval));
11042   format %{ "XCHGL  $newval,[$mem]" %}
11043   ins_encode %{
11044     __ xchgl($newval$$Register, $mem$$Address);
11045   %}
11046   ins_pipe( pipe_cmpxchg );
11047 %}
11048 
11049 instruct xchgL( memory mem, rRegL newval) %{
11050   match(Set newval (GetAndSetL mem newval));
11051   format %{ "XCHGL  $newval,[$mem]" %}
11052   ins_encode %{
11053     __ xchgq($newval$$Register, $mem$$Address);
11054   %}
11055   ins_pipe( pipe_cmpxchg );
11056 %}
11057 
11058 instruct xchgP( memory mem, rRegP newval) %{
11059   match(Set newval (GetAndSetP mem newval));
11060   predicate(n->as_LoadStore()->barrier_data() == 0);
11061   format %{ "XCHGQ  $newval,[$mem]" %}
11062   ins_encode %{
11063     __ xchgq($newval$$Register, $mem$$Address);
11064   %}
11065   ins_pipe( pipe_cmpxchg );
11066 %}
11067 
11068 instruct xchgN( memory mem, rRegN newval) %{
11069   predicate(n->as_LoadStore()->barrier_data() == 0);
11070   match(Set newval (GetAndSetN mem newval));
11071   format %{ "XCHGL  $newval,$mem]" %}
11072   ins_encode %{
11073     __ xchgl($newval$$Register, $mem$$Address);
11074   %}
11075   ins_pipe( pipe_cmpxchg );
11076 %}
11077 
11078 //----------Abs Instructions-------------------------------------------
11079 
11080 // Integer Absolute Instructions
11081 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11082 %{
11083   match(Set dst (AbsI src));
11084   effect(TEMP dst, KILL cr);
11085   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11086             "subl    $dst, $src\n\t"
11087             "cmovll  $dst, $src" %}
11088   ins_encode %{
11089     __ xorl($dst$$Register, $dst$$Register);
11090     __ subl($dst$$Register, $src$$Register);
11091     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11092   %}
11093 
11094   ins_pipe(ialu_reg_reg);
11095 %}
11096 
11097 // Long Absolute Instructions
11098 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11099 %{
11100   match(Set dst (AbsL src));
11101   effect(TEMP dst, KILL cr);
11102   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11103             "subq    $dst, $src\n\t"
11104             "cmovlq  $dst, $src" %}
11105   ins_encode %{
11106     __ xorl($dst$$Register, $dst$$Register);
11107     __ subq($dst$$Register, $src$$Register);
11108     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11109   %}
11110 
11111   ins_pipe(ialu_reg_reg);
11112 %}
11113 
11114 //----------Subtraction Instructions-------------------------------------------
11115 
11116 // Integer Subtraction Instructions
11117 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11118 %{
11119   predicate(!UseAPX);
11120   match(Set dst (SubI dst src));
11121   effect(KILL cr);
11122   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11123 
11124   format %{ "subl    $dst, $src\t# int" %}
11125   ins_encode %{
11126     __ subl($dst$$Register, $src$$Register);
11127   %}
11128   ins_pipe(ialu_reg_reg);
11129 %}
11130 
11131 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11132 %{
11133   predicate(UseAPX);
11134   match(Set dst (SubI src1 src2));
11135   effect(KILL cr);
11136   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11137 
11138   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11139   ins_encode %{
11140     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11141   %}
11142   ins_pipe(ialu_reg_reg);
11143 %}
11144 
11145 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11146 %{
11147   predicate(UseAPX);
11148   match(Set dst (SubI src1 src2));
11149   effect(KILL cr);
11150   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11151 
11152   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11153   ins_encode %{
11154     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11155   %}
11156   ins_pipe(ialu_reg_reg);
11157 %}
11158 
11159 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11160 %{
11161   predicate(UseAPX);
11162   match(Set dst (SubI (LoadI src1) src2));
11163   effect(KILL cr);
11164   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11165 
11166   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11167   ins_encode %{
11168     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11169   %}
11170   ins_pipe(ialu_reg_reg);
11171 %}
11172 
11173 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11174 %{
11175   predicate(!UseAPX);
11176   match(Set dst (SubI dst (LoadI src)));
11177   effect(KILL cr);
11178   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11179 
11180   ins_cost(150);
11181   format %{ "subl    $dst, $src\t# int" %}
11182   ins_encode %{
11183     __ subl($dst$$Register, $src$$Address);
11184   %}
11185   ins_pipe(ialu_reg_mem);
11186 %}
11187 
11188 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11189 %{
11190   predicate(UseAPX);
11191   match(Set dst (SubI src1 (LoadI src2)));
11192   effect(KILL cr);
11193   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11194 
11195   ins_cost(150);
11196   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11197   ins_encode %{
11198     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11199   %}
11200   ins_pipe(ialu_reg_mem);
11201 %}
11202 
11203 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11204 %{
11205   predicate(UseAPX);
11206   match(Set dst (SubI (LoadI src1) src2));
11207   effect(KILL cr);
11208   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11209 
11210   ins_cost(150);
11211   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11212   ins_encode %{
11213     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11214   %}
11215   ins_pipe(ialu_reg_mem);
11216 %}
11217 
11218 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11219 %{
11220   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11221   effect(KILL cr);
11222   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11223 
11224   ins_cost(150);
11225   format %{ "subl    $dst, $src\t# int" %}
11226   ins_encode %{
11227     __ subl($dst$$Address, $src$$Register);
11228   %}
11229   ins_pipe(ialu_mem_reg);
11230 %}
11231 
11232 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11233 %{
11234   predicate(!UseAPX);
11235   match(Set dst (SubL dst src));
11236   effect(KILL cr);
11237   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11238 
11239   format %{ "subq    $dst, $src\t# long" %}
11240   ins_encode %{
11241     __ subq($dst$$Register, $src$$Register);
11242   %}
11243   ins_pipe(ialu_reg_reg);
11244 %}
11245 
11246 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11247 %{
11248   predicate(UseAPX);
11249   match(Set dst (SubL src1 src2));
11250   effect(KILL cr);
11251   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11252 
11253   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11254   ins_encode %{
11255     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11256   %}
11257   ins_pipe(ialu_reg_reg);
11258 %}
11259 
11260 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11261 %{
11262   predicate(UseAPX);
11263   match(Set dst (SubL src1 src2));
11264   effect(KILL cr);
11265   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11266 
11267   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11268   ins_encode %{
11269     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11270   %}
11271   ins_pipe(ialu_reg_reg);
11272 %}
11273 
11274 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11275 %{
11276   predicate(UseAPX);
11277   match(Set dst (SubL (LoadL src1) src2));
11278   effect(KILL cr);
11279   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11280 
11281   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11282   ins_encode %{
11283     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11284   %}
11285   ins_pipe(ialu_reg_reg);
11286 %}
11287 
11288 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11289 %{
11290   predicate(!UseAPX);
11291   match(Set dst (SubL dst (LoadL src)));
11292   effect(KILL cr);
11293   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11294 
11295   ins_cost(150);
11296   format %{ "subq    $dst, $src\t# long" %}
11297   ins_encode %{
11298     __ subq($dst$$Register, $src$$Address);
11299   %}
11300   ins_pipe(ialu_reg_mem);
11301 %}
11302 
11303 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11304 %{
11305   predicate(UseAPX);
11306   match(Set dst (SubL src1 (LoadL src2)));
11307   effect(KILL cr);
11308   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11309 
11310   ins_cost(150);
11311   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11312   ins_encode %{
11313     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11314   %}
11315   ins_pipe(ialu_reg_mem);
11316 %}
11317 
11318 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11319 %{
11320   predicate(UseAPX);
11321   match(Set dst (SubL (LoadL src1) src2));
11322   effect(KILL cr);
11323   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11324 
11325   ins_cost(150);
11326   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11327   ins_encode %{
11328     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11329   %}
11330   ins_pipe(ialu_reg_mem);
11331 %}
11332 
11333 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11334 %{
11335   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11336   effect(KILL cr);
11337   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11338 
11339   ins_cost(150);
11340   format %{ "subq    $dst, $src\t# long" %}
11341   ins_encode %{
11342     __ subq($dst$$Address, $src$$Register);
11343   %}
11344   ins_pipe(ialu_mem_reg);
11345 %}
11346 
11347 // Subtract from a pointer
11348 // XXX hmpf???
11349 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11350 %{
11351   match(Set dst (AddP dst (SubI zero src)));
11352   effect(KILL cr);
11353 
11354   format %{ "subq    $dst, $src\t# ptr - int" %}
11355   ins_encode %{
11356     __ subq($dst$$Register, $src$$Register);
11357   %}
11358   ins_pipe(ialu_reg_reg);
11359 %}
11360 
11361 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11362 %{
11363   predicate(!UseAPX);
11364   match(Set dst (SubI zero dst));
11365   effect(KILL cr);
11366   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11367 
11368   format %{ "negl    $dst\t# int" %}
11369   ins_encode %{
11370     __ negl($dst$$Register);
11371   %}
11372   ins_pipe(ialu_reg);
11373 %}
11374 
11375 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11376 %{
11377   predicate(UseAPX);
11378   match(Set dst (SubI zero src));
11379   effect(KILL cr);
11380   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11381 
11382   format %{ "enegl    $dst, $src\t# int ndd" %}
11383   ins_encode %{
11384     __ enegl($dst$$Register, $src$$Register, false);
11385   %}
11386   ins_pipe(ialu_reg);
11387 %}
11388 
11389 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11390 %{
11391   predicate(!UseAPX);
11392   match(Set dst (NegI dst));
11393   effect(KILL cr);
11394   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11395 
11396   format %{ "negl    $dst\t# int" %}
11397   ins_encode %{
11398     __ negl($dst$$Register);
11399   %}
11400   ins_pipe(ialu_reg);
11401 %}
11402 
11403 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11404 %{
11405   predicate(UseAPX);
11406   match(Set dst (NegI src));
11407   effect(KILL cr);
11408   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11409 
11410   format %{ "enegl    $dst, $src\t# int ndd" %}
11411   ins_encode %{
11412     __ enegl($dst$$Register, $src$$Register, false);
11413   %}
11414   ins_pipe(ialu_reg);
11415 %}
11416 
11417 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11418 %{
11419   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11420   effect(KILL cr);
11421   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11422 
11423   format %{ "negl    $dst\t# int" %}
11424   ins_encode %{
11425     __ negl($dst$$Address);
11426   %}
11427   ins_pipe(ialu_reg);
11428 %}
11429 
11430 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11431 %{
11432   predicate(!UseAPX);
11433   match(Set dst (SubL zero dst));
11434   effect(KILL cr);
11435   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11436 
11437   format %{ "negq    $dst\t# long" %}
11438   ins_encode %{
11439     __ negq($dst$$Register);
11440   %}
11441   ins_pipe(ialu_reg);
11442 %}
11443 
11444 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11445 %{
11446   predicate(UseAPX);
11447   match(Set dst (SubL zero src));
11448   effect(KILL cr);
11449   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11450 
11451   format %{ "enegq    $dst, $src\t# long ndd" %}
11452   ins_encode %{
11453     __ enegq($dst$$Register, $src$$Register, false);
11454   %}
11455   ins_pipe(ialu_reg);
11456 %}
11457 
11458 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11459 %{
11460   predicate(!UseAPX);
11461   match(Set dst (NegL dst));
11462   effect(KILL cr);
11463   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11464 
11465   format %{ "negq    $dst\t# int" %}
11466   ins_encode %{
11467     __ negq($dst$$Register);
11468   %}
11469   ins_pipe(ialu_reg);
11470 %}
11471 
11472 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11473 %{
11474   predicate(UseAPX);
11475   match(Set dst (NegL src));
11476   effect(KILL cr);
11477   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11478 
11479   format %{ "enegq    $dst, $src\t# long ndd" %}
11480   ins_encode %{
11481     __ enegq($dst$$Register, $src$$Register, false);
11482   %}
11483   ins_pipe(ialu_reg);
11484 %}
11485 
11486 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11487 %{
11488   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11489   effect(KILL cr);
11490   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11491 
11492   format %{ "negq    $dst\t# long" %}
11493   ins_encode %{
11494     __ negq($dst$$Address);
11495   %}
11496   ins_pipe(ialu_reg);
11497 %}
11498 
11499 //----------Multiplication/Division Instructions-------------------------------
11500 // Integer Multiplication Instructions
11501 // Multiply Register
11502 
11503 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11504 %{
11505   predicate(!UseAPX);
11506   match(Set dst (MulI dst src));
11507   effect(KILL cr);
11508 
11509   ins_cost(300);
11510   format %{ "imull   $dst, $src\t# int" %}
11511   ins_encode %{
11512     __ imull($dst$$Register, $src$$Register);
11513   %}
11514   ins_pipe(ialu_reg_reg_alu0);
11515 %}
11516 
11517 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11518 %{
11519   predicate(UseAPX);
11520   match(Set dst (MulI src1 src2));
11521   effect(KILL cr);
11522   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11523 
11524   ins_cost(300);
11525   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11526   ins_encode %{
11527     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11528   %}
11529   ins_pipe(ialu_reg_reg_alu0);
11530 %}
11531 
11532 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11533 %{
11534   match(Set dst (MulI src imm));
11535   effect(KILL cr);
11536 
11537   ins_cost(300);
11538   format %{ "imull   $dst, $src, $imm\t# int" %}
11539   ins_encode %{
11540     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11541   %}
11542   ins_pipe(ialu_reg_reg_alu0);
11543 %}
11544 
11545 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11546 %{
11547   predicate(!UseAPX);
11548   match(Set dst (MulI dst (LoadI src)));
11549   effect(KILL cr);
11550 
11551   ins_cost(350);
11552   format %{ "imull   $dst, $src\t# int" %}
11553   ins_encode %{
11554     __ imull($dst$$Register, $src$$Address);
11555   %}
11556   ins_pipe(ialu_reg_mem_alu0);
11557 %}
11558 
11559 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11560 %{
11561   predicate(UseAPX);
11562   match(Set dst (MulI src1 (LoadI src2)));
11563   effect(KILL cr);
11564   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11565 
11566   ins_cost(350);
11567   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11568   ins_encode %{
11569     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11570   %}
11571   ins_pipe(ialu_reg_mem_alu0);
11572 %}
11573 
11574 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11575 %{
11576   match(Set dst (MulI (LoadI src) imm));
11577   effect(KILL cr);
11578 
11579   ins_cost(300);
11580   format %{ "imull   $dst, $src, $imm\t# int" %}
11581   ins_encode %{
11582     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11583   %}
11584   ins_pipe(ialu_reg_mem_alu0);
11585 %}
11586 
11587 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11588 %{
11589   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11590   effect(KILL cr, KILL src2);
11591 
11592   expand %{ mulI_rReg(dst, src1, cr);
11593            mulI_rReg(src2, src3, cr);
11594            addI_rReg(dst, src2, cr); %}
11595 %}
11596 
11597 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11598 %{
11599   predicate(!UseAPX);
11600   match(Set dst (MulL dst src));
11601   effect(KILL cr);
11602 
11603   ins_cost(300);
11604   format %{ "imulq   $dst, $src\t# long" %}
11605   ins_encode %{
11606     __ imulq($dst$$Register, $src$$Register);
11607   %}
11608   ins_pipe(ialu_reg_reg_alu0);
11609 %}
11610 
11611 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11612 %{
11613   predicate(UseAPX);
11614   match(Set dst (MulL src1 src2));
11615   effect(KILL cr);
11616   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11617 
11618   ins_cost(300);
11619   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11620   ins_encode %{
11621     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11622   %}
11623   ins_pipe(ialu_reg_reg_alu0);
11624 %}
11625 
11626 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11627 %{
11628   match(Set dst (MulL src imm));
11629   effect(KILL cr);
11630 
11631   ins_cost(300);
11632   format %{ "imulq   $dst, $src, $imm\t# long" %}
11633   ins_encode %{
11634     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11635   %}
11636   ins_pipe(ialu_reg_reg_alu0);
11637 %}
11638 
11639 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11640 %{
11641   predicate(!UseAPX);
11642   match(Set dst (MulL dst (LoadL src)));
11643   effect(KILL cr);
11644 
11645   ins_cost(350);
11646   format %{ "imulq   $dst, $src\t# long" %}
11647   ins_encode %{
11648     __ imulq($dst$$Register, $src$$Address);
11649   %}
11650   ins_pipe(ialu_reg_mem_alu0);
11651 %}
11652 
11653 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11654 %{
11655   predicate(UseAPX);
11656   match(Set dst (MulL src1 (LoadL src2)));
11657   effect(KILL cr);
11658   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11659 
11660   ins_cost(350);
11661   format %{ "eimulq   $dst, $src1, $src2 \t# long" %}
11662   ins_encode %{
11663     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11664   %}
11665   ins_pipe(ialu_reg_mem_alu0);
11666 %}
11667 
11668 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11669 %{
11670   match(Set dst (MulL (LoadL src) imm));
11671   effect(KILL cr);
11672 
11673   ins_cost(300);
11674   format %{ "imulq   $dst, $src, $imm\t# long" %}
11675   ins_encode %{
11676     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11677   %}
11678   ins_pipe(ialu_reg_mem_alu0);
11679 %}
11680 
11681 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11682 %{
11683   match(Set dst (MulHiL src rax));
11684   effect(USE_KILL rax, KILL cr);
11685 
11686   ins_cost(300);
11687   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11688   ins_encode %{
11689     __ imulq($src$$Register);
11690   %}
11691   ins_pipe(ialu_reg_reg_alu0);
11692 %}
11693 
11694 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11695 %{
11696   match(Set dst (UMulHiL src rax));
11697   effect(USE_KILL rax, KILL cr);
11698 
11699   ins_cost(300);
11700   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11701   ins_encode %{
11702     __ mulq($src$$Register);
11703   %}
11704   ins_pipe(ialu_reg_reg_alu0);
11705 %}
11706 
11707 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11708                    rFlagsReg cr)
11709 %{
11710   match(Set rax (DivI rax div));
11711   effect(KILL rdx, KILL cr);
11712 
11713   ins_cost(30*100+10*100); // XXX
11714   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11715             "jne,s   normal\n\t"
11716             "xorl    rdx, rdx\n\t"
11717             "cmpl    $div, -1\n\t"
11718             "je,s    done\n"
11719     "normal: cdql\n\t"
11720             "idivl   $div\n"
11721     "done:"        %}
11722   ins_encode(cdql_enc(div));
11723   ins_pipe(ialu_reg_reg_alu0);
11724 %}
11725 
11726 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11727                    rFlagsReg cr)
11728 %{
11729   match(Set rax (DivL rax div));
11730   effect(KILL rdx, KILL cr);
11731 
11732   ins_cost(30*100+10*100); // XXX
11733   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11734             "cmpq    rax, rdx\n\t"
11735             "jne,s   normal\n\t"
11736             "xorl    rdx, rdx\n\t"
11737             "cmpq    $div, -1\n\t"
11738             "je,s    done\n"
11739     "normal: cdqq\n\t"
11740             "idivq   $div\n"
11741     "done:"        %}
11742   ins_encode(cdqq_enc(div));
11743   ins_pipe(ialu_reg_reg_alu0);
11744 %}
11745 
11746 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11747 %{
11748   match(Set rax (UDivI rax div));
11749   effect(KILL rdx, KILL cr);
11750 
11751   ins_cost(300);
11752   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11753   ins_encode %{
11754     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11755   %}
11756   ins_pipe(ialu_reg_reg_alu0);
11757 %}
11758 
11759 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11760 %{
11761   match(Set rax (UDivL rax div));
11762   effect(KILL rdx, KILL cr);
11763 
11764   ins_cost(300);
11765   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11766   ins_encode %{
11767      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11768   %}
11769   ins_pipe(ialu_reg_reg_alu0);
11770 %}
11771 
11772 // Integer DIVMOD with Register, both quotient and mod results
11773 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11774                              rFlagsReg cr)
11775 %{
11776   match(DivModI rax div);
11777   effect(KILL cr);
11778 
11779   ins_cost(30*100+10*100); // XXX
11780   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11781             "jne,s   normal\n\t"
11782             "xorl    rdx, rdx\n\t"
11783             "cmpl    $div, -1\n\t"
11784             "je,s    done\n"
11785     "normal: cdql\n\t"
11786             "idivl   $div\n"
11787     "done:"        %}
11788   ins_encode(cdql_enc(div));
11789   ins_pipe(pipe_slow);
11790 %}
11791 
11792 // Long DIVMOD with Register, both quotient and mod results
11793 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11794                              rFlagsReg cr)
11795 %{
11796   match(DivModL rax div);
11797   effect(KILL cr);
11798 
11799   ins_cost(30*100+10*100); // XXX
11800   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11801             "cmpq    rax, rdx\n\t"
11802             "jne,s   normal\n\t"
11803             "xorl    rdx, rdx\n\t"
11804             "cmpq    $div, -1\n\t"
11805             "je,s    done\n"
11806     "normal: cdqq\n\t"
11807             "idivq   $div\n"
11808     "done:"        %}
11809   ins_encode(cdqq_enc(div));
11810   ins_pipe(pipe_slow);
11811 %}
11812 
11813 // Unsigned integer DIVMOD with Register, both quotient and mod results
11814 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11815                               no_rax_rdx_RegI div, rFlagsReg cr)
11816 %{
11817   match(UDivModI rax div);
11818   effect(TEMP tmp, KILL cr);
11819 
11820   ins_cost(300);
11821   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11822             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11823           %}
11824   ins_encode %{
11825     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11826   %}
11827   ins_pipe(pipe_slow);
11828 %}
11829 
11830 // Unsigned long DIVMOD with Register, both quotient and mod results
11831 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11832                               no_rax_rdx_RegL div, rFlagsReg cr)
11833 %{
11834   match(UDivModL rax div);
11835   effect(TEMP tmp, KILL cr);
11836 
11837   ins_cost(300);
11838   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11839             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11840           %}
11841   ins_encode %{
11842     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11843   %}
11844   ins_pipe(pipe_slow);
11845 %}
11846 
11847 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11848                    rFlagsReg cr)
11849 %{
11850   match(Set rdx (ModI rax div));
11851   effect(KILL rax, KILL cr);
11852 
11853   ins_cost(300); // XXX
11854   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11855             "jne,s   normal\n\t"
11856             "xorl    rdx, rdx\n\t"
11857             "cmpl    $div, -1\n\t"
11858             "je,s    done\n"
11859     "normal: cdql\n\t"
11860             "idivl   $div\n"
11861     "done:"        %}
11862   ins_encode(cdql_enc(div));
11863   ins_pipe(ialu_reg_reg_alu0);
11864 %}
11865 
11866 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11867                    rFlagsReg cr)
11868 %{
11869   match(Set rdx (ModL rax div));
11870   effect(KILL rax, KILL cr);
11871 
11872   ins_cost(300); // XXX
11873   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11874             "cmpq    rax, rdx\n\t"
11875             "jne,s   normal\n\t"
11876             "xorl    rdx, rdx\n\t"
11877             "cmpq    $div, -1\n\t"
11878             "je,s    done\n"
11879     "normal: cdqq\n\t"
11880             "idivq   $div\n"
11881     "done:"        %}
11882   ins_encode(cdqq_enc(div));
11883   ins_pipe(ialu_reg_reg_alu0);
11884 %}
11885 
11886 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11887 %{
11888   match(Set rdx (UModI rax div));
11889   effect(KILL rax, KILL cr);
11890 
11891   ins_cost(300);
11892   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11893   ins_encode %{
11894     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11895   %}
11896   ins_pipe(ialu_reg_reg_alu0);
11897 %}
11898 
11899 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11900 %{
11901   match(Set rdx (UModL rax div));
11902   effect(KILL rax, KILL cr);
11903 
11904   ins_cost(300);
11905   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11906   ins_encode %{
11907     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11908   %}
11909   ins_pipe(ialu_reg_reg_alu0);
11910 %}
11911 
11912 // Integer Shift Instructions
11913 // Shift Left by one, two, three
11914 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11915 %{
11916   predicate(!UseAPX);
11917   match(Set dst (LShiftI dst shift));
11918   effect(KILL cr);
11919 
11920   format %{ "sall    $dst, $shift" %}
11921   ins_encode %{
11922     __ sall($dst$$Register, $shift$$constant);
11923   %}
11924   ins_pipe(ialu_reg);
11925 %}
11926 
11927 // Shift Left by one, two, three
11928 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11929 %{
11930   predicate(UseAPX);
11931   match(Set dst (LShiftI src shift));
11932   effect(KILL cr);
11933   flag(PD::Flag_ndd_demotable_opr1);
11934 
11935   format %{ "esall    $dst, $src, $shift\t# int(ndd)" %}
11936   ins_encode %{
11937     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11938   %}
11939   ins_pipe(ialu_reg);
11940 %}
11941 
11942 // Shift Left by 8-bit immediate
11943 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11944 %{
11945   predicate(!UseAPX);
11946   match(Set dst (LShiftI dst shift));
11947   effect(KILL cr);
11948 
11949   format %{ "sall    $dst, $shift" %}
11950   ins_encode %{
11951     __ sall($dst$$Register, $shift$$constant);
11952   %}
11953   ins_pipe(ialu_reg);
11954 %}
11955 
11956 // Shift Left by 8-bit immediate
11957 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11958 %{
11959   predicate(UseAPX);
11960   match(Set dst (LShiftI src shift));
11961   effect(KILL cr);
11962   flag(PD::Flag_ndd_demotable_opr1);
11963 
11964   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11965   ins_encode %{
11966     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11967   %}
11968   ins_pipe(ialu_reg);
11969 %}
11970 
11971 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11972 %{
11973   predicate(UseAPX);
11974   match(Set dst (LShiftI (LoadI src) shift));
11975   effect(KILL cr);
11976 
11977   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11978   ins_encode %{
11979     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11980   %}
11981   ins_pipe(ialu_reg);
11982 %}
11983 
11984 // Shift Left by 8-bit immediate
11985 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11986 %{
11987   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11988   effect(KILL cr);
11989 
11990   format %{ "sall    $dst, $shift" %}
11991   ins_encode %{
11992     __ sall($dst$$Address, $shift$$constant);
11993   %}
11994   ins_pipe(ialu_mem_imm);
11995 %}
11996 
11997 // Shift Left by variable
11998 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11999 %{
12000   predicate(!VM_Version::supports_bmi2());
12001   match(Set dst (LShiftI dst shift));
12002   effect(KILL cr);
12003 
12004   format %{ "sall    $dst, $shift" %}
12005   ins_encode %{
12006     __ sall($dst$$Register);
12007   %}
12008   ins_pipe(ialu_reg_reg);
12009 %}
12010 
12011 // Shift Left by variable
12012 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12013 %{
12014   predicate(!VM_Version::supports_bmi2());
12015   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12016   effect(KILL cr);
12017 
12018   format %{ "sall    $dst, $shift" %}
12019   ins_encode %{
12020     __ sall($dst$$Address);
12021   %}
12022   ins_pipe(ialu_mem_reg);
12023 %}
12024 
12025 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12026 %{
12027   predicate(VM_Version::supports_bmi2());
12028   match(Set dst (LShiftI src shift));
12029 
12030   format %{ "shlxl   $dst, $src, $shift" %}
12031   ins_encode %{
12032     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12033   %}
12034   ins_pipe(ialu_reg_reg);
12035 %}
12036 
12037 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12038 %{
12039   predicate(VM_Version::supports_bmi2());
12040   match(Set dst (LShiftI (LoadI src) shift));
12041   ins_cost(175);
12042   format %{ "shlxl   $dst, $src, $shift" %}
12043   ins_encode %{
12044     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12045   %}
12046   ins_pipe(ialu_reg_mem);
12047 %}
12048 
12049 // Arithmetic Shift Right by 8-bit immediate
12050 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12051 %{
12052   predicate(!UseAPX);
12053   match(Set dst (RShiftI dst shift));
12054   effect(KILL cr);
12055 
12056   format %{ "sarl    $dst, $shift" %}
12057   ins_encode %{
12058     __ sarl($dst$$Register, $shift$$constant);
12059   %}
12060   ins_pipe(ialu_mem_imm);
12061 %}
12062 
12063 // Arithmetic Shift Right by 8-bit immediate
12064 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12065 %{
12066   predicate(UseAPX);
12067   match(Set dst (RShiftI src shift));
12068   effect(KILL cr);
12069   flag(PD::Flag_ndd_demotable_opr1);
12070 
12071   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12072   ins_encode %{
12073     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12074   %}
12075   ins_pipe(ialu_mem_imm);
12076 %}
12077 
12078 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12079 %{
12080   predicate(UseAPX);
12081   match(Set dst (RShiftI (LoadI src) shift));
12082   effect(KILL cr);
12083 
12084   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12085   ins_encode %{
12086     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12087   %}
12088   ins_pipe(ialu_mem_imm);
12089 %}
12090 
12091 // Arithmetic Shift Right by 8-bit immediate
12092 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12093 %{
12094   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12095   effect(KILL cr);
12096 
12097   format %{ "sarl    $dst, $shift" %}
12098   ins_encode %{
12099     __ sarl($dst$$Address, $shift$$constant);
12100   %}
12101   ins_pipe(ialu_mem_imm);
12102 %}
12103 
12104 // Arithmetic Shift Right by variable
12105 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12106 %{
12107   predicate(!VM_Version::supports_bmi2());
12108   match(Set dst (RShiftI dst shift));
12109   effect(KILL cr);
12110 
12111   format %{ "sarl    $dst, $shift" %}
12112   ins_encode %{
12113     __ sarl($dst$$Register);
12114   %}
12115   ins_pipe(ialu_reg_reg);
12116 %}
12117 
12118 // Arithmetic Shift Right by variable
12119 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12120 %{
12121   predicate(!VM_Version::supports_bmi2());
12122   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12123   effect(KILL cr);
12124 
12125   format %{ "sarl    $dst, $shift" %}
12126   ins_encode %{
12127     __ sarl($dst$$Address);
12128   %}
12129   ins_pipe(ialu_mem_reg);
12130 %}
12131 
12132 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12133 %{
12134   predicate(VM_Version::supports_bmi2());
12135   match(Set dst (RShiftI src shift));
12136 
12137   format %{ "sarxl   $dst, $src, $shift" %}
12138   ins_encode %{
12139     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12140   %}
12141   ins_pipe(ialu_reg_reg);
12142 %}
12143 
12144 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12145 %{
12146   predicate(VM_Version::supports_bmi2());
12147   match(Set dst (RShiftI (LoadI src) shift));
12148   ins_cost(175);
12149   format %{ "sarxl   $dst, $src, $shift" %}
12150   ins_encode %{
12151     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12152   %}
12153   ins_pipe(ialu_reg_mem);
12154 %}
12155 
12156 // Logical Shift Right by 8-bit immediate
12157 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12158 %{
12159   predicate(!UseAPX);
12160   match(Set dst (URShiftI dst shift));
12161   effect(KILL cr);
12162 
12163   format %{ "shrl    $dst, $shift" %}
12164   ins_encode %{
12165     __ shrl($dst$$Register, $shift$$constant);
12166   %}
12167   ins_pipe(ialu_reg);
12168 %}
12169 
12170 // Logical Shift Right by 8-bit immediate
12171 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12172 %{
12173   predicate(UseAPX);
12174   match(Set dst (URShiftI src shift));
12175   effect(KILL cr);
12176   flag(PD::Flag_ndd_demotable_opr1);
12177 
12178   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12179   ins_encode %{
12180     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12181   %}
12182   ins_pipe(ialu_reg);
12183 %}
12184 
12185 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12186 %{
12187   predicate(UseAPX);
12188   match(Set dst (URShiftI (LoadI src) shift));
12189   effect(KILL cr);
12190 
12191   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12192   ins_encode %{
12193     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12194   %}
12195   ins_pipe(ialu_reg);
12196 %}
12197 
12198 // Logical Shift Right by 8-bit immediate
12199 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12200 %{
12201   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12202   effect(KILL cr);
12203 
12204   format %{ "shrl    $dst, $shift" %}
12205   ins_encode %{
12206     __ shrl($dst$$Address, $shift$$constant);
12207   %}
12208   ins_pipe(ialu_mem_imm);
12209 %}
12210 
12211 // Logical Shift Right by variable
12212 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12213 %{
12214   predicate(!VM_Version::supports_bmi2());
12215   match(Set dst (URShiftI dst shift));
12216   effect(KILL cr);
12217 
12218   format %{ "shrl    $dst, $shift" %}
12219   ins_encode %{
12220     __ shrl($dst$$Register);
12221   %}
12222   ins_pipe(ialu_reg_reg);
12223 %}
12224 
12225 // Logical Shift Right by variable
12226 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12227 %{
12228   predicate(!VM_Version::supports_bmi2());
12229   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12230   effect(KILL cr);
12231 
12232   format %{ "shrl    $dst, $shift" %}
12233   ins_encode %{
12234     __ shrl($dst$$Address);
12235   %}
12236   ins_pipe(ialu_mem_reg);
12237 %}
12238 
12239 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12240 %{
12241   predicate(VM_Version::supports_bmi2());
12242   match(Set dst (URShiftI src shift));
12243 
12244   format %{ "shrxl   $dst, $src, $shift" %}
12245   ins_encode %{
12246     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12247   %}
12248   ins_pipe(ialu_reg_reg);
12249 %}
12250 
12251 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12252 %{
12253   predicate(VM_Version::supports_bmi2());
12254   match(Set dst (URShiftI (LoadI src) shift));
12255   ins_cost(175);
12256   format %{ "shrxl   $dst, $src, $shift" %}
12257   ins_encode %{
12258     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12259   %}
12260   ins_pipe(ialu_reg_mem);
12261 %}
12262 
12263 // Long Shift Instructions
12264 // Shift Left by one, two, three
12265 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12266 %{
12267   predicate(!UseAPX);
12268   match(Set dst (LShiftL dst shift));
12269   effect(KILL cr);
12270 
12271   format %{ "salq    $dst, $shift" %}
12272   ins_encode %{
12273     __ salq($dst$$Register, $shift$$constant);
12274   %}
12275   ins_pipe(ialu_reg);
12276 %}
12277 
12278 // Shift Left by one, two, three
12279 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12280 %{
12281   predicate(UseAPX);
12282   match(Set dst (LShiftL src shift));
12283   effect(KILL cr);
12284   flag(PD::Flag_ndd_demotable_opr1);
12285 
12286   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12287   ins_encode %{
12288     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12289   %}
12290   ins_pipe(ialu_reg);
12291 %}
12292 
12293 // Shift Left by 8-bit immediate
12294 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12295 %{
12296   predicate(!UseAPX);
12297   match(Set dst (LShiftL dst shift));
12298   effect(KILL cr);
12299 
12300   format %{ "salq    $dst, $shift" %}
12301   ins_encode %{
12302     __ salq($dst$$Register, $shift$$constant);
12303   %}
12304   ins_pipe(ialu_reg);
12305 %}
12306 
12307 // Shift Left by 8-bit immediate
12308 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12309 %{
12310   predicate(UseAPX);
12311   match(Set dst (LShiftL src shift));
12312   effect(KILL cr);
12313   flag(PD::Flag_ndd_demotable_opr1);
12314 
12315   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12316   ins_encode %{
12317     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12318   %}
12319   ins_pipe(ialu_reg);
12320 %}
12321 
12322 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12323 %{
12324   predicate(UseAPX);
12325   match(Set dst (LShiftL (LoadL src) shift));
12326   effect(KILL cr);
12327 
12328   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12329   ins_encode %{
12330     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12331   %}
12332   ins_pipe(ialu_reg);
12333 %}
12334 
12335 // Shift Left by 8-bit immediate
12336 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12337 %{
12338   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12339   effect(KILL cr);
12340 
12341   format %{ "salq    $dst, $shift" %}
12342   ins_encode %{
12343     __ salq($dst$$Address, $shift$$constant);
12344   %}
12345   ins_pipe(ialu_mem_imm);
12346 %}
12347 
12348 // Shift Left by variable
12349 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12350 %{
12351   predicate(!VM_Version::supports_bmi2());
12352   match(Set dst (LShiftL dst shift));
12353   effect(KILL cr);
12354 
12355   format %{ "salq    $dst, $shift" %}
12356   ins_encode %{
12357     __ salq($dst$$Register);
12358   %}
12359   ins_pipe(ialu_reg_reg);
12360 %}
12361 
12362 // Shift Left by variable
12363 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12364 %{
12365   predicate(!VM_Version::supports_bmi2());
12366   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12367   effect(KILL cr);
12368 
12369   format %{ "salq    $dst, $shift" %}
12370   ins_encode %{
12371     __ salq($dst$$Address);
12372   %}
12373   ins_pipe(ialu_mem_reg);
12374 %}
12375 
12376 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12377 %{
12378   predicate(VM_Version::supports_bmi2());
12379   match(Set dst (LShiftL src shift));
12380 
12381   format %{ "shlxq   $dst, $src, $shift" %}
12382   ins_encode %{
12383     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12384   %}
12385   ins_pipe(ialu_reg_reg);
12386 %}
12387 
12388 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12389 %{
12390   predicate(VM_Version::supports_bmi2());
12391   match(Set dst (LShiftL (LoadL src) shift));
12392   ins_cost(175);
12393   format %{ "shlxq   $dst, $src, $shift" %}
12394   ins_encode %{
12395     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12396   %}
12397   ins_pipe(ialu_reg_mem);
12398 %}
12399 
12400 // Arithmetic Shift Right by 8-bit immediate
12401 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12402 %{
12403   predicate(!UseAPX);
12404   match(Set dst (RShiftL dst shift));
12405   effect(KILL cr);
12406 
12407   format %{ "sarq    $dst, $shift" %}
12408   ins_encode %{
12409     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12410   %}
12411   ins_pipe(ialu_mem_imm);
12412 %}
12413 
12414 // Arithmetic Shift Right by 8-bit immediate
12415 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12416 %{
12417   predicate(UseAPX);
12418   match(Set dst (RShiftL src shift));
12419   effect(KILL cr);
12420   flag(PD::Flag_ndd_demotable_opr1);
12421 
12422   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12423   ins_encode %{
12424     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12425   %}
12426   ins_pipe(ialu_mem_imm);
12427 %}
12428 
12429 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12430 %{
12431   predicate(UseAPX);
12432   match(Set dst (RShiftL (LoadL src) shift));
12433   effect(KILL cr);
12434 
12435   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12436   ins_encode %{
12437     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12438   %}
12439   ins_pipe(ialu_mem_imm);
12440 %}
12441 
12442 // Arithmetic Shift Right by 8-bit immediate
12443 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12444 %{
12445   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12446   effect(KILL cr);
12447 
12448   format %{ "sarq    $dst, $shift" %}
12449   ins_encode %{
12450     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12451   %}
12452   ins_pipe(ialu_mem_imm);
12453 %}
12454 
12455 // Arithmetic Shift Right by variable
12456 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12457 %{
12458   predicate(!VM_Version::supports_bmi2());
12459   match(Set dst (RShiftL dst shift));
12460   effect(KILL cr);
12461 
12462   format %{ "sarq    $dst, $shift" %}
12463   ins_encode %{
12464     __ sarq($dst$$Register);
12465   %}
12466   ins_pipe(ialu_reg_reg);
12467 %}
12468 
12469 // Arithmetic Shift Right by variable
12470 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12471 %{
12472   predicate(!VM_Version::supports_bmi2());
12473   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12474   effect(KILL cr);
12475 
12476   format %{ "sarq    $dst, $shift" %}
12477   ins_encode %{
12478     __ sarq($dst$$Address);
12479   %}
12480   ins_pipe(ialu_mem_reg);
12481 %}
12482 
12483 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12484 %{
12485   predicate(VM_Version::supports_bmi2());
12486   match(Set dst (RShiftL src shift));
12487 
12488   format %{ "sarxq   $dst, $src, $shift" %}
12489   ins_encode %{
12490     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12491   %}
12492   ins_pipe(ialu_reg_reg);
12493 %}
12494 
12495 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12496 %{
12497   predicate(VM_Version::supports_bmi2());
12498   match(Set dst (RShiftL (LoadL src) shift));
12499   ins_cost(175);
12500   format %{ "sarxq   $dst, $src, $shift" %}
12501   ins_encode %{
12502     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12503   %}
12504   ins_pipe(ialu_reg_mem);
12505 %}
12506 
12507 // Logical Shift Right by 8-bit immediate
12508 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12509 %{
12510   predicate(!UseAPX);
12511   match(Set dst (URShiftL dst shift));
12512   effect(KILL cr);
12513 
12514   format %{ "shrq    $dst, $shift" %}
12515   ins_encode %{
12516     __ shrq($dst$$Register, $shift$$constant);
12517   %}
12518   ins_pipe(ialu_reg);
12519 %}
12520 
12521 // Logical Shift Right by 8-bit immediate
12522 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12523 %{
12524   predicate(UseAPX);
12525   match(Set dst (URShiftL src shift));
12526   effect(KILL cr);
12527   flag(PD::Flag_ndd_demotable_opr1);
12528 
12529   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12530   ins_encode %{
12531     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12532   %}
12533   ins_pipe(ialu_reg);
12534 %}
12535 
12536 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12537 %{
12538   predicate(UseAPX);
12539   match(Set dst (URShiftL (LoadL src) shift));
12540   effect(KILL cr);
12541 
12542   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12543   ins_encode %{
12544     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12545   %}
12546   ins_pipe(ialu_reg);
12547 %}
12548 
12549 // Logical Shift Right by 8-bit immediate
12550 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12551 %{
12552   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12553   effect(KILL cr);
12554 
12555   format %{ "shrq    $dst, $shift" %}
12556   ins_encode %{
12557     __ shrq($dst$$Address, $shift$$constant);
12558   %}
12559   ins_pipe(ialu_mem_imm);
12560 %}
12561 
12562 // Logical Shift Right by variable
12563 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12564 %{
12565   predicate(!VM_Version::supports_bmi2());
12566   match(Set dst (URShiftL dst shift));
12567   effect(KILL cr);
12568 
12569   format %{ "shrq    $dst, $shift" %}
12570   ins_encode %{
12571     __ shrq($dst$$Register);
12572   %}
12573   ins_pipe(ialu_reg_reg);
12574 %}
12575 
12576 // Logical Shift Right by variable
12577 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12578 %{
12579   predicate(!VM_Version::supports_bmi2());
12580   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12581   effect(KILL cr);
12582 
12583   format %{ "shrq    $dst, $shift" %}
12584   ins_encode %{
12585     __ shrq($dst$$Address);
12586   %}
12587   ins_pipe(ialu_mem_reg);
12588 %}
12589 
12590 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12591 %{
12592   predicate(VM_Version::supports_bmi2());
12593   match(Set dst (URShiftL src shift));
12594 
12595   format %{ "shrxq   $dst, $src, $shift" %}
12596   ins_encode %{
12597     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12598   %}
12599   ins_pipe(ialu_reg_reg);
12600 %}
12601 
12602 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12603 %{
12604   predicate(VM_Version::supports_bmi2());
12605   match(Set dst (URShiftL (LoadL src) shift));
12606   ins_cost(175);
12607   format %{ "shrxq   $dst, $src, $shift" %}
12608   ins_encode %{
12609     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12610   %}
12611   ins_pipe(ialu_reg_mem);
12612 %}
12613 
12614 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
12615 // This idiom is used by the compiler for the i2b bytecode.
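// For example, (x << 24) >> 24 yields the same int value as (byte) x (a sign
// extension of the low byte), which a single movsbl implements directly.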
12616 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12617 %{
12618   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12619 
12620   format %{ "movsbl  $dst, $src\t# i2b" %}
12621   ins_encode %{
12622     __ movsbl($dst$$Register, $src$$Register);
12623   %}
12624   ins_pipe(ialu_reg_reg);
12625 %}
12626 
12627 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
12628 // This idiom is used by the compiler for the i2s bytecode.
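// Likewise (x << 16) >> 16 is equivalent to (short) x, so a single movswl suffices.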
12629 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12630 %{
12631   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12632 
12633   format %{ "movswl  $dst, $src\t# i2s" %}
12634   ins_encode %{
12635     __ movswl($dst$$Register, $src$$Register);
12636   %}
12637   ins_pipe(ialu_reg_reg);
12638 %}
12639 
12640 // ROL/ROR instructions
12641 
12642 // Rotate left by constant.
12643 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12644 %{
12645   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12646   match(Set dst (RotateLeft dst shift));
12647   effect(KILL cr);
12648   format %{ "roll    $dst, $shift" %}
12649   ins_encode %{
12650     __ roll($dst$$Register, $shift$$constant);
12651   %}
12652   ins_pipe(ialu_reg);
12653 %}
12654 
12655 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12656 %{
12657   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12658   match(Set dst (RotateLeft src shift));
12659   format %{ "rolxl   $dst, $src, $shift" %}
12660   ins_encode %{
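    // A left rotate by k is the same as a right rotate by (32 - k), so the
    // flag-preserving BMI2 rorxl can be used and no rFlagsReg KILL is needed.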
12661     int shift = 32 - ($shift$$constant & 31);
12662     __ rorxl($dst$$Register, $src$$Register, shift);
12663   %}
12664   ins_pipe(ialu_reg_reg);
12665 %}
12666 
12667 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12668 %{
12669   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12670   match(Set dst (RotateLeft (LoadI src) shift));
12671   ins_cost(175);
12672   format %{ "rolxl   $dst, $src, $shift" %}
12673   ins_encode %{
12674     int shift = 32 - ($shift$$constant & 31);
12675     __ rorxl($dst$$Register, $src$$Address, shift);
12676   %}
12677   ins_pipe(ialu_reg_mem);
12678 %}
12679 
12680 // Rotate Left by variable
12681 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12682 %{
12683   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12684   match(Set dst (RotateLeft dst shift));
12685   effect(KILL cr);
12686   format %{ "roll    $dst, $shift" %}
12687   ins_encode %{
12688     __ roll($dst$$Register);
12689   %}
12690   ins_pipe(ialu_reg_reg);
12691 %}
12692 
12693 // Rotate Left by variable
12694 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12695 %{
12696   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12697   match(Set dst (RotateLeft src shift));
12698   effect(KILL cr);
12699   flag(PD::Flag_ndd_demotable_opr1);
12700 
12701   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12702   ins_encode %{
12703     __ eroll($dst$$Register, $src$$Register, false);
12704   %}
12705   ins_pipe(ialu_reg_reg);
12706 %}
12707 
12708 // Rotate Right by constant.
12709 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12710 %{
12711   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12712   match(Set dst (RotateRight dst shift));
12713   effect(KILL cr);
12714   format %{ "rorl    $dst, $shift" %}
12715   ins_encode %{
12716     __ rorl($dst$$Register, $shift$$constant);
12717   %}
12718   ins_pipe(ialu_reg);
12719 %}
12720 
12721 // Rotate Right by constant.
12722 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12723 %{
12724   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12725   match(Set dst (RotateRight src shift));
12726   format %{ "rorxl   $dst, $src, $shift" %}
12727   ins_encode %{
12728     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12729   %}
12730   ins_pipe(ialu_reg_reg);
12731 %}
12732 
12733 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12734 %{
12735   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12736   match(Set dst (RotateRight (LoadI src) shift));
12737   ins_cost(175);
12738   format %{ "rorxl   $dst, $src, $shift" %}
12739   ins_encode %{
12740     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12741   %}
12742   ins_pipe(ialu_reg_mem);
12743 %}
12744 
12745 // Rotate Right by variable
12746 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12747 %{
12748   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12749   match(Set dst (RotateRight dst shift));
12750   effect(KILL cr);
12751   format %{ "rorl    $dst, $shift" %}
12752   ins_encode %{
12753     __ rorl($dst$$Register);
12754   %}
12755   ins_pipe(ialu_reg_reg);
12756 %}
12757 
12758 // Rotate Right by variable
12759 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12760 %{
12761   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12762   match(Set dst (RotateRight src shift));
12763   effect(KILL cr);
12764   flag(PD::Flag_ndd_demotable_opr1);
12765 
12766   format %{ "erorl    $dst, $src, $shift\t# rotate right (int ndd)" %}
12767   ins_encode %{
12768     __ erorl($dst$$Register, $src$$Register, false);
12769   %}
12770   ins_pipe(ialu_reg_reg);
12771 %}
12772 
12773 // Rotate Left by constant.
12774 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12775 %{
12776   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12777   match(Set dst (RotateLeft dst shift));
12778   effect(KILL cr);
12779   format %{ "rolq    $dst, $shift" %}
12780   ins_encode %{
12781     __ rolq($dst$$Register, $shift$$constant);
12782   %}
12783   ins_pipe(ialu_reg);
12784 %}
12785 
12786 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12787 %{
12788   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12789   match(Set dst (RotateLeft src shift));
12790   format %{ "rolxq   $dst, $src, $shift" %}
12791   ins_encode %{
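    // Same trick as rolI_immI8 above: rotate left by k == rotate right by (64 - k).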
12792     int shift = 64 - ($shift$$constant & 63);
12793     __ rorxq($dst$$Register, $src$$Register, shift);
12794   %}
12795   ins_pipe(ialu_reg_reg);
12796 %}
12797 
12798 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12799 %{
12800   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12801   match(Set dst (RotateLeft (LoadL src) shift));
12802   ins_cost(175);
12803   format %{ "rolxq   $dst, $src, $shift" %}
12804   ins_encode %{
12805     int shift = 64 - ($shift$$constant & 63);
12806     __ rorxq($dst$$Register, $src$$Address, shift);
12807   %}
12808   ins_pipe(ialu_reg_mem);
12809 %}
12810 
12811 // Rotate Left by variable
12812 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12813 %{
12814   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12815   match(Set dst (RotateLeft dst shift));
12816   effect(KILL cr);
12817 
12818   format %{ "rolq    $dst, $shift" %}
12819   ins_encode %{
12820     __ rolq($dst$$Register);
12821   %}
12822   ins_pipe(ialu_reg_reg);
12823 %}
12824 
12825 // Rotate Left by variable
12826 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12827 %{
12828   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12829   match(Set dst (RotateLeft src shift));
12830   effect(KILL cr);
12831   flag(PD::Flag_ndd_demotable_opr1);
12832 
12833   format %{ "erolq    $dst, $src, $shift\t# rotate left (long ndd)" %}
12834   ins_encode %{
12835     __ erolq($dst$$Register, $src$$Register, false);
12836   %}
12837   ins_pipe(ialu_reg_reg);
12838 %}
12839 
12840 // Rotate Right by constant.
12841 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12842 %{
12843   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12844   match(Set dst (RotateRight dst shift));
12845   effect(KILL cr);
12846   format %{ "rorq    $dst, $shift" %}
12847   ins_encode %{
12848     __ rorq($dst$$Register, $shift$$constant);
12849   %}
12850   ins_pipe(ialu_reg);
12851 %}
12852 
12853 // Rotate Right by constant
12854 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12855 %{
12856   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12857   match(Set dst (RotateRight src shift));
12858   format %{ "rorxq   $dst, $src, $shift" %}
12859   ins_encode %{
12860     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12861   %}
12862   ins_pipe(ialu_reg_reg);
12863 %}
12864 
12865 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12866 %{
12867   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12868   match(Set dst (RotateRight (LoadL src) shift));
12869   ins_cost(175);
12870   format %{ "rorxq   $dst, $src, $shift" %}
12871   ins_encode %{
12872     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12873   %}
12874   ins_pipe(ialu_reg_mem);
12875 %}
12876 
12877 // Rotate Right by variable
12878 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12879 %{
12880   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12881   match(Set dst (RotateRight dst shift));
12882   effect(KILL cr);
12883   format %{ "rorq    $dst, $shift" %}
12884   ins_encode %{
12885     __ rorq($dst$$Register);
12886   %}
12887   ins_pipe(ialu_reg_reg);
12888 %}
12889 
12890 // Rotate Right by variable
12891 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12892 %{
12893   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12894   match(Set dst (RotateRight src shift));
12895   effect(KILL cr);
12896   flag(PD::Flag_ndd_demotable_opr1);
12897 
12898   format %{ "erorq    $dst, $src, $shift\t# rotate right (long ndd)" %}
12899   ins_encode %{
12900     __ erorq($dst$$Register, $src$$Register, false);
12901   %}
12902   ins_pipe(ialu_reg_reg);
12903 %}
12904 
12905 //----------------------------- CompressBits/ExpandBits ------------------------
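// CompressBits maps to BMI2 pext (gather the src bits selected by mask into the
// low-order bits of dst); ExpandBits maps to pdep (scatter the low-order bits of
// src to the positions where mask has ones).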
12906 
12907 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12908   predicate(n->bottom_type()->isa_long());
12909   match(Set dst (CompressBits src mask));
12910   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12911   ins_encode %{
12912     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12913   %}
12914   ins_pipe( pipe_slow );
12915 %}
12916 
12917 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12918   predicate(n->bottom_type()->isa_long());
12919   match(Set dst (ExpandBits src mask));
12920   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12921   ins_encode %{
12922     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12923   %}
12924   ins_pipe( pipe_slow );
12925 %}
12926 
12927 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12928   predicate(n->bottom_type()->isa_long());
12929   match(Set dst (CompressBits src (LoadL mask)));
12930   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12931   ins_encode %{
12932     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12933   %}
12934   ins_pipe( pipe_slow );
12935 %}
12936 
12937 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12938   predicate(n->bottom_type()->isa_long());
12939   match(Set dst (ExpandBits src (LoadL mask)));
12940   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12941   ins_encode %{
12942     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12943   %}
12944   ins_pipe( pipe_slow );
12945 %}
12946 
12947 
12948 // Logical Instructions
12949 
12950 // Integer Logical Instructions
12951 
12952 // And Instructions
12953 // And Register with Register
12954 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12955 %{
12956   predicate(!UseAPX);
12957   match(Set dst (AndI dst src));
12958   effect(KILL cr);
12959   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12960 
12961   format %{ "andl    $dst, $src\t# int" %}
12962   ins_encode %{
12963     __ andl($dst$$Register, $src$$Register);
12964   %}
12965   ins_pipe(ialu_reg_reg);
12966 %}
12967 
12968 // And Register with Register using New Data Destination (NDD)
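// APX NDD encodings write their result to a separate destination register, so dst
// need not be allocated to either source; the Flag_ndd_demotable_opr* hints mark
// sources whose coincidence with dst allows demotion to the shorter legacy
// two-operand encoding.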
12969 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12970 %{
12971   predicate(UseAPX);
12972   match(Set dst (AndI src1 src2));
12973   effect(KILL cr);
12974   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12975 
12976   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
12977   ins_encode %{
12978     __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
12979 
12980   %}
12981   ins_pipe(ialu_reg_reg);
12982 %}
12983 
12984 // And Register with Immediate 255
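// x & 0xFF is just a zero extension of the low byte; movzbl performs it without
// touching the flags, which is why these forms have no KILL cr effect.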
12985 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12986 %{
12987   match(Set dst (AndI src mask));
12988 
12989   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
12990   ins_encode %{
12991     __ movzbl($dst$$Register, $src$$Register);
12992   %}
12993   ins_pipe(ialu_reg);
12994 %}
12995 
12996 // And Register with Immediate 255 and promote to long
12997 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12998 %{
12999   match(Set dst (ConvI2L (AndI src mask)));
13000 
13001   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
13002   ins_encode %{
13003     __ movzbl($dst$$Register, $src$$Register);
13004   %}
13005   ins_pipe(ialu_reg);
13006 %}
13007 
13008 // And Register with Immediate 65535
13009 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13010 %{
13011   match(Set dst (AndI src mask));
13012 
13013   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
13014   ins_encode %{
13015     __ movzwl($dst$$Register, $src$$Register);
13016   %}
13017   ins_pipe(ialu_reg);
13018 %}
13019 
13020 // And Register with Immediate 65535 and promote to long
13021 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13022 %{
13023   match(Set dst (ConvI2L (AndI src mask)));
13024 
13025   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
13026   ins_encode %{
13027     __ movzwl($dst$$Register, $src$$Register);
13028   %}
13029   ins_pipe(ialu_reg);
13030 %}
13031 
13032 // Can skip int2long conversions after AND with small bitmask
13033 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13034 %{
13035   predicate(VM_Version::supports_bmi2());
13036   ins_cost(125);
13037   effect(TEMP tmp, KILL cr);
13038   match(Set dst (ConvI2L (AndI src mask)));
13039   format %{ "bzhiq $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13040   ins_encode %{
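    // mask has the form 2^k - 1, so exact_log2(mask + 1) recovers k; bzhi then
    // clears every bit from position k upward, leaving the zero-extended result.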
13041     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13042     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13043   %}
13044   ins_pipe(ialu_reg_reg);
13045 %}
13046 
13047 // And Register with Immediate
13048 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13049 %{
13050   predicate(!UseAPX);
13051   match(Set dst (AndI dst src));
13052   effect(KILL cr);
13053   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13054 
13055   format %{ "andl    $dst, $src\t# int" %}
13056   ins_encode %{
13057     __ andl($dst$$Register, $src$$constant);
13058   %}
13059   ins_pipe(ialu_reg);
13060 %}
13061 
13062 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13063 %{
13064   predicate(UseAPX);
13065   match(Set dst (AndI src1 src2));
13066   effect(KILL cr);
13067   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13068 
13069   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13070   ins_encode %{
13071     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13072   %}
13073   ins_pipe(ialu_reg);
13074 %}
13075 
13076 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13077 %{
13078   predicate(UseAPX);
13079   match(Set dst (AndI (LoadI src1) src2));
13080   effect(KILL cr);
13081   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13082 
13083   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13084   ins_encode %{
13085     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13086   %}
13087   ins_pipe(ialu_reg);
13088 %}
13089 
13090 // And Register with Memory
13091 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13092 %{
13093   predicate(!UseAPX);
13094   match(Set dst (AndI dst (LoadI src)));
13095   effect(KILL cr);
13096   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13097 
13098   ins_cost(150);
13099   format %{ "andl    $dst, $src\t# int" %}
13100   ins_encode %{
13101     __ andl($dst$$Register, $src$$Address);
13102   %}
13103   ins_pipe(ialu_reg_mem);
13104 %}
13105 
13106 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13107 %{
13108   predicate(UseAPX);
13109   match(Set dst (AndI src1 (LoadI src2)));
13110   effect(KILL cr);
13111   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13112 
13113   ins_cost(150);
13114   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13115   ins_encode %{
13116     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13117   %}
13118   ins_pipe(ialu_reg_mem);
13119 %}
13120 
13121 // And Memory with Register
13122 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13123 %{
13124   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13125   effect(KILL cr);
13126   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13127 
13128   ins_cost(150);
13129   format %{ "andb    $dst, $src\t# byte" %}
13130   ins_encode %{
13131     __ andb($dst$$Address, $src$$Register);
13132   %}
13133   ins_pipe(ialu_mem_reg);
13134 %}
13135 
13136 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13137 %{
13138   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13139   effect(KILL cr);
13140   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13141 
13142   ins_cost(150);
13143   format %{ "andl    $dst, $src\t# int" %}
13144   ins_encode %{
13145     __ andl($dst$$Address, $src$$Register);
13146   %}
13147   ins_pipe(ialu_mem_reg);
13148 %}
13149 
13150 // And Memory with Immediate
13151 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13152 %{
13153   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13154   effect(KILL cr);
13155   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13156 
13157   ins_cost(125);
13158   format %{ "andl    $dst, $src\t# int" %}
13159   ins_encode %{
13160     __ andl($dst$$Address, $src$$constant);
13161   %}
13162   ins_pipe(ialu_mem_imm);
13163 %}
13164 
13165 // BMI1 instructions
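// These match the usual bit-manipulation idioms:
//   andn:   ~src1 & src2
//   blsi:   src & -src        (isolate lowest set bit)
//   blsmsk: src ^ (src - 1)   (mask up to and including lowest set bit)
//   blsr:   src & (src - 1)   (clear lowest set bit)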
13166 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13167   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13168   predicate(UseBMI1Instructions);
13169   effect(KILL cr);
13170   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13171 
13172   ins_cost(125);
13173   format %{ "andnl  $dst, $src1, $src2" %}
13174 
13175   ins_encode %{
13176     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13177   %}
13178   ins_pipe(ialu_reg_mem);
13179 %}
13180 
13181 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13182   match(Set dst (AndI (XorI src1 minus_1) src2));
13183   predicate(UseBMI1Instructions);
13184   effect(KILL cr);
13185   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13186 
13187   format %{ "andnl  $dst, $src1, $src2" %}
13188 
13189   ins_encode %{
13190     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13191   %}
13192   ins_pipe(ialu_reg);
13193 %}
13194 
13195 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13196   match(Set dst (AndI (SubI imm_zero src) src));
13197   predicate(UseBMI1Instructions);
13198   effect(KILL cr);
13199   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13200 
13201   format %{ "blsil  $dst, $src" %}
13202 
13203   ins_encode %{
13204     __ blsil($dst$$Register, $src$$Register);
13205   %}
13206   ins_pipe(ialu_reg);
13207 %}
13208 
13209 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13210   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13211   predicate(UseBMI1Instructions);
13212   effect(KILL cr);
13213   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13214 
13215   ins_cost(125);
13216   format %{ "blsil  $dst, $src" %}
13217 
13218   ins_encode %{
13219     __ blsil($dst$$Register, $src$$Address);
13220   %}
13221   ins_pipe(ialu_reg_mem);
13222 %}
13223 
13224 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13225 %{
13226   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13227   predicate(UseBMI1Instructions);
13228   effect(KILL cr);
13229   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13230 
13231   ins_cost(125);
13232   format %{ "blsmskl $dst, $src" %}
13233 
13234   ins_encode %{
13235     __ blsmskl($dst$$Register, $src$$Address);
13236   %}
13237   ins_pipe(ialu_reg_mem);
13238 %}
13239 
13240 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13241 %{
13242   match(Set dst (XorI (AddI src minus_1) src));
13243   predicate(UseBMI1Instructions);
13244   effect(KILL cr);
13245   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13246 
13247   format %{ "blsmskl $dst, $src" %}
13248 
13249   ins_encode %{
13250     __ blsmskl($dst$$Register, $src$$Register);
13251   %}
13252 
13253   ins_pipe(ialu_reg);
13254 %}
13255 
13256 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13257 %{
13258   match(Set dst (AndI (AddI src minus_1) src) );
13259   predicate(UseBMI1Instructions);
13260   effect(KILL cr);
13261   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13262 
13263   format %{ "blsrl  $dst, $src" %}
13264 
13265   ins_encode %{
13266     __ blsrl($dst$$Register, $src$$Register);
13267   %}
13268 
13269   ins_pipe(ialu_reg);
13270 %}
13271 
13272 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13273 %{
13274   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13275   predicate(UseBMI1Instructions);
13276   effect(KILL cr);
13277   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13278 
13279   ins_cost(125);
13280   format %{ "blsrl  $dst, $src" %}
13281 
13282   ins_encode %{
13283     __ blsrl($dst$$Register, $src$$Address);
13284   %}
13285 
13286   ins_pipe(ialu_reg_mem);
13287 %}
13288 
13289 // Or Instructions
13290 // Or Register with Register
13291 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13292 %{
13293   predicate(!UseAPX);
13294   match(Set dst (OrI dst src));
13295   effect(KILL cr);
13296   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13297 
13298   format %{ "orl     $dst, $src\t# int" %}
13299   ins_encode %{
13300     __ orl($dst$$Register, $src$$Register);
13301   %}
13302   ins_pipe(ialu_reg_reg);
13303 %}
13304 
13305 // Or Register with Register using New Data Destination (NDD)
13306 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13307 %{
13308   predicate(UseAPX);
13309   match(Set dst (OrI src1 src2));
13310   effect(KILL cr);
13311   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13312 
13313   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13314   ins_encode %{
13315     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13316   %}
13317   ins_pipe(ialu_reg_reg);
13318 %}
13319 
13320 // Or Register with Immediate
13321 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13322 %{
13323   predicate(!UseAPX);
13324   match(Set dst (OrI dst src));
13325   effect(KILL cr);
13326   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13327 
13328   format %{ "orl     $dst, $src\t# int" %}
13329   ins_encode %{
13330     __ orl($dst$$Register, $src$$constant);
13331   %}
13332   ins_pipe(ialu_reg);
13333 %}
13334 
13335 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13336 %{
13337   predicate(UseAPX);
13338   match(Set dst (OrI src1 src2));
13339   effect(KILL cr);
13340   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13341 
13342   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13343   ins_encode %{
13344     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13345   %}
13346   ins_pipe(ialu_reg);
13347 %}
13348 
13349 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13350 %{
13351   predicate(UseAPX);
13352   match(Set dst (OrI src1 src2));
13353   effect(KILL cr);
13354   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13355 
13356   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13357   ins_encode %{
13358     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13359   %}
13360   ins_pipe(ialu_reg);
13361 %}
13362 
13363 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13364 %{
13365   predicate(UseAPX);
13366   match(Set dst (OrI (LoadI src1) src2));
13367   effect(KILL cr);
13368   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13369 
13370   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13371   ins_encode %{
13372     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13373   %}
13374   ins_pipe(ialu_reg);
13375 %}
13376 
13377 // Or Register with Memory
13378 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13379 %{
13380   predicate(!UseAPX);
13381   match(Set dst (OrI dst (LoadI src)));
13382   effect(KILL cr);
13383   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13384 
13385   ins_cost(150);
13386   format %{ "orl     $dst, $src\t# int" %}
13387   ins_encode %{
13388     __ orl($dst$$Register, $src$$Address);
13389   %}
13390   ins_pipe(ialu_reg_mem);
13391 %}
13392 
13393 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13394 %{
13395   predicate(UseAPX);
13396   match(Set dst (OrI src1 (LoadI src2)));
13397   effect(KILL cr);
13398   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13399 
13400   ins_cost(150);
13401   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13402   ins_encode %{
13403     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13404   %}
13405   ins_pipe(ialu_reg_mem);
13406 %}
13407 
13408 // Or Memory with Register
13409 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13410 %{
13411   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13412   effect(KILL cr);
13413   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13414 
13415   ins_cost(150);
13416   format %{ "orb    $dst, $src\t# byte" %}
13417   ins_encode %{
13418     __ orb($dst$$Address, $src$$Register);
13419   %}
13420   ins_pipe(ialu_mem_reg);
13421 %}
13422 
13423 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13424 %{
13425   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13426   effect(KILL cr);
13427   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13428 
13429   ins_cost(150);
13430   format %{ "orl     $dst, $src\t# int" %}
13431   ins_encode %{
13432     __ orl($dst$$Address, $src$$Register);
13433   %}
13434   ins_pipe(ialu_mem_reg);
13435 %}
13436 
13437 // Or Memory with Immediate
13438 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13439 %{
13440   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13441   effect(KILL cr);
13442   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13443 
13444   ins_cost(125);
13445   format %{ "orl     $dst, $src\t# int" %}
13446   ins_encode %{
13447     __ orl($dst$$Address, $src$$constant);
13448   %}
13449   ins_pipe(ialu_mem_imm);
13450 %}
13451 
13452 // Xor Instructions
13453 // Xor Register with Register
13454 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13455 %{
13456   predicate(!UseAPX);
13457   match(Set dst (XorI dst src));
13458   effect(KILL cr);
13459   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13460 
13461   format %{ "xorl    $dst, $src\t# int" %}
13462   ins_encode %{
13463     __ xorl($dst$$Register, $src$$Register);
13464   %}
13465   ins_pipe(ialu_reg_reg);
13466 %}
13467 
13468 // Xor Register with Register using New Data Destination (NDD)
13469 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13470 %{
13471   predicate(UseAPX);
13472   match(Set dst (XorI src1 src2));
13473   effect(KILL cr);
13474   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13475 
13476   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13477   ins_encode %{
13478     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13479   %}
13480   ins_pipe(ialu_reg_reg);
13481 %}
13482 
13483 // Xor Register with Immediate -1
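// x ^ -1 is simply ~x, and notl does not modify the flags, so no KILL cr is needed.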
13484 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13485 %{
13486   predicate(!UseAPX);
13487   match(Set dst (XorI dst imm));
13488 
13489   format %{ "notl    $dst" %}
13490   ins_encode %{
13491      __ notl($dst$$Register);
13492   %}
13493   ins_pipe(ialu_reg);
13494 %}
13495 
13496 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13497 %{
13498   match(Set dst (XorI src imm));
13499   predicate(UseAPX);
13500   flag(PD::Flag_ndd_demotable_opr1);
13501 
13502   format %{ "enotl    $dst, $src" %}
13503   ins_encode %{
13504      __ enotl($dst$$Register, $src$$Register);
13505   %}
13506   ins_pipe(ialu_reg);
13507 %}
13508 
13509 // Xor Register with Immediate
13510 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13511 %{
13512   // Strict predicate check to make selection of xorI_rReg_im1 cost agnostic if immI src is -1.
13513   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13514   match(Set dst (XorI dst src));
13515   effect(KILL cr);
13516   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13517 
13518   format %{ "xorl    $dst, $src\t# int" %}
13519   ins_encode %{
13520     __ xorl($dst$$Register, $src$$constant);
13521   %}
13522   ins_pipe(ialu_reg);
13523 %}
13524 
13525 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13526 %{
13527   // Strict predicate check to make selection of xorI_rReg_im1_ndd cost agnostic if immI src2 is -1.
13528   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13529   match(Set dst (XorI src1 src2));
13530   effect(KILL cr);
13531   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13532 
13533   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13534   ins_encode %{
13535     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13536   %}
13537   ins_pipe(ialu_reg);
13538 %}
13539 
13540 // Xor Memory with Immediate, result in Register
13541 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13542 %{
13543   predicate(UseAPX);
13544   match(Set dst (XorI (LoadI src1) src2));
13545   effect(KILL cr);
13546   ins_cost(150);
13547   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13548 
13549   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13550   ins_encode %{
13551     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13552   %}
13553   ins_pipe(ialu_reg);
13554 %}
13555 
13556 // Xor Register with Memory
13557 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13558 %{
13559   predicate(!UseAPX);
13560   match(Set dst (XorI dst (LoadI src)));
13561   effect(KILL cr);
13562   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13563 
13564   ins_cost(150);
13565   format %{ "xorl    $dst, $src\t# int" %}
13566   ins_encode %{
13567     __ xorl($dst$$Register, $src$$Address);
13568   %}
13569   ins_pipe(ialu_reg_mem);
13570 %}
13571 
13572 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13573 %{
13574   predicate(UseAPX);
13575   match(Set dst (XorI src1 (LoadI src2)));
13576   effect(KILL cr);
13577   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13578 
13579   ins_cost(150);
13580   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13581   ins_encode %{
13582     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13583   %}
13584   ins_pipe(ialu_reg_mem);
13585 %}
13586 
13587 // Xor Memory with Register
13588 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13589 %{
13590   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13591   effect(KILL cr);
13592   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13593 
13594   ins_cost(150);
13595   format %{ "xorb    $dst, $src\t# byte" %}
13596   ins_encode %{
13597     __ xorb($dst$$Address, $src$$Register);
13598   %}
13599   ins_pipe(ialu_mem_reg);
13600 %}
13601 
13602 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13603 %{
13604   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13605   effect(KILL cr);
13606   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13607 
13608   ins_cost(150);
13609   format %{ "xorl    $dst, $src\t# int" %}
13610   ins_encode %{
13611     __ xorl($dst$$Address, $src$$Register);
13612   %}
13613   ins_pipe(ialu_mem_reg);
13614 %}
13615 
13616 // Xor Memory with Immediate
13617 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13618 %{
13619   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13620   effect(KILL cr);
13621   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13622 
13623   ins_cost(125);
13624   format %{ "xorl    $dst, $src\t# int" %}
13625   ins_encode %{
13626     __ xorl($dst$$Address, $src$$constant);
13627   %}
13628   ins_pipe(ialu_mem_imm);
13629 %}
13630 
13631 
13632 // Long Logical Instructions
13633 
13634 // And Instructions
13635 // And Register with Register
13636 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13637 %{
13638   predicate(!UseAPX);
13639   match(Set dst (AndL dst src));
13640   effect(KILL cr);
13641   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13642 
13643   format %{ "andq    $dst, $src\t# long" %}
13644   ins_encode %{
13645     __ andq($dst$$Register, $src$$Register);
13646   %}
13647   ins_pipe(ialu_reg_reg);
13648 %}
13649 
13650 // And Register with Register using New Data Destination (NDD)
13651 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13652 %{
13653   predicate(UseAPX);
13654   match(Set dst (AndL src1 src2));
13655   effect(KILL cr);
13656   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13657 
13658   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13659   ins_encode %{
13660     __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13661 
13662   %}
13663   ins_pipe(ialu_reg_reg);
13664 %}
13665 
13666 // And Register with Immediate 255
13667 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13668 %{
13669   match(Set dst (AndL src mask));
13670 
13671   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13672   ins_encode %{
13673     // movzbl zeroes out the upper 32-bit and does not need REX.W
13674     __ movzbl($dst$$Register, $src$$Register);
13675   %}
13676   ins_pipe(ialu_reg);
13677 %}
13678 
13679 // And Register with Immediate 65535
13680 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13681 %{
13682   match(Set dst (AndL src mask));
13683 
13684   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13685   ins_encode %{
13686     // movzwl zeroes out the upper 32-bit and does not need REX.W
13687     __ movzwl($dst$$Register, $src$$Register);
13688   %}
13689   ins_pipe(ialu_reg);
13690 %}
13691 
13692 // And Register with Immediate
13693 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13694 %{
13695   predicate(!UseAPX);
13696   match(Set dst (AndL dst src));
13697   effect(KILL cr);
13698   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13699 
13700   format %{ "andq    $dst, $src\t# long" %}
13701   ins_encode %{
13702     __ andq($dst$$Register, $src$$constant);
13703   %}
13704   ins_pipe(ialu_reg);
13705 %}
13706 
13707 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13708 %{
13709   predicate(UseAPX);
13710   match(Set dst (AndL src1 src2));
13711   effect(KILL cr);
13712   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13713 
13714   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13715   ins_encode %{
13716     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13717   %}
13718   ins_pipe(ialu_reg);
13719 %}
13720 
13721 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13722 %{
13723   predicate(UseAPX);
13724   match(Set dst (AndL (LoadL src1) src2));
13725   effect(KILL cr);
13726   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13727 
13728   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13729   ins_encode %{
13730     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13731   %}
13732   ins_pipe(ialu_reg);
13733 %}
13734 
13735 // And Register with Memory
13736 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13737 %{
13738   predicate(!UseAPX);
13739   match(Set dst (AndL dst (LoadL src)));
13740   effect(KILL cr);
13741   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13742 
13743   ins_cost(150);
13744   format %{ "andq    $dst, $src\t# long" %}
13745   ins_encode %{
13746     __ andq($dst$$Register, $src$$Address);
13747   %}
13748   ins_pipe(ialu_reg_mem);
13749 %}
13750 
13751 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13752 %{
13753   predicate(UseAPX);
13754   match(Set dst (AndL src1 (LoadL src2)));
13755   effect(KILL cr);
13756   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13757 
13758   ins_cost(150);
13759   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13760   ins_encode %{
13761     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13762   %}
13763   ins_pipe(ialu_reg_mem);
13764 %}
13765 
13766 // And Memory with Register
13767 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13768 %{
13769   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13770   effect(KILL cr);
13771   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13772 
13773   ins_cost(150);
13774   format %{ "andq    $dst, $src\t# long" %}
13775   ins_encode %{
13776     __ andq($dst$$Address, $src$$Register);
13777   %}
13778   ins_pipe(ialu_mem_reg);
13779 %}
13780 
13781 // And Memory with Immediate
13782 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13783 %{
13784   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13785   effect(KILL cr);
13786   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13787 
13788   ins_cost(125);
13789   format %{ "andq    $dst, $src\t# long" %}
13790   ins_encode %{
13791     __ andq($dst$$Address, $src$$constant);
13792   %}
13793   ins_pipe(ialu_mem_imm);
13794 %}
13795 
13796 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13797 %{
13798   // con should be a pure 64-bit immediate given that not(con) is a power of 2
13799   // because AND/OR works well enough for 8/32-bit values.
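  // Example: con == ~(1L << 40) is not representable as a sign-extended 32-bit
  // immediate, but btrq can clear bit 40 of the memory operand directly.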
13800   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13801 
13802   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13803   effect(KILL cr);
13804 
13805   ins_cost(125);
13806   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13807   ins_encode %{
13808     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13809   %}
13810   ins_pipe(ialu_mem_imm);
13811 %}
13812 
13813 // BMI1 instructions
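// The BMI1 forms below are matched from their ideal-graph identities:
//   andnq   dst, src1, src2  =>  dst = ~src1 & src2     (from (src1 ^ -1) & src2)
//   blsiq   dst, src         =>  dst = src & -src       (isolate lowest set bit)
//   blsmskq dst, src         =>  dst = src ^ (src - 1)  (mask up to and including lowest set bit)
//   blsrq   dst, src         =>  dst = src & (src - 1)  (clear lowest set bit)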
13814 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13815   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13816   predicate(UseBMI1Instructions);
13817   effect(KILL cr);
13818   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13819 
13820   ins_cost(125);
13821   format %{ "andnq  $dst, $src1, $src2" %}
13822 
13823   ins_encode %{
13824     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13825   %}
13826   ins_pipe(ialu_reg_mem);
13827 %}
13828 
13829 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13830   match(Set dst (AndL (XorL src1 minus_1) src2));
13831   predicate(UseBMI1Instructions);
13832   effect(KILL cr);
13833   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13834 
13835   format %{ "andnq  $dst, $src1, $src2" %}
13836 
13837   ins_encode %{
13838     __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13839   %}
13840   ins_pipe(ialu_reg_mem);
13841 %}
13842 
13843 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13844   match(Set dst (AndL (SubL imm_zero src) src));
13845   predicate(UseBMI1Instructions);
13846   effect(KILL cr);
13847   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13848 
13849   format %{ "blsiq  $dst, $src" %}
13850 
13851   ins_encode %{
13852     __ blsiq($dst$$Register, $src$$Register);
13853   %}
13854   ins_pipe(ialu_reg);
13855 %}
13856 
13857 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13858   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13859   predicate(UseBMI1Instructions);
13860   effect(KILL cr);
13861   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13862 
13863   ins_cost(125);
13864   format %{ "blsiq  $dst, $src" %}
13865 
13866   ins_encode %{
13867     __ blsiq($dst$$Register, $src$$Address);
13868   %}
13869   ins_pipe(ialu_reg_mem);
13870 %}
13871 
13872 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13873 %{
13874   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13875   predicate(UseBMI1Instructions);
13876   effect(KILL cr);
13877   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13878 
13879   ins_cost(125);
13880   format %{ "blsmskq $dst, $src" %}
13881 
13882   ins_encode %{
13883     __ blsmskq($dst$$Register, $src$$Address);
13884   %}
13885   ins_pipe(ialu_reg_mem);
13886 %}
13887 
13888 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13889 %{
13890   match(Set dst (XorL (AddL src minus_1) src));
13891   predicate(UseBMI1Instructions);
13892   effect(KILL cr);
13893   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13894 
13895   format %{ "blsmskq $dst, $src" %}
13896 
13897   ins_encode %{
13898     __ blsmskq($dst$$Register, $src$$Register);
13899   %}
13900 
13901   ins_pipe(ialu_reg);
13902 %}
13903 
13904 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13905 %{
13906   match(Set dst (AndL (AddL src minus_1) src) );
13907   predicate(UseBMI1Instructions);
13908   effect(KILL cr);
13909   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13910 
13911   format %{ "blsrq  $dst, $src" %}
13912 
13913   ins_encode %{
13914     __ blsrq($dst$$Register, $src$$Register);
13915   %}
13916 
13917   ins_pipe(ialu_reg);
13918 %}
13919 
13920 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13921 %{
13922   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13923   predicate(UseBMI1Instructions);
13924   effect(KILL cr);
13925   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13926 
13927   ins_cost(125);
13928   format %{ "blsrq  $dst, $src" %}
13929 
13930   ins_encode %{
13931     __ blsrq($dst$$Register, $src$$Address);
13932   %}
13933 
13934   ins_pipe(ialu_reg);
13935 %}
13936 
13937 // Or Instructions
13938 // Or Register with Register
13939 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13940 %{
13941   predicate(!UseAPX);
13942   match(Set dst (OrL dst src));
13943   effect(KILL cr);
13944   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13945 
13946   format %{ "orq     $dst, $src\t# long" %}
13947   ins_encode %{
13948     __ orq($dst$$Register, $src$$Register);
13949   %}
13950   ins_pipe(ialu_reg_reg);
13951 %}
13952 
13953 // Or Register with Register using New Data Destination (NDD)
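// The APX NDD (new data destination) encodings take an explicit destination, so
// "eorq dst, src1, src2" computes dst = src1 | src2 without first copying src1 into dst.
// The Flag_ndd_demotable_opr* hints presumably let the encoding be demoted to the shorter
// legacy two-operand form when the allocator assigns dst to that operand.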
13954 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13955 %{
13956   predicate(UseAPX);
13957   match(Set dst (OrL src1 src2));
13958   effect(KILL cr);
13959   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13960 
13961   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13962   ins_encode %{
13963     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13964 
13965   %}
13966   ins_pipe(ialu_reg_reg);
13967 %}
13968 
13969 // Use any_RegP to match R15 (TLS register) without spilling.
13970 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
13971   match(Set dst (OrL dst (CastP2X src)));
13972   effect(KILL cr);
13973   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13974 
13975   format %{ "orq     $dst, $src\t# long" %}
13976   ins_encode %{
13977     __ orq($dst$$Register, $src$$Register);
13978   %}
13979   ins_pipe(ialu_reg_reg);
13980 %}
13981 
13982 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
13983   match(Set dst (OrL src1 (CastP2X src2)));
13984   effect(KILL cr);
13985   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13986 
13987   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13988   ins_encode %{
13989     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13990   %}
13991   ins_pipe(ialu_reg_reg);
13992 %}
13993 
13994 // Or Register with Immediate
13995 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13996 %{
13997   predicate(!UseAPX);
13998   match(Set dst (OrL dst src));
13999   effect(KILL cr);
14000   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14001 
14002   format %{ "orq     $dst, $src\t# long" %}
14003   ins_encode %{
14004     __ orq($dst$$Register, $src$$constant);
14005   %}
14006   ins_pipe(ialu_reg);
14007 %}
14008 
14009 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14010 %{
14011   predicate(UseAPX);
14012   match(Set dst (OrL src1 src2));
14013   effect(KILL cr);
14014   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14015 
14016   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14017   ins_encode %{
14018     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14019   %}
14020   ins_pipe(ialu_reg);
14021 %}
14022 
14023 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14024 %{
14025   predicate(UseAPX);
14026   match(Set dst (OrL src1 src2));
14027   effect(KILL cr);
14028   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14029 
14030   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
14031   ins_encode %{
14032     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14033   %}
14034   ins_pipe(ialu_reg);
14035 %}
14036 
14037 // Or Memory with Immediate
14038 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14039 %{
14040   predicate(UseAPX);
14041   match(Set dst (OrL (LoadL src1) src2));
14042   effect(KILL cr);
14043   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14044 
14045   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14046   ins_encode %{
14047     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14048   %}
14049   ins_pipe(ialu_reg);
14050 %}
14051 
14052 // Or Register with Memory
14053 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14054 %{
14055   predicate(!UseAPX);
14056   match(Set dst (OrL dst (LoadL src)));
14057   effect(KILL cr);
14058   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14059 
14060   ins_cost(150);
14061   format %{ "orq     $dst, $src\t# long" %}
14062   ins_encode %{
14063     __ orq($dst$$Register, $src$$Address);
14064   %}
14065   ins_pipe(ialu_reg_mem);
14066 %}
14067 
14068 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14069 %{
14070   predicate(UseAPX);
14071   match(Set dst (OrL src1 (LoadL src2)));
14072   effect(KILL cr);
14073   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14074 
14075   ins_cost(150);
14076   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14077   ins_encode %{
14078     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14079   %}
14080   ins_pipe(ialu_reg_mem);
14081 %}
14082 
14083 // Or Memory with Register
14084 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14085 %{
14086   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14087   effect(KILL cr);
14088   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14089 
14090   ins_cost(150);
14091   format %{ "orq     $dst, $src\t# long" %}
14092   ins_encode %{
14093     __ orq($dst$$Address, $src$$Register);
14094   %}
14095   ins_pipe(ialu_mem_reg);
14096 %}
14097 
14098 // Or Memory with Immediate
14099 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14100 %{
14101   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14102   effect(KILL cr);
14103   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14104 
14105   ins_cost(125);
14106   format %{ "orq     $dst, $src\t# long" %}
14107   ins_encode %{
14108     __ orq($dst$$Address, $src$$constant);
14109   %}
14110   ins_pipe(ialu_mem_imm);
14111 %}
14112 
14113 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14114 %{
14115   // con should be a pure 64-bit power of 2 immediate
14116   // because AND/OR works well enough for 8/32-bit values.
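  // For example (illustrative), con = 0x100000000 (bit 32) gives log2i_graceful(con) = 32 > 31,
  // so a single "btsq $dst, 32" sets the bit in memory instead of or-ing in a 64-bit constant
  // that would otherwise have to be loaded into a register.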
14117   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14118 
14119   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14120   effect(KILL cr);
14121 
14122   ins_cost(125);
14123   format %{ "btsq    $dst, log2($con)\t# long" %}
14124   ins_encode %{
14125     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14126   %}
14127   ins_pipe(ialu_mem_imm);
14128 %}
14129 
14130 // Xor Instructions
14131 // Xor Register with Register
14132 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14133 %{
14134   predicate(!UseAPX);
14135   match(Set dst (XorL dst src));
14136   effect(KILL cr);
14137   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14138 
14139   format %{ "xorq    $dst, $src\t# long" %}
14140   ins_encode %{
14141     __ xorq($dst$$Register, $src$$Register);
14142   %}
14143   ins_pipe(ialu_reg_reg);
14144 %}
14145 
14146 // Xor Register with Register using New Data Destination (NDD)
14147 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14148 %{
14149   predicate(UseAPX);
14150   match(Set dst (XorL src1 src2));
14151   effect(KILL cr);
14152   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14153 
14154   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14155   ins_encode %{
14156     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14157   %}
14158   ins_pipe(ialu_reg_reg);
14159 %}
14160 
14161 // Xor Register with Immediate -1
14162 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14163 %{
14164   predicate(!UseAPX);
14165   match(Set dst (XorL dst imm));
14166 
14167   format %{ "notq   $dst" %}
14168   ins_encode %{
14169      __ notq($dst$$Register);
14170   %}
14171   ins_pipe(ialu_reg);
14172 %}
14173 
14174 instruct xorL_rReg_im1_ndd(rRegL dst,rRegL src, immL_M1 imm)
14175 %{
14176   predicate(UseAPX);
14177   match(Set dst (XorL src imm));
14178   flag(PD::Flag_ndd_demotable_opr1);
14179 
14180   format %{ "enotq   $dst, $src" %}
14181   ins_encode %{
14182     __ enotq($dst$$Register, $src$$Register);
14183   %}
14184   ins_pipe(ialu_reg);
14185 %}
14186 
14187 // Xor Register with Immediate
14188 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14189 %{
14190   // Strict predicate check so that xorL_rReg_im1 is selected for a -1 immL32 src regardless of cost.
14191   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14192   match(Set dst (XorL dst src));
14193   effect(KILL cr);
14194   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14195 
14196   format %{ "xorq    $dst, $src\t# long" %}
14197   ins_encode %{
14198     __ xorq($dst$$Register, $src$$constant);
14199   %}
14200   ins_pipe(ialu_reg);
14201 %}
14202 
14203 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14204 %{
14205   // Strict predicate check so that xorL_rReg_im1_ndd is selected for a -1 immL32 src2 regardless of cost.
14206   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14207   match(Set dst (XorL src1 src2));
14208   effect(KILL cr);
14209   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14210 
14211   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14212   ins_encode %{
14213     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14214   %}
14215   ins_pipe(ialu_reg);
14216 %}
14217 
14218 // Xor Memory with Immediate
14219 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14220 %{
14221   predicate(UseAPX);
14222   match(Set dst (XorL (LoadL src1) src2));
14223   effect(KILL cr);
14224   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14225   ins_cost(150);
14226 
14227   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14228   ins_encode %{
14229     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14230   %}
14231   ins_pipe(ialu_reg);
14232 %}
14233 
14234 // Xor Register with Memory
14235 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14236 %{
14237   predicate(!UseAPX);
14238   match(Set dst (XorL dst (LoadL src)));
14239   effect(KILL cr);
14240   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14241 
14242   ins_cost(150);
14243   format %{ "xorq    $dst, $src\t# long" %}
14244   ins_encode %{
14245     __ xorq($dst$$Register, $src$$Address);
14246   %}
14247   ins_pipe(ialu_reg_mem);
14248 %}
14249 
14250 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14251 %{
14252   predicate(UseAPX);
14253   match(Set dst (XorL src1 (LoadL src2)));
14254   effect(KILL cr);
14255   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14256 
14257   ins_cost(150);
14258   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14259   ins_encode %{
14260     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14261   %}
14262   ins_pipe(ialu_reg_mem);
14263 %}
14264 
14265 // Xor Memory with Register
14266 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14267 %{
14268   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14269   effect(KILL cr);
14270   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14271 
14272   ins_cost(150);
14273   format %{ "xorq    $dst, $src\t# long" %}
14274   ins_encode %{
14275     __ xorq($dst$$Address, $src$$Register);
14276   %}
14277   ins_pipe(ialu_mem_reg);
14278 %}
14279 
14280 // Xor Memory with Immediate
14281 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14282 %{
14283   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14284   effect(KILL cr);
14285   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14286 
14287   ins_cost(125);
14288   format %{ "xorq    $dst, $src\t# long" %}
14289   ins_encode %{
14290     __ xorq($dst$$Address, $src$$constant);
14291   %}
14292   ins_pipe(ialu_mem_imm);
14293 %}
14294 
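// CmpLTMask produces an all-ones mask (-1) when p < q and zero otherwise:
// setcc(less) leaves 0 or 1 in dst, and negl turns the 1 into -1 (0 is unchanged).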
14295 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14296 %{
14297   match(Set dst (CmpLTMask p q));
14298   effect(KILL cr);
14299 
14300   ins_cost(400);
14301   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
14302             "setcc   $dst \t# emits setlt + movzbl or setzul for APX"
14303             "negl    $dst" %}
14304   ins_encode %{
14305     __ cmpl($p$$Register, $q$$Register);
14306     __ setcc(Assembler::less, $dst$$Register);
14307     __ negl($dst$$Register);
14308   %}
14309   ins_pipe(pipe_slow);
14310 %}
14311 
14312 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14313 %{
14314   match(Set dst (CmpLTMask dst zero));
14315   effect(KILL cr);
14316 
14317   ins_cost(100);
14318   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14319   ins_encode %{
14320     __ sarl($dst$$Register, 31);
14321   %}
14322   ins_pipe(ialu_reg);
14323 %}
14324 
14325 /* Better to save a register than avoid a branch */
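// Matches p = ((p < q ? -1 : 0) & y) + (p - q), i.e. p - q, plus y only when p < q.
// The branchy form subtracts first and conditionally adds y back when the difference is negative.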
14326 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14327 %{
14328   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14329   effect(KILL cr);
14330   ins_cost(300);
14331   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14332             "jge     done\n\t"
14333             "addl    $p,$y\n"
14334             "done:   " %}
14335   ins_encode %{
14336     Register Rp = $p$$Register;
14337     Register Rq = $q$$Register;
14338     Register Ry = $y$$Register;
14339     Label done;
14340     __ subl(Rp, Rq);
14341     __ jccb(Assembler::greaterEqual, done);
14342     __ addl(Rp, Ry);
14343     __ bind(done);
14344   %}
14345   ins_pipe(pipe_cmplt);
14346 %}
14347 
14348 /* Better to save a register than avoid a branch */
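// Matches y = (p < q ? -1 : 0) & y, i.e. y is kept when p < q and zeroed otherwise.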
14349 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14350 %{
14351   match(Set y (AndI (CmpLTMask p q) y));
14352   effect(KILL cr);
14353 
14354   ins_cost(300);
14355 
14356   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14357             "jlt     done\n\t"
14358             "xorl    $y, $y\n"
14359             "done:   " %}
14360   ins_encode %{
14361     Register Rp = $p$$Register;
14362     Register Rq = $q$$Register;
14363     Register Ry = $y$$Register;
14364     Label done;
14365     __ cmpl(Rp, Rq);
14366     __ jccb(Assembler::less, done);
14367     __ xorl(Ry, Ry);
14368     __ bind(done);
14369   %}
14370   ins_pipe(pipe_cmplt);
14371 %}
14372 
14373 
14374 //---------- FP Instructions------------------------------------------------
14375 
14376 // Really expensive, avoid
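// ucomiss/ucomisd set ZF, PF and CF when either operand is NaN (unordered); emit_cmpfp_fixup
// rewrites the saved flags (the pushfq/andq/popfq shown in the format keeps CF and clears ZF/PF),
// so an unordered result reads as "below" rather than "equal".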
14377 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14378 %{
14379   match(Set cr (CmpF src1 src2));
14380 
14381   ins_cost(500);
14382   format %{ "ucomiss $src1, $src2\n\t"
14383             "jnp,s   exit\n\t"
14384             "pushfq\t# saw NaN, set CF\n\t"
14385             "andq    [rsp], #0xffffff2b\n\t"
14386             "popfq\n"
14387     "exit:" %}
14388   ins_encode %{
14389     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14390     emit_cmpfp_fixup(masm);
14391   %}
14392   ins_pipe(pipe_slow);
14393 %}
14394 
14395 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14396   match(Set cr (CmpF src1 src2));
14397 
14398   ins_cost(100);
14399   format %{ "ucomiss $src1, $src2" %}
14400   ins_encode %{
14401     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14402   %}
14403   ins_pipe(pipe_slow);
14404 %}
14405 
14406 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14407   match(Set cr (CmpF src1 src2));
14408 
14409   ins_cost(100);
14410   format %{ "evucomxss $src1, $src2" %}
14411   ins_encode %{
14412     __ evucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14413   %}
14414   ins_pipe(pipe_slow);
14415 %}
14416 
14417 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14418   match(Set cr (CmpF src1 (LoadF src2)));
14419 
14420   ins_cost(100);
14421   format %{ "ucomiss $src1, $src2" %}
14422   ins_encode %{
14423     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14424   %}
14425   ins_pipe(pipe_slow);
14426 %}
14427 
14428 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14429   match(Set cr (CmpF src1 (LoadF src2)));
14430 
14431   ins_cost(100);
14432   format %{ "evucomxss $src1, $src2" %}
14433   ins_encode %{
14434     __ evucomxss($src1$$XMMRegister, $src2$$Address);
14435   %}
14436   ins_pipe(pipe_slow);
14437 %}
14438 
14439 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14440   match(Set cr (CmpF src con));
14441 
14442   ins_cost(100);
14443   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14444   ins_encode %{
14445     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14446   %}
14447   ins_pipe(pipe_slow);
14448 %}
14449 
14450 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14451   match(Set cr (CmpF src con));
14452 
14453   ins_cost(100);
14454   format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14455   ins_encode %{
14456     __ evucomxss($src$$XMMRegister, $constantaddress($con));
14457   %}
14458   ins_pipe(pipe_slow);
14459 %}
14460 
14461 // Really expensive, avoid
14462 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14463 %{
14464   match(Set cr (CmpD src1 src2));
14465 
14466   ins_cost(500);
14467   format %{ "ucomisd $src1, $src2\n\t"
14468             "jnp,s   exit\n\t"
14469             "pushfq\t# saw NaN, set CF\n\t"
14470             "andq    [rsp], #0xffffff2b\n\t"
14471             "popfq\n"
14472     "exit:" %}
14473   ins_encode %{
14474     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14475     emit_cmpfp_fixup(masm);
14476   %}
14477   ins_pipe(pipe_slow);
14478 %}
14479 
14480 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14481   match(Set cr (CmpD src1 src2));
14482 
14483   ins_cost(100);
14484   format %{ "ucomisd $src1, $src2 test" %}
14485   ins_encode %{
14486     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14487   %}
14488   ins_pipe(pipe_slow);
14489 %}
14490 
14491 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14492   match(Set cr (CmpD src1 src2));
14493 
14494   ins_cost(100);
14495   format %{ "evucomxsd $src1, $src2 test" %}
14496   ins_encode %{
14497     __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14498   %}
14499   ins_pipe(pipe_slow);
14500 %}
14501 
14502 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14503   match(Set cr (CmpD src1 (LoadD src2)));
14504 
14505   ins_cost(100);
14506   format %{ "ucomisd $src1, $src2" %}
14507   ins_encode %{
14508     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14509   %}
14510   ins_pipe(pipe_slow);
14511 %}
14512 
14513 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14514   match(Set cr (CmpD src1 (LoadD src2)));
14515 
14516   ins_cost(100);
14517   format %{ "evucomxsd $src1, $src2" %}
14518   ins_encode %{
14519     __ evucomxsd($src1$$XMMRegister, $src2$$Address);
14520   %}
14521   ins_pipe(pipe_slow);
14522 %}
14523 
14524 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14525   match(Set cr (CmpD src con));
14526   ins_cost(100);
14527   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14528   ins_encode %{
14529     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14530   %}
14531   ins_pipe(pipe_slow);
14532 %}
14533 
14534 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14535   match(Set cr (CmpD src con));
14536 
14537   ins_cost(100);
14538   format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14539   ins_encode %{
14540     __ evucomxsd($src$$XMMRegister, $constantaddress($con));
14541   %}
14542   ins_pipe(pipe_slow);
14543 %}
14544 
14545 // Compare into -1,0,1
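// emit_cmpfp3 materializes the three-way result: dst starts at -1 and keeps it on an unordered
// (PF) or below (CF) compare; otherwise setne/movzbl leaves 0 for equal and 1 for greater.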
14546 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14547 %{
14548   match(Set dst (CmpF3 src1 src2));
14549   effect(KILL cr);
14550 
14551   ins_cost(275);
14552   format %{ "ucomiss $src1, $src2\n\t"
14553             "movl    $dst, #-1\n\t"
14554             "jp,s    done\n\t"
14555             "jb,s    done\n\t"
14556             "setne   $dst\n\t"
14557             "movzbl  $dst, $dst\n"
14558     "done:" %}
14559   ins_encode %{
14560     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14561     emit_cmpfp3(masm, $dst$$Register);
14562   %}
14563   ins_pipe(pipe_slow);
14564 %}
14565 
14566 // Compare into -1,0,1
14567 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14568 %{
14569   match(Set dst (CmpF3 src1 (LoadF src2)));
14570   effect(KILL cr);
14571 
14572   ins_cost(275);
14573   format %{ "ucomiss $src1, $src2\n\t"
14574             "movl    $dst, #-1\n\t"
14575             "jp,s    done\n\t"
14576             "jb,s    done\n\t"
14577             "setne   $dst\n\t"
14578             "movzbl  $dst, $dst\n"
14579     "done:" %}
14580   ins_encode %{
14581     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14582     emit_cmpfp3(masm, $dst$$Register);
14583   %}
14584   ins_pipe(pipe_slow);
14585 %}
14586 
14587 // Compare into -1,0,1
14588 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14589   match(Set dst (CmpF3 src con));
14590   effect(KILL cr);
14591 
14592   ins_cost(275);
14593   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14594             "movl    $dst, #-1\n\t"
14595             "jp,s    done\n\t"
14596             "jb,s    done\n\t"
14597             "setne   $dst\n\t"
14598             "movzbl  $dst, $dst\n"
14599     "done:" %}
14600   ins_encode %{
14601     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14602     emit_cmpfp3(masm, $dst$$Register);
14603   %}
14604   ins_pipe(pipe_slow);
14605 %}
14606 
14607 // Compare into -1,0,1
14608 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14609 %{
14610   match(Set dst (CmpD3 src1 src2));
14611   effect(KILL cr);
14612 
14613   ins_cost(275);
14614   format %{ "ucomisd $src1, $src2\n\t"
14615             "movl    $dst, #-1\n\t"
14616             "jp,s    done\n\t"
14617             "jb,s    done\n\t"
14618             "setne   $dst\n\t"
14619             "movzbl  $dst, $dst\n"
14620     "done:" %}
14621   ins_encode %{
14622     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14623     emit_cmpfp3(masm, $dst$$Register);
14624   %}
14625   ins_pipe(pipe_slow);
14626 %}
14627 
14628 // Compare into -1,0,1
14629 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14630 %{
14631   match(Set dst (CmpD3 src1 (LoadD src2)));
14632   effect(KILL cr);
14633 
14634   ins_cost(275);
14635   format %{ "ucomisd $src1, $src2\n\t"
14636             "movl    $dst, #-1\n\t"
14637             "jp,s    done\n\t"
14638             "jb,s    done\n\t"
14639             "setne   $dst\n\t"
14640             "movzbl  $dst, $dst\n"
14641     "done:" %}
14642   ins_encode %{
14643     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14644     emit_cmpfp3(masm, $dst$$Register);
14645   %}
14646   ins_pipe(pipe_slow);
14647 %}
14648 
14649 // Compare into -1,0,1
14650 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14651   match(Set dst (CmpD3 src con));
14652   effect(KILL cr);
14653 
14654   ins_cost(275);
14655   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14656             "movl    $dst, #-1\n\t"
14657             "jp,s    done\n\t"
14658             "jb,s    done\n\t"
14659             "setne   $dst\n\t"
14660             "movzbl  $dst, $dst\n"
14661     "done:" %}
14662   ins_encode %{
14663     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14664     emit_cmpfp3(masm, $dst$$Register);
14665   %}
14666   ins_pipe(pipe_slow);
14667 %}
14668 
14669 //----------Arithmetic Conversion Instructions---------------------------------
14670 
14671 instruct convF2D_reg_reg(regD dst, regF src)
14672 %{
14673   match(Set dst (ConvF2D src));
14674 
14675   format %{ "cvtss2sd $dst, $src" %}
14676   ins_encode %{
14677     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14678   %}
14679   ins_pipe(pipe_slow); // XXX
14680 %}
14681 
14682 instruct convF2D_reg_mem(regD dst, memory src)
14683 %{
14684   predicate(UseAVX == 0);
14685   match(Set dst (ConvF2D (LoadF src)));
14686 
14687   format %{ "cvtss2sd $dst, $src" %}
14688   ins_encode %{
14689     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14690   %}
14691   ins_pipe(pipe_slow); // XXX
14692 %}
14693 
14694 instruct convD2F_reg_reg(regF dst, regD src)
14695 %{
14696   match(Set dst (ConvD2F src));
14697 
14698   format %{ "cvtsd2ss $dst, $src" %}
14699   ins_encode %{
14700     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14701   %}
14702   ins_pipe(pipe_slow); // XXX
14703 %}
14704 
14705 instruct convD2F_reg_mem(regF dst, memory src)
14706 %{
14707   predicate(UseAVX == 0);
14708   match(Set dst (ConvD2F (LoadD src)));
14709 
14710   format %{ "cvtsd2ss $dst, $src" %}
14711   ins_encode %{
14712     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14713   %}
14714   ins_pipe(pipe_slow); // XXX
14715 %}
14716 
14717 // XXX do mem variants
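// The pairs below are selected on VM_Version::supports_avx10_2(): the AVX10.2 saturating
// conversions (evcvttss2sis*/evcvttsd2sis*) appear to produce the Java-mandated result for
// NaN and out-of-range inputs directly, so they need no flag-killing fixup (note the missing
// KILL cr), while the legacy path goes through convertF2I.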
14718 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14719 %{
14720   predicate(!VM_Version::supports_avx10_2());
14721   match(Set dst (ConvF2I src));
14722   effect(KILL cr);
14723   format %{ "convert_f2i $dst, $src" %}
14724   ins_encode %{
14725     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14726   %}
14727   ins_pipe(pipe_slow);
14728 %}
14729 
14730 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14731 %{
14732   predicate(VM_Version::supports_avx10_2());
14733   match(Set dst (ConvF2I src));
14734   format %{ "evcvttss2sisl $dst, $src" %}
14735   ins_encode %{
14736     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14737   %}
14738   ins_pipe(pipe_slow);
14739 %}
14740 
14741 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14742 %{
14743   predicate(VM_Version::supports_avx10_2());
14744   match(Set dst (ConvF2I (LoadF src)));
14745   format %{ "evcvttss2sisl $dst, $src" %}
14746   ins_encode %{
14747     __ evcvttss2sisl($dst$$Register, $src$$Address);
14748   %}
14749   ins_pipe(pipe_slow);
14750 %}
14751 
14752 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14753 %{
14754   predicate(!VM_Version::supports_avx10_2());
14755   match(Set dst (ConvF2L src));
14756   effect(KILL cr);
14757   format %{ "convert_f2l $dst, $src"%}
14758   ins_encode %{
14759     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14760   %}
14761   ins_pipe(pipe_slow);
14762 %}
14763 
14764 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14765 %{
14766   predicate(VM_Version::supports_avx10_2());
14767   match(Set dst (ConvF2L src));
14768   format %{ "evcvttss2sisq $dst, $src" %}
14769   ins_encode %{
14770     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14771   %}
14772   ins_pipe(pipe_slow);
14773 %}
14774 
14775 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14776 %{
14777   predicate(VM_Version::supports_avx10_2());
14778   match(Set dst (ConvF2L (LoadF src)));
14779   format %{ "evcvttss2sisq $dst, $src" %}
14780   ins_encode %{
14781     __ evcvttss2sisq($dst$$Register, $src$$Address);
14782   %}
14783   ins_pipe(pipe_slow);
14784 %}
14785 
14786 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14787 %{
14788   predicate(!VM_Version::supports_avx10_2());
14789   match(Set dst (ConvD2I src));
14790   effect(KILL cr);
14791   format %{ "convert_d2i $dst, $src"%}
14792   ins_encode %{
14793     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14794   %}
14795   ins_pipe(pipe_slow);
14796 %}
14797 
14798 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14799 %{
14800   predicate(VM_Version::supports_avx10_2());
14801   match(Set dst (ConvD2I src));
14802   format %{ "evcvttsd2sisl $dst, $src" %}
14803   ins_encode %{
14804     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14805   %}
14806   ins_pipe(pipe_slow);
14807 %}
14808 
14809 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14810 %{
14811   predicate(VM_Version::supports_avx10_2());
14812   match(Set dst (ConvD2I (LoadD src)));
14813   format %{ "evcvttsd2sisl $dst, $src" %}
14814   ins_encode %{
14815     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14816   %}
14817   ins_pipe(pipe_slow);
14818 %}
14819 
14820 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14821 %{
14822   predicate(!VM_Version::supports_avx10_2());
14823   match(Set dst (ConvD2L src));
14824   effect(KILL cr);
14825   format %{ "convert_d2l $dst, $src"%}
14826   ins_encode %{
14827     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14828   %}
14829   ins_pipe(pipe_slow);
14830 %}
14831 
14832 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14833 %{
14834   predicate(VM_Version::supports_avx10_2());
14835   match(Set dst (ConvD2L src));
14836   format %{ "evcvttsd2sisq $dst, $src" %}
14837   ins_encode %{
14838     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14839   %}
14840   ins_pipe(pipe_slow);
14841 %}
14842 
14843 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14844 %{
14845   predicate(VM_Version::supports_avx10_2());
14846   match(Set dst (ConvD2L (LoadD src)));
14847   format %{ "evcvttsd2sisq $dst, $src" %}
14848   ins_encode %{
14849     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14850   %}
14851   ins_pipe(pipe_slow);
14852 %}
14853 
14854 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14855 %{
14856   match(Set dst (RoundD src));
14857   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14858   format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14859   ins_encode %{
14860     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14861   %}
14862   ins_pipe(pipe_slow);
14863 %}
14864 
14865 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14866 %{
14867   match(Set dst (RoundF src));
14868   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14869   format %{ "round_float $dst,$src" %}
14870   ins_encode %{
14871     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14872   %}
14873   ins_pipe(pipe_slow);
14874 %}
14875 
14876 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14877 %{
14878   predicate(!UseXmmI2F);
14879   match(Set dst (ConvI2F src));
14880 
14881   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14882   ins_encode %{
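    // With AVX, cvtsi2ssl merges into the destination's upper lanes, so clear dst first
    // to break the false dependency on whatever last wrote that register.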
14883     if (UseAVX > 0) {
14884       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14885     }
14886     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14887   %}
14888   ins_pipe(pipe_slow); // XXX
14889 %}
14890 
14891 instruct convI2F_reg_mem(regF dst, memory src)
14892 %{
14893   predicate(UseAVX == 0);
14894   match(Set dst (ConvI2F (LoadI src)));
14895 
14896   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14897   ins_encode %{
14898     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14899   %}
14900   ins_pipe(pipe_slow); // XXX
14901 %}
14902 
14903 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14904 %{
14905   predicate(!UseXmmI2D);
14906   match(Set dst (ConvI2D src));
14907 
14908   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14909   ins_encode %{
14910     if (UseAVX > 0) {
14911       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14912     }
14913     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14914   %}
14915   ins_pipe(pipe_slow); // XXX
14916 %}
14917 
14918 instruct convI2D_reg_mem(regD dst, memory src)
14919 %{
14920   predicate(UseAVX == 0);
14921   match(Set dst (ConvI2D (LoadI src)));
14922 
14923   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14924   ins_encode %{
14925     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14926   %}
14927   ins_pipe(pipe_slow); // XXX
14928 %}
14929 
14930 instruct convXI2F_reg(regF dst, rRegI src)
14931 %{
14932   predicate(UseXmmI2F);
14933   match(Set dst (ConvI2F src));
14934 
14935   format %{ "movdl $dst, $src\n\t"
14936             "cvtdq2psl $dst, $dst\t# i2f" %}
14937   ins_encode %{
14938     __ movdl($dst$$XMMRegister, $src$$Register);
14939     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14940   %}
14941   ins_pipe(pipe_slow); // XXX
14942 %}
14943 
14944 instruct convXI2D_reg(regD dst, rRegI src)
14945 %{
14946   predicate(UseXmmI2D);
14947   match(Set dst (ConvI2D src));
14948 
14949   format %{ "movdl $dst, $src\n\t"
14950             "cvtdq2pdl $dst, $dst\t# i2d" %}
14951   ins_encode %{
14952     __ movdl($dst$$XMMRegister, $src$$Register);
14953     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14954   %}
14955   ins_pipe(pipe_slow); // XXX
14956 %}
14957 
14958 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14959 %{
14960   match(Set dst (ConvL2F src));
14961 
14962   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14963   ins_encode %{
14964     if (UseAVX > 0) {
14965       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14966     }
14967     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14968   %}
14969   ins_pipe(pipe_slow); // XXX
14970 %}
14971 
14972 instruct convL2F_reg_mem(regF dst, memory src)
14973 %{
14974   predicate(UseAVX == 0);
14975   match(Set dst (ConvL2F (LoadL src)));
14976 
14977   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14978   ins_encode %{
14979     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14980   %}
14981   ins_pipe(pipe_slow); // XXX
14982 %}
14983 
14984 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14985 %{
14986   match(Set dst (ConvL2D src));
14987 
14988   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14989   ins_encode %{
14990     if (UseAVX > 0) {
14991       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14992     }
14993     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14994   %}
14995   ins_pipe(pipe_slow); // XXX
14996 %}
14997 
14998 instruct convL2D_reg_mem(regD dst, memory src)
14999 %{
15000   predicate(UseAVX == 0);
15001   match(Set dst (ConvL2D (LoadL src)));
15002 
15003   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15004   ins_encode %{
15005     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
15006   %}
15007   ins_pipe(pipe_slow); // XXX
15008 %}
15009 
15010 instruct convI2L_reg_reg(rRegL dst, rRegI src)
15011 %{
15012   match(Set dst (ConvI2L src));
15013 
15014   ins_cost(125);
15015   format %{ "movslq  $dst, $src\t# i2l" %}
15016   ins_encode %{
15017     __ movslq($dst$$Register, $src$$Register);
15018   %}
15019   ins_pipe(ialu_reg_reg);
15020 %}
15021 
15022 // Zero-extend convert int to long
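// Writing a 32-bit register implicitly zeroes bits 63:32, so a plain movl performs the
// zero-extension; when dst and src are already the same register, no code is emitted at all.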
15023 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
15024 %{
15025   match(Set dst (AndL (ConvI2L src) mask));
15026 
15027   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
15028   ins_encode %{
15029     if ($dst$$reg != $src$$reg) {
15030       __ movl($dst$$Register, $src$$Register);
15031     }
15032   %}
15033   ins_pipe(ialu_reg_reg);
15034 %}
15035 
15036 // Zero-extend convert int to long
15037 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15038 %{
15039   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15040 
15041   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
15042   ins_encode %{
15043     __ movl($dst$$Register, $src$$Address);
15044   %}
15045   ins_pipe(ialu_reg_mem);
15046 %}
15047 
15048 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15049 %{
15050   match(Set dst (AndL src mask));
15051 
15052   format %{ "movl    $dst, $src\t# zero-extend long" %}
15053   ins_encode %{
15054     __ movl($dst$$Register, $src$$Register);
15055   %}
15056   ins_pipe(ialu_reg_reg);
15057 %}
15058 
15059 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15060 %{
15061   match(Set dst (ConvL2I src));
15062 
15063   format %{ "movl    $dst, $src\t# l2i" %}
15064   ins_encode %{
15065     __ movl($dst$$Register, $src$$Register);
15066   %}
15067   ins_pipe(ialu_reg_reg);
15068 %}
15069 
15070 
15071 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15072   match(Set dst (MoveF2I src));
15073   effect(DEF dst, USE src);
15074 
15075   ins_cost(125);
15076   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
15077   ins_encode %{
15078     __ movl($dst$$Register, Address(rsp, $src$$disp));
15079   %}
15080   ins_pipe(ialu_reg_mem);
15081 %}
15082 
15083 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15084   match(Set dst (MoveI2F src));
15085   effect(DEF dst, USE src);
15086 
15087   ins_cost(125);
15088   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
15089   ins_encode %{
15090     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15091   %}
15092   ins_pipe(pipe_slow);
15093 %}
15094 
15095 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15096   match(Set dst (MoveD2L src));
15097   effect(DEF dst, USE src);
15098 
15099   ins_cost(125);
15100   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
15101   ins_encode %{
15102     __ movq($dst$$Register, Address(rsp, $src$$disp));
15103   %}
15104   ins_pipe(ialu_reg_mem);
15105 %}
15106 
15107 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15108   predicate(!UseXmmLoadAndClearUpper);
15109   match(Set dst (MoveL2D src));
15110   effect(DEF dst, USE src);
15111 
15112   ins_cost(125);
15113   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
15114   ins_encode %{
15115     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15116   %}
15117   ins_pipe(pipe_slow);
15118 %}
15119 
15120 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15121   predicate(UseXmmLoadAndClearUpper);
15122   match(Set dst (MoveL2D src));
15123   effect(DEF dst, USE src);
15124 
15125   ins_cost(125);
15126   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
15127   ins_encode %{
15128     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15129   %}
15130   ins_pipe(pipe_slow);
15131 %}
15132 
15133 
15134 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15135   match(Set dst (MoveF2I src));
15136   effect(DEF dst, USE src);
15137 
15138   ins_cost(95); // XXX
15139   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
15140   ins_encode %{
15141     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15142   %}
15143   ins_pipe(pipe_slow);
15144 %}
15145 
15146 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15147   match(Set dst (MoveI2F src));
15148   effect(DEF dst, USE src);
15149 
15150   ins_cost(100);
15151   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
15152   ins_encode %{
15153     __ movl(Address(rsp, $dst$$disp), $src$$Register);
15154   %}
15155   ins_pipe( ialu_mem_reg );
15156 %}
15157 
15158 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15159   match(Set dst (MoveD2L src));
15160   effect(DEF dst, USE src);
15161 
15162   ins_cost(95); // XXX
15163   format %{ "movsd   $dst, $src\t# MoveL2D_reg_stack" %}
15164   ins_encode %{
15165     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15166   %}
15167   ins_pipe(pipe_slow);
15168 %}
15169 
15170 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15171   match(Set dst (MoveL2D src));
15172   effect(DEF dst, USE src);
15173 
15174   ins_cost(100);
15175   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
15176   ins_encode %{
15177     __ movq(Address(rsp, $dst$$disp), $src$$Register);
15178   %}
15179   ins_pipe(ialu_mem_reg);
15180 %}
15181 
15182 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15183   match(Set dst (MoveF2I src));
15184   effect(DEF dst, USE src);
15185   ins_cost(85);
15186   format %{ "movd    $dst,$src\t# MoveF2I" %}
15187   ins_encode %{
15188     __ movdl($dst$$Register, $src$$XMMRegister);
15189   %}
15190   ins_pipe( pipe_slow );
15191 %}
15192 
15193 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15194   match(Set dst (MoveD2L src));
15195   effect(DEF dst, USE src);
15196   ins_cost(85);
15197   format %{ "movd    $dst,$src\t# MoveD2L" %}
15198   ins_encode %{
15199     __ movdq($dst$$Register, $src$$XMMRegister);
15200   %}
15201   ins_pipe( pipe_slow );
15202 %}
15203 
15204 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15205   match(Set dst (MoveI2F src));
15206   effect(DEF dst, USE src);
15207   ins_cost(100);
15208   format %{ "movd    $dst,$src\t# MoveI2F" %}
15209   ins_encode %{
15210     __ movdl($dst$$XMMRegister, $src$$Register);
15211   %}
15212   ins_pipe( pipe_slow );
15213 %}
15214 
15215 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15216   match(Set dst (MoveL2D src));
15217   effect(DEF dst, USE src);
15218   ins_cost(100);
15219   format %{ "movd    $dst,$src\t# MoveL2D" %}
15220   ins_encode %{
15221      __ movdq($dst$$XMMRegister, $src$$Register);
15222   %}
15223   ins_pipe( pipe_slow );
15224 %}
15225 
15226 // Fast clearing of an array
15227 // Small non-constant length ClearArray for non-AVX512 targets.
15228 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15229                   Universe dummy, rFlagsReg cr)
15230 %{
15231   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15232   match(Set dummy (ClearArray cnt base));
15233   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15234 
15235   format %{ $$template
15236     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15237     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15238     $$emit$$"jg      LARGE\n\t"
15239     $$emit$$"dec     rcx\n\t"
15240     $$emit$$"js      DONE\t# Zero length\n\t"
15241     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15242     $$emit$$"dec     rcx\n\t"
15243     $$emit$$"jge     LOOP\n\t"
15244     $$emit$$"jmp     DONE\n\t"
15245     $$emit$$"# LARGE:\n\t"
15246     if (UseFastStosb) {
15247        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15248        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15249     } else if (UseXMMForObjInit) {
15250        $$emit$$"mov     rdi,rax\n\t"
15251        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15252        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15253        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15254        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15255        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15256        $$emit$$"add     0x40,rax\n\t"
15257        $$emit$$"# L_zero_64_bytes:\n\t"
15258        $$emit$$"sub     0x8,rcx\n\t"
15259        $$emit$$"jge     L_loop\n\t"
15260        $$emit$$"add     0x4,rcx\n\t"
15261        $$emit$$"jl      L_tail\n\t"
15262        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15263        $$emit$$"add     0x20,rax\n\t"
15264        $$emit$$"sub     0x4,rcx\n\t"
15265        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15266        $$emit$$"add     0x4,rcx\n\t"
15267        $$emit$$"jle     L_end\n\t"
15268        $$emit$$"dec     rcx\n\t"
15269        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15270        $$emit$$"vmovq   xmm0,(rax)\n\t"
15271        $$emit$$"add     0x8,rax\n\t"
15272        $$emit$$"dec     rcx\n\t"
15273        $$emit$$"jge     L_sloop\n\t"
15274        $$emit$$"# L_end:\n\t"
15275     } else {
15276        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15277     }
15278     $$emit$$"# DONE"
15279   %}
15280   ins_encode %{
15281     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15282                  $tmp$$XMMRegister, false, knoreg);
15283   %}
15284   ins_pipe(pipe_slow);
15285 %}
15286 
15287 // Small non-constant length ClearArray for AVX512 targets.
15288 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15289                        Universe dummy, rFlagsReg cr)
15290 %{
15291   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15292   match(Set dummy (ClearArray cnt base));
15293   ins_cost(125);
15294   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15295 
15296   format %{ $$template
15297     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15298     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15299     $$emit$$"jg      LARGE\n\t"
15300     $$emit$$"dec     rcx\n\t"
15301     $$emit$$"js      DONE\t# Zero length\n\t"
15302     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15303     $$emit$$"dec     rcx\n\t"
15304     $$emit$$"jge     LOOP\n\t"
15305     $$emit$$"jmp     DONE\n\t"
15306     $$emit$$"# LARGE:\n\t"
15307     if (UseFastStosb) {
15308        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15309        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15310     } else if (UseXMMForObjInit) {
15311        $$emit$$"mov     rdi,rax\n\t"
15312        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15313        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15314        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15315        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15316        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15317        $$emit$$"add     0x40,rax\n\t"
15318        $$emit$$"# L_zero_64_bytes:\n\t"
15319        $$emit$$"sub     0x8,rcx\n\t"
15320        $$emit$$"jge     L_loop\n\t"
15321        $$emit$$"add     0x4,rcx\n\t"
15322        $$emit$$"jl      L_tail\n\t"
15323        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15324        $$emit$$"add     0x20,rax\n\t"
15325        $$emit$$"sub     0x4,rcx\n\t"
15326        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15327        $$emit$$"add     0x4,rcx\n\t"
15328        $$emit$$"jle     L_end\n\t"
15329        $$emit$$"dec     rcx\n\t"
15330        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15331        $$emit$$"vmovq   xmm0,(rax)\n\t"
15332        $$emit$$"add     0x8,rax\n\t"
15333        $$emit$$"dec     rcx\n\t"
15334        $$emit$$"jge     L_sloop\n\t"
15335        $$emit$$"# L_end:\n\t"
15336     } else {
15337        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15338     }
15339     $$emit$$"# DONE"
15340   %}
15341   ins_encode %{
15342     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15343                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15344   %}
15345   ins_pipe(pipe_slow);
15346 %}
15347 
15348 // Large non-constant length ClearArray for non-AVX512 targets.
15349 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15350                         Universe dummy, rFlagsReg cr)
15351 %{
15352   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
15353   match(Set dummy (ClearArray cnt base));
15354   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15355 
15356   format %{ $$template
15357     if (UseFastStosb) {
15358        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15359        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15360        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15361     } else if (UseXMMForObjInit) {
15362        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15363        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15364        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15365        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15366        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15367        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15368        $$emit$$"add     0x40,rax\n\t"
15369        $$emit$$"# L_zero_64_bytes:\n\t"
15370        $$emit$$"sub     0x8,rcx\n\t"
15371        $$emit$$"jge     L_loop\n\t"
15372        $$emit$$"add     0x4,rcx\n\t"
15373        $$emit$$"jl      L_tail\n\t"
15374        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15375        $$emit$$"add     0x20,rax\n\t"
15376        $$emit$$"sub     0x4,rcx\n\t"
15377        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15378        $$emit$$"add     0x4,rcx\n\t"
15379        $$emit$$"jle     L_end\n\t"
15380        $$emit$$"dec     rcx\n\t"
15381        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15382        $$emit$$"vmovq   xmm0,(rax)\n\t"
15383        $$emit$$"add     0x8,rax\n\t"
15384        $$emit$$"dec     rcx\n\t"
15385        $$emit$$"jge     L_sloop\n\t"
15386        $$emit$$"# L_end:\n\t"
15387     } else {
15388        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15389        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15390     }
15391   %}
15392   ins_encode %{
15393     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15394                  $tmp$$XMMRegister, true, knoreg);
15395   %}
15396   ins_pipe(pipe_slow);
15397 %}
15398 
15399 // Large non-constant length ClearArray for AVX512 targets.
15400 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15401                              Universe dummy, rFlagsReg cr)
15402 %{
15403   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15404   match(Set dummy (ClearArray cnt base));
15405   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15406 
15407   format %{ $$template
15408     if (UseFastStosb) {
15409        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15410        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15411        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15412     } else if (UseXMMForObjInit) {
15413        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15414        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15415        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15416        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15417        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15418        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15419        $$emit$$"add     0x40,rax\n\t"
15420        $$emit$$"# L_zero_64_bytes:\n\t"
15421        $$emit$$"sub     0x8,rcx\n\t"
15422        $$emit$$"jge     L_loop\n\t"
15423        $$emit$$"add     0x4,rcx\n\t"
15424        $$emit$$"jl      L_tail\n\t"
15425        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15426        $$emit$$"add     0x20,rax\n\t"
15427        $$emit$$"sub     0x4,rcx\n\t"
15428        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15429        $$emit$$"add     0x4,rcx\n\t"
15430        $$emit$$"jle     L_end\n\t"
15431        $$emit$$"dec     rcx\n\t"
15432        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15433        $$emit$$"vmovq   xmm0,(rax)\n\t"
15434        $$emit$$"add     0x8,rax\n\t"
15435        $$emit$$"dec     rcx\n\t"
15436        $$emit$$"jge     L_sloop\n\t"
15437        $$emit$$"# L_end:\n\t"
15438     } else {
15439        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15440        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15441     }
15442   %}
15443   ins_encode %{
15444     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15445                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15446   %}
15447   ins_pipe(pipe_slow);
15448 %}
15449 
15450 // Small constant length ClearArray for AVX512 targets.
15451 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15452 %{
15453   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15454   match(Set dummy (ClearArray cnt base));
15455   ins_cost(100);
15456   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15457   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15458   ins_encode %{
15459    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15460   %}
15461   ins_pipe(pipe_slow);
15462 %}
15463 
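// String compare instructions. The StrIntrinsicNode encoding names the
// element width of each operand: LL = both Latin-1 (byte[]), UU = both
// UTF-16 (char[]), LU and UL = mixed. The *_evex variants additionally
// require AVX-512 VL+BW and pass an opmask (kReg) temporary to the macro
// assembler.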
15464 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15465                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15466 %{
15467   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15468   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15469   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15470 
15471   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15472   ins_encode %{
15473     __ string_compare($str1$$Register, $str2$$Register,
15474                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15475                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15476   %}
15477   ins_pipe( pipe_slow );
15478 %}
15479 
15480 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15481                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15482 %{
15483   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15484   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15485   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15486 
15487   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15488   ins_encode %{
15489     __ string_compare($str1$$Register, $str2$$Register,
15490                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15491                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15492   %}
15493   ins_pipe( pipe_slow );
15494 %}
15495 
15496 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15497                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15498 %{
15499   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15500   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15501   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15502 
15503   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15504   ins_encode %{
15505     __ string_compare($str1$$Register, $str2$$Register,
15506                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15507                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15508   %}
15509   ins_pipe( pipe_slow );
15510 %}
15511 
15512 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15513                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15514 %{
15515   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15516   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15517   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15518 
15519   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15520   ins_encode %{
15521     __ string_compare($str1$$Register, $str2$$Register,
15522                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15523                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15524   %}
15525   ins_pipe( pipe_slow );
15526 %}
15527 
15528 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15529                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15530 %{
15531   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15532   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15533   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15534 
15535   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15536   ins_encode %{
15537     __ string_compare($str1$$Register, $str2$$Register,
15538                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15539                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15540   %}
15541   ins_pipe( pipe_slow );
15542 %}
15543 
15544 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15545                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15546 %{
15547   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15548   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15549   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15550 
15551   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15552   ins_encode %{
15553     __ string_compare($str1$$Register, $str2$$Register,
15554                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15555                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15556   %}
15557   ins_pipe( pipe_slow );
15558 %}
15559 
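// For the UL flavors the fixed register assignment is reversed (str1 in RSI,
// str2 in RDI) and the encode block hands str2/cnt2 to
// MacroAssembler::string_compare as the first operand pair.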
15560 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15561                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15562 %{
15563   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15564   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15565   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15566 
15567   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15568   ins_encode %{
15569     __ string_compare($str2$$Register, $str1$$Register,
15570                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15571                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15572   %}
15573   ins_pipe( pipe_slow );
15574 %}
15575 
15576 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15577                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15578 %{
15579   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15580   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15581   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15582 
15583   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15584   ins_encode %{
15585     __ string_compare($str2$$Register, $str1$$Register,
15586                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15587                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15588   %}
15589   ins_pipe( pipe_slow );
15590 %}
15591 
15592 // fast search of substring with known size.
15593 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15594                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15595 %{
15596   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15597   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15598   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15599 
15600   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15601   ins_encode %{
15602     int icnt2 = (int)$int_cnt2$$constant;
15603     if (icnt2 >= 16) {
15604       // IndexOf for constant substrings with size >= 16 elements
15605       // which don't need to be loaded through stack.
15606       __ string_indexofC8($str1$$Register, $str2$$Register,
15607                           $cnt1$$Register, $cnt2$$Register,
15608                           icnt2, $result$$Register,
15609                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15610     } else {
15611       // Small strings are loaded through stack if they cross page boundary.
15612       __ string_indexof($str1$$Register, $str2$$Register,
15613                         $cnt1$$Register, $cnt2$$Register,
15614                         icnt2, $result$$Register,
15615                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15616     }
15617   %}
15618   ins_pipe( pipe_slow );
15619 %}
15620 
15621 // fast search of substring with known size.
15622 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15623                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15624 %{
15625   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15626   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15627   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15628 
15629   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15630   ins_encode %{
15631     int icnt2 = (int)$int_cnt2$$constant;
15632     if (icnt2 >= 8) {
15633       // IndexOf for constant substrings with size >= 8 elements
15634       // which don't need to be loaded through stack.
15635       __ string_indexofC8($str1$$Register, $str2$$Register,
15636                           $cnt1$$Register, $cnt2$$Register,
15637                           icnt2, $result$$Register,
15638                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15639     } else {
15640       // Small strings are loaded through stack if they cross page boundary.
15641       __ string_indexof($str1$$Register, $str2$$Register,
15642                         $cnt1$$Register, $cnt2$$Register,
15643                         icnt2, $result$$Register,
15644                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15645     }
15646   %}
15647   ins_pipe( pipe_slow );
15648 %}
15649 
15650 // fast search of substring with known size.
15651 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15652                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15653 %{
15654   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15655   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15656   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15657 
15658   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15659   ins_encode %{
15660     int icnt2 = (int)$int_cnt2$$constant;
15661     if (icnt2 >= 8) {
15662       // IndexOf for constant substrings with size >= 8 elements
15663       // which don't need to be loaded through stack.
15664       __ string_indexofC8($str1$$Register, $str2$$Register,
15665                           $cnt1$$Register, $cnt2$$Register,
15666                           icnt2, $result$$Register,
15667                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15668     } else {
15669       // Small strings are loaded through stack if they cross page boundary.
15670       __ string_indexof($str1$$Register, $str2$$Register,
15671                         $cnt1$$Register, $cnt2$$Register,
15672                         icnt2, $result$$Register,
15673                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15674     }
15675   %}
15676   ins_pipe( pipe_slow );
15677 %}
15678 
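// General IndexOf with a runtime substring length: the constant-count
// argument to string_indexof is passed as -1 to indicate that cnt2 is only
// known at runtime.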
15679 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15680                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15681 %{
15682   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15683   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15684   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15685 
15686   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15687   ins_encode %{
15688     __ string_indexof($str1$$Register, $str2$$Register,
15689                       $cnt1$$Register, $cnt2$$Register,
15690                       (-1), $result$$Register,
15691                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15692   %}
15693   ins_pipe( pipe_slow );
15694 %}
15695 
15696 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15697                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15698 %{
15699   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15700   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15701   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15702 
15703   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15704   ins_encode %{
15705     __ string_indexof($str1$$Register, $str2$$Register,
15706                       $cnt1$$Register, $cnt2$$Register,
15707                       (-1), $result$$Register,
15708                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15709   %}
15710   ins_pipe( pipe_slow );
15711 %}
15712 
15713 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15714                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15715 %{
15716   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15717   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15718   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15719 
15720   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15721   ins_encode %{
15722     __ string_indexof($str1$$Register, $str2$$Register,
15723                       $cnt1$$Register, $cnt2$$Register,
15724                       (-1), $result$$Register,
15725                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15726   %}
15727   ins_pipe( pipe_slow );
15728 %}
15729 
15730 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15731                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15732 %{
15733   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15734   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15735   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15736   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15737   ins_encode %{
15738     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15739                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15740   %}
15741   ins_pipe( pipe_slow );
15742 %}
15743 
15744 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15745                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15746 %{
15747   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15748   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15749   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15750   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15751   ins_encode %{
15752     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15753                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15754   %}
15755   ins_pipe( pipe_slow );
15756 %}
15757 
15758 // fast string equals
15759 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15760                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15761 %{
15762   predicate(!VM_Version::supports_avx512vlbw());
15763   match(Set result (StrEquals (Binary str1 str2) cnt));
15764   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15765 
15766   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15767   ins_encode %{
15768     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15769                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15770                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15771   %}
15772   ins_pipe( pipe_slow );
15773 %}
15774 
15775 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15776                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15777 %{
15778   predicate(VM_Version::supports_avx512vlbw());
15779   match(Set result (StrEquals (Binary str1 str2) cnt));
15780   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15781 
15782   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15783   ins_encode %{
15784     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15785                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15786                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15787   %}
15788   ins_pipe( pipe_slow );
15789 %}
15790 
15791 // fast array equals
15792 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15793                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15794 %{
15795   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15796   match(Set result (AryEq ary1 ary2));
15797   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15798 
15799   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15800   ins_encode %{
15801     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15802                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15803                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15804   %}
15805   ins_pipe( pipe_slow );
15806 %}
15807 
15808 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15809                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15810 %{
15811   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15812   match(Set result (AryEq ary1 ary2));
15813   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15814 
15815   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15816   ins_encode %{
15817     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15818                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15819                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15820   %}
15821   ins_pipe( pipe_slow );
15822 %}
15823 
15824 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15825                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15826 %{
15827   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15828   match(Set result (AryEq ary1 ary2));
15829   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15830 
15831   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15832   ins_encode %{
15833     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15834                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15835                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15836   %}
15837   ins_pipe( pipe_slow );
15838 %}
15839 
15840 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15841                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15842 %{
15843   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15844   match(Set result (AryEq ary1 ary2));
15845   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15846 
15847   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15848   ins_encode %{
15849     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15850                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15851                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15852   %}
15853   ins_pipe( pipe_slow );
15854 %}
15855 
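// Vectorized array hash code computation (requires AVX2). The element type
// is handed to the macro assembler as the basic_type immediate.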
15856 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15857                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15858                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15859                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15860                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15861 %{
15862   predicate(UseAVX >= 2);
15863   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15864   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15865          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15866          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15867          USE basic_type, KILL cr);
15868 
15869   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
15870   ins_encode %{
15871     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15872                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15873                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15874                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15875                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15876                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15877                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15878   %}
15879   ins_pipe( pipe_slow );
15880 %}
15881 
15882 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15884 %{
15885   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15886   match(Set result (CountPositives ary1 len));
15887   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15888 
15889   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15890   ins_encode %{
15891     __ count_positives($ary1$$Register, $len$$Register,
15892                        $result$$Register, $tmp3$$Register,
15893                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15894   %}
15895   ins_pipe( pipe_slow );
15896 %}
15897 
15898 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15900 %{
15901   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15902   match(Set result (CountPositives ary1 len));
15903   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15904 
15905   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15906   ins_encode %{
15907     __ count_positives($ary1$$Register, $len$$Register,
15908                        $result$$Register, $tmp3$$Register,
15909                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15910   %}
15911   ins_pipe( pipe_slow );
15912 %}
15913 
15914 // fast char[] to byte[] compression
15915 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15916                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15917   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15918   match(Set result (StrCompressedCopy src (Binary dst len)));
15919   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15920          USE_KILL len, KILL tmp5, KILL cr);
15921 
15922   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15923   ins_encode %{
15924     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15925                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15926                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15927                            knoreg, knoreg);
15928   %}
15929   ins_pipe( pipe_slow );
15930 %}
15931 
15932 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15933                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15934   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15935   match(Set result (StrCompressedCopy src (Binary dst len)));
15936   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15937          USE_KILL len, KILL tmp5, KILL cr);
15938 
15939   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15940   ins_encode %{
15941     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15942                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15943                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15944                            $ktmp1$$KRegister, $ktmp2$$KRegister);
15945   %}
15946   ins_pipe( pipe_slow );
15947 %}

// fast byte[] to char[] inflation
15949 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15950                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15951   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15952   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15953   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15954 
15955   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15956   ins_encode %{
15957     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15958                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15959   %}
15960   ins_pipe( pipe_slow );
15961 %}
15962 
15963 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15964                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15965   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15966   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15967   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15968 
15969   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15970   ins_encode %{
15971     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15972                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15973   %}
15974   ins_pipe( pipe_slow );
15975 %}
15976 
15977 // encode char[] to byte[] in ISO_8859_1
15978 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15979                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15980                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15981   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15982   match(Set result (EncodeISOArray src (Binary dst len)));
15983   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15984 
15985   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15986   ins_encode %{
15987     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15988                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15989                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15990   %}
15991   ins_pipe( pipe_slow );
15992 %}
15993 
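// The ASCII flavor below shares MacroAssembler::encode_iso_array with the
// ISO-8859-1 flavor above; the trailing boolean argument selects ASCII
// (< 0x80) instead of ISO-8859-1 (< 0x100) range checking.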
15994 // encode char[] to byte[] in ASCII
15995 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15996                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15997                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15998   predicate(((EncodeISOArrayNode*)n)->is_ascii());
15999   match(Set result (EncodeISOArray src (Binary dst len)));
16000   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16001 
16002   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16003   ins_encode %{
16004     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16005                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16006                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16007   %}
16008   ins_pipe( pipe_slow );
16009 %}
16010 
16011 //----------Overflow Math Instructions-----------------------------------------
16012 
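// These instructions produce only a flags result: the matched Overflow* node
// performs the actual add/sub/neg/mul so the overflow condition can be
// consumed by a subsequent branch or cmove. Variants that clobber an input
// register mark it USE_KILL because the arithmetic result itself is
// discarded.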
16013 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16014 %{
16015   match(Set cr (OverflowAddI op1 op2));
16016   effect(DEF cr, USE_KILL op1, USE op2);
16017 
16018   format %{ "addl    $op1, $op2\t# overflow check int" %}
16019 
16020   ins_encode %{
16021     __ addl($op1$$Register, $op2$$Register);
16022   %}
16023   ins_pipe(ialu_reg_reg);
16024 %}
16025 
16026 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16027 %{
16028   match(Set cr (OverflowAddI op1 op2));
16029   effect(DEF cr, USE_KILL op1, USE op2);
16030 
16031   format %{ "addl    $op1, $op2\t# overflow check int" %}
16032 
16033   ins_encode %{
16034     __ addl($op1$$Register, $op2$$constant);
16035   %}
16036   ins_pipe(ialu_reg_reg);
16037 %}
16038 
16039 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16040 %{
16041   match(Set cr (OverflowAddL op1 op2));
16042   effect(DEF cr, USE_KILL op1, USE op2);
16043 
16044   format %{ "addq    $op1, $op2\t# overflow check long" %}
16045   ins_encode %{
16046     __ addq($op1$$Register, $op2$$Register);
16047   %}
16048   ins_pipe(ialu_reg_reg);
16049 %}
16050 
16051 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16052 %{
16053   match(Set cr (OverflowAddL op1 op2));
16054   effect(DEF cr, USE_KILL op1, USE op2);
16055 
16056   format %{ "addq    $op1, $op2\t# overflow check long" %}
16057   ins_encode %{
16058     __ addq($op1$$Register, $op2$$constant);
16059   %}
16060   ins_pipe(ialu_reg_reg);
16061 %}
16062 
16063 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16064 %{
16065   match(Set cr (OverflowSubI op1 op2));
16066 
16067   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16068   ins_encode %{
16069     __ cmpl($op1$$Register, $op2$$Register);
16070   %}
16071   ins_pipe(ialu_reg_reg);
16072 %}
16073 
16074 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16075 %{
16076   match(Set cr (OverflowSubI op1 op2));
16077 
16078   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16079   ins_encode %{
16080     __ cmpl($op1$$Register, $op2$$constant);
16081   %}
16082   ins_pipe(ialu_reg_reg);
16083 %}
16084 
16085 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16086 %{
16087   match(Set cr (OverflowSubL op1 op2));
16088 
16089   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16090   ins_encode %{
16091     __ cmpq($op1$$Register, $op2$$Register);
16092   %}
16093   ins_pipe(ialu_reg_reg);
16094 %}
16095 
16096 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16097 %{
16098   match(Set cr (OverflowSubL op1 op2));
16099 
16100   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16101   ins_encode %{
16102     __ cmpq($op1$$Register, $op2$$constant);
16103   %}
16104   ins_pipe(ialu_reg_reg);
16105 %}
16106 
16107 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16108 %{
16109   match(Set cr (OverflowSubI zero op2));
16110   effect(DEF cr, USE_KILL op2);
16111 
16112   format %{ "negl    $op2\t# overflow check int" %}
16113   ins_encode %{
16114     __ negl($op2$$Register);
16115   %}
16116   ins_pipe(ialu_reg_reg);
16117 %}
16118 
16119 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16120 %{
16121   match(Set cr (OverflowSubL zero op2));
16122   effect(DEF cr, USE_KILL op2);
16123 
16124   format %{ "negq    $op2\t# overflow check long" %}
16125   ins_encode %{
16126     __ negq($op2$$Register);
16127   %}
16128   ins_pipe(ialu_reg_reg);
16129 %}
16130 
16131 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16132 %{
16133   match(Set cr (OverflowMulI op1 op2));
16134   effect(DEF cr, USE_KILL op1, USE op2);
16135 
16136   format %{ "imull    $op1, $op2\t# overflow check int" %}
16137   ins_encode %{
16138     __ imull($op1$$Register, $op2$$Register);
16139   %}
16140   ins_pipe(ialu_reg_reg_alu0);
16141 %}
16142 
16143 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16144 %{
16145   match(Set cr (OverflowMulI op1 op2));
16146   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16147 
16148   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
16149   ins_encode %{
16150     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16151   %}
16152   ins_pipe(ialu_reg_reg_alu0);
16153 %}
16154 
16155 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16156 %{
16157   match(Set cr (OverflowMulL op1 op2));
16158   effect(DEF cr, USE_KILL op1, USE op2);
16159 
16160   format %{ "imulq    $op1, $op2\t# overflow check long" %}
16161   ins_encode %{
16162     __ imulq($op1$$Register, $op2$$Register);
16163   %}
16164   ins_pipe(ialu_reg_reg_alu0);
16165 %}
16166 
16167 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16168 %{
16169   match(Set cr (OverflowMulL op1 op2));
16170   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16171 
16172   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
16173   ins_encode %{
16174     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16175   %}
16176   ins_pipe(ialu_reg_reg_alu0);
16177 %}
16178 
16179 
16180 //----------Control Flow Instructions------------------------------------------
16181 // Signed compare Instructions
16182 
16183 // XXX more variants!!
16184 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16185 %{
16186   match(Set cr (CmpI op1 op2));
16187   effect(DEF cr, USE op1, USE op2);
16188 
16189   format %{ "cmpl    $op1, $op2" %}
16190   ins_encode %{
16191     __ cmpl($op1$$Register, $op2$$Register);
16192   %}
16193   ins_pipe(ialu_cr_reg_reg);
16194 %}
16195 
16196 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16197 %{
16198   match(Set cr (CmpI op1 op2));
16199 
16200   format %{ "cmpl    $op1, $op2" %}
16201   ins_encode %{
16202     __ cmpl($op1$$Register, $op2$$constant);
16203   %}
16204   ins_pipe(ialu_cr_reg_imm);
16205 %}
16206 
16207 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16208 %{
16209   match(Set cr (CmpI op1 (LoadI op2)));
16210 
16211   ins_cost(500); // XXX
16212   format %{ "cmpl    $op1, $op2" %}
16213   ins_encode %{
16214     __ cmpl($op1$$Register, $op2$$Address);
16215   %}
16216   ins_pipe(ialu_cr_reg_mem);
16217 %}
16218 
16219 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16220 %{
16221   match(Set cr (CmpI src zero));
16222 
16223   format %{ "testl   $src, $src" %}
16224   ins_encode %{
16225     __ testl($src$$Register, $src$$Register);
16226   %}
16227   ins_pipe(ialu_cr_reg_imm);
16228 %}
16229 
16230 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16231 %{
16232   match(Set cr (CmpI (AndI src con) zero));
16233 
16234   format %{ "testl   $src, $con" %}
16235   ins_encode %{
16236     __ testl($src$$Register, $con$$constant);
16237   %}
16238   ins_pipe(ialu_cr_reg_imm);
16239 %}
16240 
16241 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16242 %{
16243   match(Set cr (CmpI (AndI src1 src2) zero));
16244 
16245   format %{ "testl   $src1, $src2" %}
16246   ins_encode %{
16247     __ testl($src1$$Register, $src2$$Register);
16248   %}
16249   ins_pipe(ialu_cr_reg_imm);
16250 %}
16251 
16252 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16253 %{
16254   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16255 
16256   format %{ "testl   $src, $mem" %}
16257   ins_encode %{
16258     __ testl($src$$Register, $mem$$Address);
16259   %}
16260   ins_pipe(ialu_cr_reg_mem);
16261 %}
16262 
16263 // Unsigned compare Instructions; really, same as signed except they
16264 // produce an rFlagsRegU instead of rFlagsReg.
16265 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16266 %{
16267   match(Set cr (CmpU op1 op2));
16268 
16269   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16270   ins_encode %{
16271     __ cmpl($op1$$Register, $op2$$Register);
16272   %}
16273   ins_pipe(ialu_cr_reg_reg);
16274 %}
16275 
16276 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16277 %{
16278   match(Set cr (CmpU op1 op2));
16279 
16280   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16281   ins_encode %{
16282     __ cmpl($op1$$Register, $op2$$constant);
16283   %}
16284   ins_pipe(ialu_cr_reg_imm);
16285 %}
16286 
16287 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16288 %{
16289   match(Set cr (CmpU op1 (LoadI op2)));
16290 
16291   ins_cost(500); // XXX
16292   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16293   ins_encode %{
16294     __ cmpl($op1$$Register, $op2$$Address);
16295   %}
16296   ins_pipe(ialu_cr_reg_mem);
16297 %}
16298 
16299 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16300 %{
16301   match(Set cr (CmpU src zero));
16302 
16303   format %{ "testl   $src, $src\t# unsigned" %}
16304   ins_encode %{
16305     __ testl($src$$Register, $src$$Register);
16306   %}
16307   ins_pipe(ialu_cr_reg_imm);
16308 %}
16309 
16310 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16311 %{
16312   match(Set cr (CmpP op1 op2));
16313 
16314   format %{ "cmpq    $op1, $op2\t# ptr" %}
16315   ins_encode %{
16316     __ cmpq($op1$$Register, $op2$$Register);
16317   %}
16318   ins_pipe(ialu_cr_reg_reg);
16319 %}
16320 
16321 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16322 %{
16323   match(Set cr (CmpP op1 (LoadP op2)));
16324   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16325 
16326   ins_cost(500); // XXX
16327   format %{ "cmpq    $op1, $op2\t# ptr" %}
16328   ins_encode %{
16329     __ cmpq($op1$$Register, $op2$$Address);
16330   %}
16331   ins_pipe(ialu_cr_reg_mem);
16332 %}
16333 
16334 // XXX this is generalized by compP_rReg_mem???
16335 // Compare raw pointer (used in out-of-heap check).
16336 // Only works because non-oop pointers must be raw pointers
16337 // and raw pointers have no anti-dependencies.
16338 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16339 %{
16340   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16341             n->in(2)->as_Load()->barrier_data() == 0);
16342   match(Set cr (CmpP op1 (LoadP op2)));
16343 
16344   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16345   ins_encode %{
16346     __ cmpq($op1$$Register, $op2$$Address);
16347   %}
16348   ins_pipe(ialu_cr_reg_mem);
16349 %}
16350 
16351 // This will generate a signed flags result. This should be OK since
16352 // any compare to a zero should be eq/neq.
16353 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16354 %{
16355   match(Set cr (CmpP src zero));
16356 
16357   format %{ "testq   $src, $src\t# ptr" %}
16358   ins_encode %{
16359     __ testq($src$$Register, $src$$Register);
16360   %}
16361   ins_pipe(ialu_cr_reg_imm);
16362 %}
16363 
16364 // This will generate a signed flags result. This should be OK since
16365 // any compare to a zero should be eq/neq.
16366 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16367 %{
16368   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16369             n->in(1)->as_Load()->barrier_data() == 0);
16370   match(Set cr (CmpP (LoadP op) zero));
16371 
16372   ins_cost(500); // XXX
16373   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16374   ins_encode %{
16375     __ testq($op$$Address, 0xFFFFFFFF);
16376   %}
16377   ins_pipe(ialu_cr_reg_imm);
16378 %}
16379 
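// When the compressed-oops base is null, R12_heapbase holds zero, so a
// loaded pointer can be null-checked by comparing it against R12 rather
// than against an immediate.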
16380 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16381 %{
16382   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16383             n->in(1)->as_Load()->barrier_data() == 0);
16384   match(Set cr (CmpP (LoadP mem) zero));
16385 
16386   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16387   ins_encode %{
16388     __ cmpq(r12, $mem$$Address);
16389   %}
16390   ins_pipe(ialu_cr_reg_mem);
16391 %}
16392 
16393 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16394 %{
16395   match(Set cr (CmpN op1 op2));
16396 
16397   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16398   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16399   ins_pipe(ialu_cr_reg_reg);
16400 %}
16401 
16402 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16403 %{
16404   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16405   match(Set cr (CmpN src (LoadN mem)));
16406 
16407   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16408   ins_encode %{
16409     __ cmpl($src$$Register, $mem$$Address);
16410   %}
16411   ins_pipe(ialu_cr_reg_mem);
16412 %}
16413 
16414 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16415   match(Set cr (CmpN op1 op2));
16416 
16417   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16418   ins_encode %{
16419     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16420   %}
16421   ins_pipe(ialu_cr_reg_imm);
16422 %}
16423 
16424 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16425 %{
16426   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16427   match(Set cr (CmpN src (LoadN mem)));
16428 
16429   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16430   ins_encode %{
16431     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16432   %}
16433   ins_pipe(ialu_cr_reg_mem);
16434 %}
16435 
16436 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16437   match(Set cr (CmpN op1 op2));
16438 
16439   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16440   ins_encode %{
16441     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16442   %}
16443   ins_pipe(ialu_cr_reg_imm);
16444 %}
16445 
16446 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16447 %{
16448   predicate(!UseCompactObjectHeaders);
16449   match(Set cr (CmpN src (LoadNKlass mem)));
16450 
16451   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16452   ins_encode %{
16453     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16454   %}
16455   ins_pipe(ialu_cr_reg_mem);
16456 %}
16457 
16458 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16459   match(Set cr (CmpN src zero));
16460 
16461   format %{ "testl   $src, $src\t# compressed ptr" %}
16462   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16463   ins_pipe(ialu_cr_reg_imm);
16464 %}
16465 
16466 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16467 %{
16468   predicate(CompressedOops::base() != nullptr &&
16469             n->in(1)->as_Load()->barrier_data() == 0);
16470   match(Set cr (CmpN (LoadN mem) zero));
16471 
16472   ins_cost(500); // XXX
16473   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16474   ins_encode %{
16475     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16476   %}
16477   ins_pipe(ialu_cr_reg_mem);
16478 %}
16479 
16480 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16481 %{
16482   predicate(CompressedOops::base() == nullptr &&
16483             n->in(1)->as_Load()->barrier_data() == 0);
16484   match(Set cr (CmpN (LoadN mem) zero));
16485 
16486   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16487   ins_encode %{
16488     __ cmpl(r12, $mem$$Address);
16489   %}
16490   ins_pipe(ialu_cr_reg_mem);
16491 %}
16492 
16493 // Yanked all unsigned pointer compare operations.
16494 // Pointer compares are done with CmpP which is already unsigned.
16495 
16496 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16497 %{
16498   match(Set cr (CmpL op1 op2));
16499 
16500   format %{ "cmpq    $op1, $op2" %}
16501   ins_encode %{
16502     __ cmpq($op1$$Register, $op2$$Register);
16503   %}
16504   ins_pipe(ialu_cr_reg_reg);
16505 %}
16506 
16507 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16508 %{
16509   match(Set cr (CmpL op1 op2));
16510 
16511   format %{ "cmpq    $op1, $op2" %}
16512   ins_encode %{
16513     __ cmpq($op1$$Register, $op2$$constant);
16514   %}
16515   ins_pipe(ialu_cr_reg_imm);
16516 %}
16517 
16518 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16519 %{
16520   match(Set cr (CmpL op1 (LoadL op2)));
16521 
16522   format %{ "cmpq    $op1, $op2" %}
16523   ins_encode %{
16524     __ cmpq($op1$$Register, $op2$$Address);
16525   %}
16526   ins_pipe(ialu_cr_reg_mem);
16527 %}
16528 
16529 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16530 %{
16531   match(Set cr (CmpL src zero));
16532 
16533   format %{ "testq   $src, $src" %}
16534   ins_encode %{
16535     __ testq($src$$Register, $src$$Register);
16536   %}
16537   ins_pipe(ialu_cr_reg_imm);
16538 %}
16539 
16540 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16541 %{
16542   match(Set cr (CmpL (AndL src con) zero));
16543 
16544   format %{ "testq   $src, $con\t# long" %}
16545   ins_encode %{
16546     __ testq($src$$Register, $con$$constant);
16547   %}
16548   ins_pipe(ialu_cr_reg_imm);
16549 %}
16550 
16551 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16552 %{
16553   match(Set cr (CmpL (AndL src1 src2) zero));
16554 
16555   format %{ "testq   $src1, $src2\t# long" %}
16556   ins_encode %{
16557     __ testq($src1$$Register, $src2$$Register);
16558   %}
16559   ins_pipe(ialu_cr_reg_imm);
16560 %}
16561 
16562 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16563 %{
16564   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16565 
16566   format %{ "testq   $src, $mem" %}
16567   ins_encode %{
16568     __ testq($src$$Register, $mem$$Address);
16569   %}
16570   ins_pipe(ialu_cr_reg_mem);
16571 %}
16572 
16573 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16574 %{
16575   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16576 
16577   format %{ "testq   $src, $mem" %}
16578   ins_encode %{
16579     __ testq($src$$Register, $mem$$Address);
16580   %}
16581   ins_pipe(ialu_cr_reg_mem);
16582 %}
16583 
16584 // Manifest a CmpU result in an integer register.  Very painful.
16585 // This is the test to avoid.
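// The cmpX3 instructions materialize the three-way result as: load -1 into
// the destination, branch past the setcc when the first operand is strictly
// less (below for unsigned), otherwise setcc(ne) leaves 0 for equal and 1
// for greater.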
16586 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16587 %{
16588   match(Set dst (CmpU3 src1 src2));
16589   effect(KILL flags);
16590 
16591   ins_cost(275); // XXX
16592   format %{ "cmpl    $src1, $src2\t# CmpL3\n\t"
16593             "movl    $dst, -1\n\t"
16594             "jb,u    done\n\t"
16595             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16596     "done:" %}
16597   ins_encode %{
16598     Label done;
16599     __ cmpl($src1$$Register, $src2$$Register);
16600     __ movl($dst$$Register, -1);
16601     __ jccb(Assembler::below, done);
16602     __ setcc(Assembler::notZero, $dst$$Register);
16603     __ bind(done);
16604   %}
16605   ins_pipe(pipe_slow);
16606 %}
16607 
16608 // Manifest a CmpL result in an integer register.  Very painful.
16609 // This is the test to avoid.
16610 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16611 %{
16612   match(Set dst (CmpL3 src1 src2));
16613   effect(KILL flags);
16614 
16615   ins_cost(275); // XXX
16616   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16617             "movl    $dst, -1\n\t"
16618             "jl,s    done\n\t"
16619             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16620     "done:" %}
16621   ins_encode %{
16622     Label done;
16623     __ cmpq($src1$$Register, $src2$$Register);
16624     __ movl($dst$$Register, -1);
16625     __ jccb(Assembler::less, done);
16626     __ setcc(Assembler::notZero, $dst$$Register);
16627     __ bind(done);
16628   %}
16629   ins_pipe(pipe_slow);
16630 %}
16631 
16632 // Manifest a CmpUL result in an integer register.  Very painful.
16633 // This is the test to avoid.
16634 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16635 %{
16636   match(Set dst (CmpUL3 src1 src2));
16637   effect(KILL flags);
16638 
16639   ins_cost(275); // XXX
16640   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16641             "movl    $dst, -1\n\t"
16642             "jb,u    done\n\t"
16643             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16644     "done:" %}
16645   ins_encode %{
16646     Label done;
16647     __ cmpq($src1$$Register, $src2$$Register);
16648     __ movl($dst$$Register, -1);
16649     __ jccb(Assembler::below, done);
16650     __ setcc(Assembler::notZero, $dst$$Register);
16651     __ bind(done);
16652   %}
16653   ins_pipe(pipe_slow);
16654 %}
16655 
16656 // Unsigned long compare Instructions; really, same as signed long except they
16657 // produce an rFlagsRegU instead of rFlagsReg.
16658 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16659 %{
16660   match(Set cr (CmpUL op1 op2));
16661 
16662   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16663   ins_encode %{
16664     __ cmpq($op1$$Register, $op2$$Register);
16665   %}
16666   ins_pipe(ialu_cr_reg_reg);
16667 %}
16668 
16669 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16670 %{
16671   match(Set cr (CmpUL op1 op2));
16672 
16673   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16674   ins_encode %{
16675     __ cmpq($op1$$Register, $op2$$constant);
16676   %}
16677   ins_pipe(ialu_cr_reg_imm);
16678 %}
16679 
16680 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16681 %{
16682   match(Set cr (CmpUL op1 (LoadL op2)));
16683 
16684   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16685   ins_encode %{
16686     __ cmpq($op1$$Register, $op2$$Address);
16687   %}
16688   ins_pipe(ialu_cr_reg_mem);
16689 %}
16690 
16691 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16692 %{
16693   match(Set cr (CmpUL src zero));
16694 
16695   format %{ "testq   $src, $src\t# unsigned" %}
16696   ins_encode %{
16697     __ testq($src$$Register, $src$$Register);
16698   %}
16699   ins_pipe(ialu_cr_reg_imm);
16700 %}
16701 
16702 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16703 %{
16704   match(Set cr (CmpI (LoadB mem) imm));
16705 
16706   ins_cost(125);
16707   format %{ "cmpb    $mem, $imm" %}
16708   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16709   ins_pipe(ialu_cr_reg_mem);
16710 %}
16711 
16712 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16713 %{
16714   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16715 
16716   ins_cost(125);
16717   format %{ "testb   $mem, $imm\t# ubyte" %}
16718   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16719   ins_pipe(ialu_cr_reg_mem);
16720 %}
16721 
16722 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16723 %{
16724   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16725 
16726   ins_cost(125);
16727   format %{ "testb   $mem, $imm\t# byte" %}
16728   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16729   ins_pipe(ialu_cr_reg_mem);
16730 %}
16731 
16732 //----------Max and Min--------------------------------------------------------
16733 // Min Instructions
16734 
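// MinI/MaxI are expanded into a compare followed by a conditional move. The
// cmovI_reg_* instructs below are expansion helpers only (no match rule).
// With APX (UseAPX) the NDD variants use the three-operand ecmovl, which
// writes a separate destination so the first source is not destroyed.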
16735 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16736 %{
16737   predicate(!UseAPX);
16738   effect(USE_DEF dst, USE src, USE cr);
16739 
16740   format %{ "cmovlgt $dst, $src\t# min" %}
16741   ins_encode %{
16742     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16743   %}
16744   ins_pipe(pipe_cmov_reg);
16745 %}
16746 
16747 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16748 %{
16749   predicate(UseAPX);
16750   effect(DEF dst, USE src1, USE src2, USE cr);
16751 
16752   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16753   ins_encode %{
16754     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16755   %}
16756   ins_pipe(pipe_cmov_reg);
16757 %}
16758 
16759 instruct minI_rReg(rRegI dst, rRegI src)
16760 %{
16761   predicate(!UseAPX);
16762   match(Set dst (MinI dst src));
16763 
16764   ins_cost(200);
16765   expand %{
16766     rFlagsReg cr;
16767     compI_rReg(cr, dst, src);
16768     cmovI_reg_g(dst, src, cr);
16769   %}
16770 %}
16771 
16772 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16773 %{
16774   predicate(UseAPX);
16775   match(Set dst (MinI src1 src2));
16776   effect(DEF dst, USE src1, USE src2);
16777   flag(PD::Flag_ndd_demotable_opr1);
16778 
16779   ins_cost(200);
16780   expand %{
16781     rFlagsReg cr;
16782     compI_rReg(cr, src1, src2);
16783     cmovI_reg_g_ndd(dst, src1, src2, cr);
16784   %}
16785 %}
16786 
16787 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16788 %{
16789   predicate(!UseAPX);
16790   effect(USE_DEF dst, USE src, USE cr);
16791 
16792   format %{ "cmovllt $dst, $src\t# max" %}
16793   ins_encode %{
16794     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16795   %}
16796   ins_pipe(pipe_cmov_reg);
16797 %}
16798 
16799 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16800 %{
16801   predicate(UseAPX);
16802   effect(DEF dst, USE src1, USE src2, USE cr);
16803 
16804   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16805   ins_encode %{
16806     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16807   %}
16808   ins_pipe(pipe_cmov_reg);
16809 %}
16810 
16811 instruct maxI_rReg(rRegI dst, rRegI src)
16812 %{
16813   predicate(!UseAPX);
16814   match(Set dst (MaxI dst src));
16815 
16816   ins_cost(200);
16817   expand %{
16818     rFlagsReg cr;
16819     compI_rReg(cr, dst, src);
16820     cmovI_reg_l(dst, src, cr);
16821   %}
16822 %}
16823 
16824 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16825 %{
16826   predicate(UseAPX);
16827   match(Set dst (MaxI src1 src2));
16828   effect(DEF dst, USE src1, USE src2);
16829   flag(PD::Flag_ndd_demotable_opr1);
16830 
16831   ins_cost(200);
16832   expand %{
16833     rFlagsReg cr;
16834     compI_rReg(cr, src1, src2);
16835     cmovI_reg_l_ndd(dst, src1, src2, cr);
16836   %}
16837 %}
16838 
16839 // ============================================================================
16840 // Branch Instructions
16841 
16842 // Jump Direct - Label defines a relative address from JMP+1
16843 instruct jmpDir(label labl)
16844 %{
16845   match(Goto);
16846   effect(USE labl);
16847 
16848   ins_cost(300);
16849   format %{ "jmp     $labl" %}
16850   size(5);
16851   ins_encode %{
16852     Label* L = $labl$$label;
16853     __ jmp(*L, false); // Always long jump
16854   %}
16855   ins_pipe(pipe_jmp);
16856 %}
16857 
16858 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16859 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16860 %{
16861   match(If cop cr);
16862   effect(USE labl);
16863 
16864   ins_cost(300);
16865   format %{ "j$cop     $labl" %}
16866   size(6);
16867   ins_encode %{
16868     Label* L = $labl$$label;
16869     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16870   %}
16871   ins_pipe(pipe_jcc);
16872 %}
16873 
16874 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16875 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16876 %{
16877   match(CountedLoopEnd cop cr);
16878   effect(USE labl);
16879 
16880   ins_cost(300);
16881   format %{ "j$cop     $labl\t# loop end" %}
16882   size(6);
16883   ins_encode %{
16884     Label* L = $labl$$label;
16885     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16886   %}
16887   ins_pipe(pipe_jcc);
16888 %}
16889 
16890 // Jump Direct Conditional - using unsigned comparison
16891 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16892   match(If cop cmp);
16893   effect(USE labl);
16894 
16895   ins_cost(300);
16896   format %{ "j$cop,u   $labl" %}
16897   size(6);
16898   ins_encode %{
16899     Label* L = $labl$$label;
16900     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16901   %}
16902   ins_pipe(pipe_jcc);
16903 %}
16904 
16905 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16906   match(If cop cmp);
16907   effect(USE labl);
16908 
16909   ins_cost(200);
16910   format %{ "j$cop,u   $labl" %}
16911   size(6);
16912   ins_encode %{
16913     Label* L = $labl$$label;
16914     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16915   %}
16916   ins_pipe(pipe_jcc);
16917 %}
16918 
16919 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16920   match(If cop cmp);
16921   effect(USE labl);
16922 
16923   ins_cost(200);
16924   format %{ $$template
16925     if ($cop$$cmpcode == Assembler::notEqual) {
16926       $$emit$$"jp,u    $labl\n\t"
16927       $$emit$$"j$cop,u   $labl"
16928     } else {
16929       $$emit$$"jp,u    done\n\t"
16930       $$emit$$"j$cop,u   $labl\n\t"
16931       $$emit$$"done:"
16932     }
16933   %}
16934   ins_encode %{
16935     Label* l = $labl$$label;
16936     if ($cop$$cmpcode == Assembler::notEqual) {
16937       __ jcc(Assembler::parity, *l, false);
16938       __ jcc(Assembler::notEqual, *l, false);
16939     } else if ($cop$$cmpcode == Assembler::equal) {
16940       Label done;
16941       __ jccb(Assembler::parity, done);
16942       __ jcc(Assembler::equal, *l, false);
16943       __ bind(done);
16944     } else {
16945        ShouldNotReachHere();
16946     }
16947   %}
16948   ins_pipe(pipe_jcc);
16949 %}
16950 
16951 // Jump Direct Conditional - using signed and unsigned comparison
16952 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
16953   match(If cop cmp);
16954   effect(USE labl);
16955 
16956   ins_cost(200);
16957   format %{ "j$cop,su   $labl" %}
16958   size(6);
16959   ins_encode %{
16960     Label* L = $labl$$label;
16961     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16962   %}
16963   ins_pipe(pipe_jcc);
16964 %}
16965 
16966 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden
// internal cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
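//
// A pseudocode sketch of what check_klass_subtype_slow_path_linear() does for
// this rule (accessor names are illustrative, not the exact HotSpot API):
//
//   Array<Klass*>* secondaries = sub->secondary_supers();
//   for (int i = 0; i < secondaries->length(); i++) {
//     if (secondaries->at(i) == super) {
//       // hit: remember super in the one-element secondary_super_cache
//       // and return 0 in $result (rdi), with flags Z
//       return 0;
//     }
//   }
//   // miss: $result stays non-zero and flags are NZ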
16972 
16973 instruct partialSubtypeCheck(rdi_RegP result,
16974                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16975                              rFlagsReg cr)
16976 %{
16977   match(Set result (PartialSubtypeCheck sub super));
16978   predicate(!UseSecondarySupersTable);
16979   effect(KILL rcx, KILL cr);
16980 
16981   ins_cost(1100);  // slightly larger than the next version
16982   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16983             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16984             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16985             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16986             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
16987             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16988             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
16989     "miss:\t" %}
16990 
16991   ins_encode %{
16992     Label miss;
16993     // NB: Callers may assume that, when $result is a valid register,
16994     // check_klass_subtype_slow_path_linear sets it to a nonzero
16995     // value.
16996     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16997                                             $rcx$$Register, $result$$Register,
16998                                             nullptr, &miss,
16999                                             /*set_cond_codes:*/ true);
17000     __ xorptr($result$$Register, $result$$Register);
17001     __ bind(miss);
17002   %}
17003 
17004   ins_pipe(pipe_slow);
17005 %}
17006 
17007 // ============================================================================
17008 // Two versions of hashtable-based partialSubtypeCheck, both used when
17009 // we need to search for a super class in the secondary supers array.
17010 // The first is used when we don't know _a priori_ the class being
17011 // searched for. The second, far more common, is used when we do know:
17012 // this is used for instanceof, checkcast, and any case where C2 can
17013 // determine it by constant propagation.
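//
// Sketch of how the constant-super rule below chooses its code shape (this
// simply mirrors its ins_encode):
//
//   u1 slot = ((Klass*)super_con)->hash_slot();    // known at compile time
//   if (InlineSecondarySupersTest) {
//     // probe the hashed secondary-supers table inline
//   } else {
//     // call the stub pre-generated for exactly this hash slot
//     __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(slot)));
//   }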
17014 
17015 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17016                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17017                                        rFlagsReg cr)
17018 %{
17019   match(Set result (PartialSubtypeCheck sub super));
17020   predicate(UseSecondarySupersTable);
17021   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17022 
17023   ins_cost(1000);
17024   format %{ "partialSubtypeCheck $result, $sub, $super" %}
17025 
17026   ins_encode %{
17027     __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
17029   %}
17030 
17031   ins_pipe(pipe_slow);
17032 %}
17033 
17034 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17035                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17036                                        rFlagsReg cr)
17037 %{
17038   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17039   predicate(UseSecondarySupersTable);
17040   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17041 
17042   ins_cost(700);  // smaller than the next version
17043   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17044 
17045   ins_encode %{
17046     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17047     if (InlineSecondarySupersTest) {
17048       __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17049                                        $temp3$$Register, $temp4$$Register, $result$$Register,
17050                                        super_klass_slot);
17051     } else {
17052       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17053     }
17054   %}
17055 
17056   ins_pipe(pipe_slow);
17057 %}
17058 
17059 // ============================================================================
17060 // Branch Instructions -- short offset versions
17061 //
17062 // These instructions are used to replace jumps of a long offset (the default
17063 // match) with jumps of a shorter offset.  These instructions are all tagged
17064 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17065 // match rules in general matching.  Instead, the ADLC generates a conversion
17066 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether a
// branch can be converted to the short form using the is_short_branch_offset()
// predicate in the machine-specific code section of the file.
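//
// For example, the long conditional branch above (jmpCon, size(6)) and its
// short form below (jmpCon_short, size(2)) differ only in encoding:
//
//   0F 8x cd    jcc   rel32      // 6 bytes, 32-bit displacement
//   7x    cb    jccb  rel8       // 2 bytes, 8-bit displacement
//
// The in-place replacement is only legal when the displacement fits in a
// signed 8-bit field, which is what the offset predicate checks.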
17070 
17071 // Jump Direct - Label defines a relative address from JMP+1
17072 instruct jmpDir_short(label labl) %{
17073   match(Goto);
17074   effect(USE labl);
17075 
17076   ins_cost(300);
17077   format %{ "jmp,s   $labl" %}
17078   size(2);
17079   ins_encode %{
17080     Label* L = $labl$$label;
17081     __ jmpb(*L);
17082   %}
17083   ins_pipe(pipe_jmp);
17084   ins_short_branch(1);
17085 %}
17086 
17087 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17088 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17089   match(If cop cr);
17090   effect(USE labl);
17091 
17092   ins_cost(300);
17093   format %{ "j$cop,s   $labl" %}
17094   size(2);
17095   ins_encode %{
17096     Label* L = $labl$$label;
17097     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17098   %}
17099   ins_pipe(pipe_jcc);
17100   ins_short_branch(1);
17101 %}
17102 
17103 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17104 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17105   match(CountedLoopEnd cop cr);
17106   effect(USE labl);
17107 
17108   ins_cost(300);
17109   format %{ "j$cop,s   $labl\t# loop end" %}
17110   size(2);
17111   ins_encode %{
17112     Label* L = $labl$$label;
17113     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17114   %}
17115   ins_pipe(pipe_jcc);
17116   ins_short_branch(1);
17117 %}
17118 
17119 // Jump Direct Conditional - using unsigned comparison
17120 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17121   match(If cop cmp);
17122   effect(USE labl);
17123 
17124   ins_cost(300);
17125   format %{ "j$cop,us  $labl" %}
17126   size(2);
17127   ins_encode %{
17128     Label* L = $labl$$label;
17129     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17130   %}
17131   ins_pipe(pipe_jcc);
17132   ins_short_branch(1);
17133 %}
17134 
17135 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17136   match(If cop cmp);
17137   effect(USE labl);
17138 
17139   ins_cost(300);
17140   format %{ "j$cop,us  $labl" %}
17141   size(2);
17142   ins_encode %{
17143     Label* L = $labl$$label;
17144     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17145   %}
17146   ins_pipe(pipe_jcc);
17147   ins_short_branch(1);
17148 %}
17149 
17150 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17151   match(If cop cmp);
17152   effect(USE labl);
17153 
17154   ins_cost(300);
17155   format %{ $$template
17156     if ($cop$$cmpcode == Assembler::notEqual) {
17157       $$emit$$"jp,u,s  $labl\n\t"
17158       $$emit$$"j$cop,u,s  $labl"
17159     } else {
17160       $$emit$$"jp,u,s  done\n\t"
17161       $$emit$$"j$cop,u,s  $labl\n\t"
17162       $$emit$$"done:"
17163     }
17164   %}
17165   size(4);
17166   ins_encode %{
17167     Label* l = $labl$$label;
17168     if ($cop$$cmpcode == Assembler::notEqual) {
17169       __ jccb(Assembler::parity, *l);
17170       __ jccb(Assembler::notEqual, *l);
17171     } else if ($cop$$cmpcode == Assembler::equal) {
17172       Label done;
17173       __ jccb(Assembler::parity, done);
17174       __ jccb(Assembler::equal, *l);
17175       __ bind(done);
17176     } else {
17177        ShouldNotReachHere();
17178     }
17179   %}
17180   ins_pipe(pipe_jcc);
17181   ins_short_branch(1);
17182 %}
17183 
17184 // Jump Direct Conditional - using signed and unsigned comparison
17185 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17186   match(If cop cmp);
17187   effect(USE labl);
17188 
17189   ins_cost(300);
17190   format %{ "j$cop,sus  $labl" %}
17191   size(2);
17192   ins_encode %{
17193     Label* L = $labl$$label;
17194     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17195   %}
17196   ins_pipe(pipe_jcc);
17197   ins_short_branch(1);
17198 %}
17199 
17200 // ============================================================================
17201 // inlined locking and unlocking
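//
// These rules emit only the inline fast paths; C2 branches on the flags they
// produce to decide whether the runtime slow path (monitorenter/monitorexit)
// is still needed.  Rough sketch of the convention (assumption: ZF set means
// the fast path succeeded, which is how fast_lock()/fast_unlock() report
// their result):
//
//   FastLock(obj, box)            // attempt the inline lock
//   je   done                     // ZF set: locked without a runtime call
//   call <monitorenter slow path>
//   done: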
17202 
17203 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17204   match(Set cr (FastLock object box));
17205   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17206   ins_cost(300);
17207   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17208   ins_encode %{
17209     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17210   %}
17211   ins_pipe(pipe_slow);
17212 %}
17213 
17214 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17215   match(Set cr (FastUnlock object rax_reg));
17216   effect(TEMP tmp, USE_KILL rax_reg);
17217   ins_cost(300);
17218   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17219   ins_encode %{
17220     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17221   %}
17222   ins_pipe(pipe_slow);
17223 %}
17224 
17225 
17226 // ============================================================================
17227 // Safepoint Instructions
17228 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17229 %{
17230   match(SafePoint poll);
17231   effect(KILL cr, USE poll);
17232 
17233   format %{ "testl   rax, [$poll]\t"
17234             "# Safepoint: poll for GC" %}
17235   ins_cost(125);
17236   ins_encode %{
17237     __ relocate(relocInfo::poll_type);
17238     address pre_pc = __ pc();
17239     __ testl(rax, Address($poll$$Register, 0));
17240     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17241   %}
17242   ins_pipe(ialu_reg_mem);
17243 %}
17244 
17245 instruct mask_all_evexL(kReg dst, rRegL src) %{
17246   match(Set dst (MaskAll src));
17247   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17248   ins_encode %{
17249     int mask_len = Matcher::vector_length(this);
17250     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17251   %}
17252   ins_pipe( pipe_slow );
17253 %}
17254 
17255 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17256   predicate(Matcher::vector_length(n) > 32);
17257   match(Set dst (MaskAll src));
17258   effect(TEMP tmp);
17259   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17260   ins_encode %{
17261     int mask_len = Matcher::vector_length(this);
17262     __ movslq($tmp$$Register, $src$$Register);
17263     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17264   %}
17265   ins_pipe( pipe_slow );
17266 %}
17267 
17268 // ============================================================================
17269 // Procedure Call/Return Instructions
17270 // Call Java Static Instruction
17271 // Note: If this code changes, the corresponding ret_addr_offset() and
17272 //       compute_padding() functions will have to be adjusted.
17273 instruct CallStaticJavaDirect(method meth) %{
17274   match(CallStaticJava);
17275   effect(USE meth);
17276 
17277   ins_cost(300);
17278   format %{ "call,static " %}
17279   opcode(0xE8); /* E8 cd */
17280   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17281   ins_pipe(pipe_slow);
17282   ins_alignment(4);
17283 %}
17284 
17285 // Call Java Dynamic Instruction
17286 // Note: If this code changes, the corresponding ret_addr_offset() and
17287 //       compute_padding() functions will have to be adjusted.
17288 instruct CallDynamicJavaDirect(method meth)
17289 %{
17290   match(CallDynamicJava);
17291   effect(USE meth);
17292 
17293   ins_cost(300);
17294   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17295             "call,dynamic " %}
17296   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17297   ins_pipe(pipe_slow);
17298   ins_alignment(4);
17299 %}
17300 
17301 // Call Runtime Instruction
17302 instruct CallRuntimeDirect(method meth)
17303 %{
17304   match(CallRuntime);
17305   effect(USE meth);
17306 
17307   ins_cost(300);
17308   format %{ "call,runtime " %}
17309   ins_encode(clear_avx, Java_To_Runtime(meth));
17310   ins_pipe(pipe_slow);
17311 %}
17312 
17313 // Call runtime without safepoint
17314 instruct CallLeafDirect(method meth)
17315 %{
17316   match(CallLeaf);
17317   effect(USE meth);
17318 
17319   ins_cost(300);
17320   format %{ "call_leaf,runtime " %}
17321   ins_encode(clear_avx, Java_To_Runtime(meth));
17322   ins_pipe(pipe_slow);
17323 %}
17324 
17325 // Call runtime without safepoint and with vector arguments
17326 instruct CallLeafDirectVector(method meth)
17327 %{
17328   match(CallLeafVector);
17329   effect(USE meth);
17330 
17331   ins_cost(300);
17332   format %{ "call_leaf,vector " %}
17333   ins_encode(Java_To_Runtime(meth));
17334   ins_pipe(pipe_slow);
17335 %}
17336 
17337 // Call runtime without safepoint
17338 instruct CallLeafNoFPDirect(method meth)
17339 %{
17340   match(CallLeafNoFP);
17341   effect(USE meth);
17342 
17343   ins_cost(300);
17344   format %{ "call_leaf_nofp,runtime " %}
17345   ins_encode(clear_avx, Java_To_Runtime(meth));
17346   ins_pipe(pipe_slow);
17347 %}
17348 
17349 // Return Instruction
17350 // Remove the return address & jump to it.
17351 // Notice: We always emit a nop after a ret to make sure there is room
17352 // for safepoint patching
17353 instruct Ret()
17354 %{
17355   match(Return);
17356 
17357   format %{ "ret" %}
17358   ins_encode %{
17359     __ ret(0);
17360   %}
17361   ins_pipe(pipe_jmp);
17362 %}
17363 
17364 // Tail Call; Jump from runtime stub to Java code.
17365 // Also known as an 'interprocedural jump'.
17366 // Target of jump will eventually return to caller.
17367 // TailJump below removes the return address.
17368 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall, which has reset rbp to the caller's state.
17370 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17371 %{
17372   match(TailCall jump_target method_ptr);
17373 
17374   ins_cost(300);
17375   format %{ "jmp     $jump_target\t# rbx holds method" %}
17376   ins_encode %{
17377     __ jmp($jump_target$$Register);
17378   %}
17379   ins_pipe(pipe_jmp);
17380 %}
17381 
17382 // Tail Jump; remove the return address; jump to target.
17383 // TailCall above leaves the return address around.
17384 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17385 %{
17386   match(TailJump jump_target ex_oop);
17387 
17388   ins_cost(300);
17389   format %{ "popq    rdx\t# pop return address\n\t"
17390             "jmp     $jump_target" %}
17391   ins_encode %{
17392     __ popq(as_Register(RDX_enc));
17393     __ jmp($jump_target$$Register);
17394   %}
17395   ins_pipe(pipe_jmp);
17396 %}
17397 
17398 // Forward exception.
17399 instruct ForwardExceptionjmp()
17400 %{
17401   match(ForwardException);
17402 
17403   format %{ "jmp     forward_exception_stub" %}
17404   ins_encode %{
17405     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17406   %}
17407   ins_pipe(pipe_jmp);
17408 %}
17409 
17410 // Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is set up
17412 // just prior to jumping to this handler.  No code emitted.
17413 instruct CreateException(rax_RegP ex_oop)
17414 %{
17415   match(Set ex_oop (CreateEx));
17416 
17417   size(0);
17418   // use the following format syntax
17419   format %{ "# exception oop is in rax; no code emitted" %}
17420   ins_encode();
17421   ins_pipe(empty);
17422 %}
17423 
17424 // Rethrow exception:
17425 // The exception oop will come in the first argument position.
17426 // Then JUMP (not call) to the rethrow stub code.
17427 instruct RethrowException()
17428 %{
17429   match(Rethrow);
17430 
17431   // use the following format syntax
17432   format %{ "jmp     rethrow_stub" %}
17433   ins_encode %{
17434     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17435   %}
17436   ins_pipe(pipe_jmp);
17437 %}
17438 
17439 // ============================================================================
17440 // This name is KNOWN by the ADLC and cannot be changed.
17441 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17442 // for this guy.
17443 instruct tlsLoadP(r15_RegP dst) %{
17444   match(Set dst (ThreadLocal));
17445   effect(DEF dst);
17446 
17447   size(0);
17448   format %{ "# TLS is in R15" %}
17449   ins_encode( /*empty encoding*/ );
17450   ins_pipe(ialu_reg_reg);
17451 %}
17452 
17453 instruct addF_reg(regF dst, regF src) %{
17454   predicate(UseAVX == 0);
17455   match(Set dst (AddF dst src));
17456 
17457   format %{ "addss   $dst, $src" %}
17458   ins_cost(150);
17459   ins_encode %{
17460     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17461   %}
17462   ins_pipe(pipe_slow);
17463 %}
17464 
17465 instruct addF_mem(regF dst, memory src) %{
17466   predicate(UseAVX == 0);
17467   match(Set dst (AddF dst (LoadF src)));
17468 
17469   format %{ "addss   $dst, $src" %}
17470   ins_cost(150);
17471   ins_encode %{
17472     __ addss($dst$$XMMRegister, $src$$Address);
17473   %}
17474   ins_pipe(pipe_slow);
17475 %}
17476 
17477 instruct addF_imm(regF dst, immF con) %{
17478   predicate(UseAVX == 0);
17479   match(Set dst (AddF dst con));
17480   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17481   ins_cost(150);
17482   ins_encode %{
17483     __ addss($dst$$XMMRegister, $constantaddress($con));
17484   %}
17485   ins_pipe(pipe_slow);
17486 %}
17487 
17488 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17489   predicate(UseAVX > 0);
17490   match(Set dst (AddF src1 src2));
17491 
17492   format %{ "vaddss  $dst, $src1, $src2" %}
17493   ins_cost(150);
17494   ins_encode %{
17495     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17496   %}
17497   ins_pipe(pipe_slow);
17498 %}
17499 
17500 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17501   predicate(UseAVX > 0);
17502   match(Set dst (AddF src1 (LoadF src2)));
17503 
17504   format %{ "vaddss  $dst, $src1, $src2" %}
17505   ins_cost(150);
17506   ins_encode %{
17507     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17508   %}
17509   ins_pipe(pipe_slow);
17510 %}
17511 
17512 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17513   predicate(UseAVX > 0);
17514   match(Set dst (AddF src con));
17515 
17516   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17517   ins_cost(150);
17518   ins_encode %{
17519     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17520   %}
17521   ins_pipe(pipe_slow);
17522 %}
17523 
17524 instruct addD_reg(regD dst, regD src) %{
17525   predicate(UseAVX == 0);
17526   match(Set dst (AddD dst src));
17527 
17528   format %{ "addsd   $dst, $src" %}
17529   ins_cost(150);
17530   ins_encode %{
17531     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17532   %}
17533   ins_pipe(pipe_slow);
17534 %}
17535 
17536 instruct addD_mem(regD dst, memory src) %{
17537   predicate(UseAVX == 0);
17538   match(Set dst (AddD dst (LoadD src)));
17539 
17540   format %{ "addsd   $dst, $src" %}
17541   ins_cost(150);
17542   ins_encode %{
17543     __ addsd($dst$$XMMRegister, $src$$Address);
17544   %}
17545   ins_pipe(pipe_slow);
17546 %}
17547 
17548 instruct addD_imm(regD dst, immD con) %{
17549   predicate(UseAVX == 0);
17550   match(Set dst (AddD dst con));
17551   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17552   ins_cost(150);
17553   ins_encode %{
17554     __ addsd($dst$$XMMRegister, $constantaddress($con));
17555   %}
17556   ins_pipe(pipe_slow);
17557 %}
17558 
17559 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17560   predicate(UseAVX > 0);
17561   match(Set dst (AddD src1 src2));
17562 
17563   format %{ "vaddsd  $dst, $src1, $src2" %}
17564   ins_cost(150);
17565   ins_encode %{
17566     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17567   %}
17568   ins_pipe(pipe_slow);
17569 %}
17570 
17571 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17572   predicate(UseAVX > 0);
17573   match(Set dst (AddD src1 (LoadD src2)));
17574 
17575   format %{ "vaddsd  $dst, $src1, $src2" %}
17576   ins_cost(150);
17577   ins_encode %{
17578     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17579   %}
17580   ins_pipe(pipe_slow);
17581 %}
17582 
17583 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17584   predicate(UseAVX > 0);
17585   match(Set dst (AddD src con));
17586 
17587   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17588   ins_cost(150);
17589   ins_encode %{
17590     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17591   %}
17592   ins_pipe(pipe_slow);
17593 %}
17594 
17595 instruct subF_reg(regF dst, regF src) %{
17596   predicate(UseAVX == 0);
17597   match(Set dst (SubF dst src));
17598 
17599   format %{ "subss   $dst, $src" %}
17600   ins_cost(150);
17601   ins_encode %{
17602     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17603   %}
17604   ins_pipe(pipe_slow);
17605 %}
17606 
17607 instruct subF_mem(regF dst, memory src) %{
17608   predicate(UseAVX == 0);
17609   match(Set dst (SubF dst (LoadF src)));
17610 
17611   format %{ "subss   $dst, $src" %}
17612   ins_cost(150);
17613   ins_encode %{
17614     __ subss($dst$$XMMRegister, $src$$Address);
17615   %}
17616   ins_pipe(pipe_slow);
17617 %}
17618 
17619 instruct subF_imm(regF dst, immF con) %{
17620   predicate(UseAVX == 0);
17621   match(Set dst (SubF dst con));
17622   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17623   ins_cost(150);
17624   ins_encode %{
17625     __ subss($dst$$XMMRegister, $constantaddress($con));
17626   %}
17627   ins_pipe(pipe_slow);
17628 %}
17629 
17630 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17631   predicate(UseAVX > 0);
17632   match(Set dst (SubF src1 src2));
17633 
17634   format %{ "vsubss  $dst, $src1, $src2" %}
17635   ins_cost(150);
17636   ins_encode %{
17637     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17638   %}
17639   ins_pipe(pipe_slow);
17640 %}
17641 
17642 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17643   predicate(UseAVX > 0);
17644   match(Set dst (SubF src1 (LoadF src2)));
17645 
17646   format %{ "vsubss  $dst, $src1, $src2" %}
17647   ins_cost(150);
17648   ins_encode %{
17649     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17650   %}
17651   ins_pipe(pipe_slow);
17652 %}
17653 
17654 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17655   predicate(UseAVX > 0);
17656   match(Set dst (SubF src con));
17657 
17658   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17659   ins_cost(150);
17660   ins_encode %{
17661     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17662   %}
17663   ins_pipe(pipe_slow);
17664 %}
17665 
17666 instruct subD_reg(regD dst, regD src) %{
17667   predicate(UseAVX == 0);
17668   match(Set dst (SubD dst src));
17669 
17670   format %{ "subsd   $dst, $src" %}
17671   ins_cost(150);
17672   ins_encode %{
17673     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17674   %}
17675   ins_pipe(pipe_slow);
17676 %}
17677 
17678 instruct subD_mem(regD dst, memory src) %{
17679   predicate(UseAVX == 0);
17680   match(Set dst (SubD dst (LoadD src)));
17681 
17682   format %{ "subsd   $dst, $src" %}
17683   ins_cost(150);
17684   ins_encode %{
17685     __ subsd($dst$$XMMRegister, $src$$Address);
17686   %}
17687   ins_pipe(pipe_slow);
17688 %}
17689 
17690 instruct subD_imm(regD dst, immD con) %{
17691   predicate(UseAVX == 0);
17692   match(Set dst (SubD dst con));
17693   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17694   ins_cost(150);
17695   ins_encode %{
17696     __ subsd($dst$$XMMRegister, $constantaddress($con));
17697   %}
17698   ins_pipe(pipe_slow);
17699 %}
17700 
17701 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17702   predicate(UseAVX > 0);
17703   match(Set dst (SubD src1 src2));
17704 
17705   format %{ "vsubsd  $dst, $src1, $src2" %}
17706   ins_cost(150);
17707   ins_encode %{
17708     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17709   %}
17710   ins_pipe(pipe_slow);
17711 %}
17712 
17713 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17714   predicate(UseAVX > 0);
17715   match(Set dst (SubD src1 (LoadD src2)));
17716 
17717   format %{ "vsubsd  $dst, $src1, $src2" %}
17718   ins_cost(150);
17719   ins_encode %{
17720     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17721   %}
17722   ins_pipe(pipe_slow);
17723 %}
17724 
17725 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17726   predicate(UseAVX > 0);
17727   match(Set dst (SubD src con));
17728 
17729   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17730   ins_cost(150);
17731   ins_encode %{
17732     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17733   %}
17734   ins_pipe(pipe_slow);
17735 %}
17736 
17737 instruct mulF_reg(regF dst, regF src) %{
17738   predicate(UseAVX == 0);
17739   match(Set dst (MulF dst src));
17740 
17741   format %{ "mulss   $dst, $src" %}
17742   ins_cost(150);
17743   ins_encode %{
17744     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17745   %}
17746   ins_pipe(pipe_slow);
17747 %}
17748 
17749 instruct mulF_mem(regF dst, memory src) %{
17750   predicate(UseAVX == 0);
17751   match(Set dst (MulF dst (LoadF src)));
17752 
17753   format %{ "mulss   $dst, $src" %}
17754   ins_cost(150);
17755   ins_encode %{
17756     __ mulss($dst$$XMMRegister, $src$$Address);
17757   %}
17758   ins_pipe(pipe_slow);
17759 %}
17760 
17761 instruct mulF_imm(regF dst, immF con) %{
17762   predicate(UseAVX == 0);
17763   match(Set dst (MulF dst con));
17764   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17765   ins_cost(150);
17766   ins_encode %{
17767     __ mulss($dst$$XMMRegister, $constantaddress($con));
17768   %}
17769   ins_pipe(pipe_slow);
17770 %}
17771 
17772 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17773   predicate(UseAVX > 0);
17774   match(Set dst (MulF src1 src2));
17775 
17776   format %{ "vmulss  $dst, $src1, $src2" %}
17777   ins_cost(150);
17778   ins_encode %{
17779     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17780   %}
17781   ins_pipe(pipe_slow);
17782 %}
17783 
17784 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17785   predicate(UseAVX > 0);
17786   match(Set dst (MulF src1 (LoadF src2)));
17787 
17788   format %{ "vmulss  $dst, $src1, $src2" %}
17789   ins_cost(150);
17790   ins_encode %{
17791     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17792   %}
17793   ins_pipe(pipe_slow);
17794 %}
17795 
17796 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17797   predicate(UseAVX > 0);
17798   match(Set dst (MulF src con));
17799 
17800   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17801   ins_cost(150);
17802   ins_encode %{
17803     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17804   %}
17805   ins_pipe(pipe_slow);
17806 %}
17807 
17808 instruct mulD_reg(regD dst, regD src) %{
17809   predicate(UseAVX == 0);
17810   match(Set dst (MulD dst src));
17811 
17812   format %{ "mulsd   $dst, $src" %}
17813   ins_cost(150);
17814   ins_encode %{
17815     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17816   %}
17817   ins_pipe(pipe_slow);
17818 %}
17819 
17820 instruct mulD_mem(regD dst, memory src) %{
17821   predicate(UseAVX == 0);
17822   match(Set dst (MulD dst (LoadD src)));
17823 
17824   format %{ "mulsd   $dst, $src" %}
17825   ins_cost(150);
17826   ins_encode %{
17827     __ mulsd($dst$$XMMRegister, $src$$Address);
17828   %}
17829   ins_pipe(pipe_slow);
17830 %}
17831 
17832 instruct mulD_imm(regD dst, immD con) %{
17833   predicate(UseAVX == 0);
17834   match(Set dst (MulD dst con));
17835   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17836   ins_cost(150);
17837   ins_encode %{
17838     __ mulsd($dst$$XMMRegister, $constantaddress($con));
17839   %}
17840   ins_pipe(pipe_slow);
17841 %}
17842 
17843 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17844   predicate(UseAVX > 0);
17845   match(Set dst (MulD src1 src2));
17846 
17847   format %{ "vmulsd  $dst, $src1, $src2" %}
17848   ins_cost(150);
17849   ins_encode %{
17850     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17851   %}
17852   ins_pipe(pipe_slow);
17853 %}
17854 
17855 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17856   predicate(UseAVX > 0);
17857   match(Set dst (MulD src1 (LoadD src2)));
17858 
17859   format %{ "vmulsd  $dst, $src1, $src2" %}
17860   ins_cost(150);
17861   ins_encode %{
17862     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17863   %}
17864   ins_pipe(pipe_slow);
17865 %}
17866 
17867 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17868   predicate(UseAVX > 0);
17869   match(Set dst (MulD src con));
17870 
17871   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17872   ins_cost(150);
17873   ins_encode %{
17874     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17875   %}
17876   ins_pipe(pipe_slow);
17877 %}
17878 
17879 instruct divF_reg(regF dst, regF src) %{
17880   predicate(UseAVX == 0);
17881   match(Set dst (DivF dst src));
17882 
17883   format %{ "divss   $dst, $src" %}
17884   ins_cost(150);
17885   ins_encode %{
17886     __ divss($dst$$XMMRegister, $src$$XMMRegister);
17887   %}
17888   ins_pipe(pipe_slow);
17889 %}
17890 
17891 instruct divF_mem(regF dst, memory src) %{
17892   predicate(UseAVX == 0);
17893   match(Set dst (DivF dst (LoadF src)));
17894 
17895   format %{ "divss   $dst, $src" %}
17896   ins_cost(150);
17897   ins_encode %{
17898     __ divss($dst$$XMMRegister, $src$$Address);
17899   %}
17900   ins_pipe(pipe_slow);
17901 %}
17902 
17903 instruct divF_imm(regF dst, immF con) %{
17904   predicate(UseAVX == 0);
17905   match(Set dst (DivF dst con));
17906   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17907   ins_cost(150);
17908   ins_encode %{
17909     __ divss($dst$$XMMRegister, $constantaddress($con));
17910   %}
17911   ins_pipe(pipe_slow);
17912 %}
17913 
17914 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17915   predicate(UseAVX > 0);
17916   match(Set dst (DivF src1 src2));
17917 
17918   format %{ "vdivss  $dst, $src1, $src2" %}
17919   ins_cost(150);
17920   ins_encode %{
17921     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17922   %}
17923   ins_pipe(pipe_slow);
17924 %}
17925 
17926 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17927   predicate(UseAVX > 0);
17928   match(Set dst (DivF src1 (LoadF src2)));
17929 
17930   format %{ "vdivss  $dst, $src1, $src2" %}
17931   ins_cost(150);
17932   ins_encode %{
17933     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17934   %}
17935   ins_pipe(pipe_slow);
17936 %}
17937 
17938 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17939   predicate(UseAVX > 0);
17940   match(Set dst (DivF src con));
17941 
17942   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17943   ins_cost(150);
17944   ins_encode %{
17945     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17946   %}
17947   ins_pipe(pipe_slow);
17948 %}
17949 
17950 instruct divD_reg(regD dst, regD src) %{
17951   predicate(UseAVX == 0);
17952   match(Set dst (DivD dst src));
17953 
17954   format %{ "divsd   $dst, $src" %}
17955   ins_cost(150);
17956   ins_encode %{
17957     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17958   %}
17959   ins_pipe(pipe_slow);
17960 %}
17961 
17962 instruct divD_mem(regD dst, memory src) %{
17963   predicate(UseAVX == 0);
17964   match(Set dst (DivD dst (LoadD src)));
17965 
17966   format %{ "divsd   $dst, $src" %}
17967   ins_cost(150);
17968   ins_encode %{
17969     __ divsd($dst$$XMMRegister, $src$$Address);
17970   %}
17971   ins_pipe(pipe_slow);
17972 %}
17973 
17974 instruct divD_imm(regD dst, immD con) %{
17975   predicate(UseAVX == 0);
17976   match(Set dst (DivD dst con));
17977   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17978   ins_cost(150);
17979   ins_encode %{
17980     __ divsd($dst$$XMMRegister, $constantaddress($con));
17981   %}
17982   ins_pipe(pipe_slow);
17983 %}
17984 
17985 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17986   predicate(UseAVX > 0);
17987   match(Set dst (DivD src1 src2));
17988 
17989   format %{ "vdivsd  $dst, $src1, $src2" %}
17990   ins_cost(150);
17991   ins_encode %{
17992     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17993   %}
17994   ins_pipe(pipe_slow);
17995 %}
17996 
17997 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17998   predicate(UseAVX > 0);
17999   match(Set dst (DivD src1 (LoadD src2)));
18000 
18001   format %{ "vdivsd  $dst, $src1, $src2" %}
18002   ins_cost(150);
18003   ins_encode %{
18004     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18005   %}
18006   ins_pipe(pipe_slow);
18007 %}
18008 
18009 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18010   predicate(UseAVX > 0);
18011   match(Set dst (DivD src con));
18012 
18013   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18014   ins_cost(150);
18015   ins_encode %{
18016     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18017   %}
18018   ins_pipe(pipe_slow);
18019 %}
18020 
18021 instruct absF_reg(regF dst) %{
18022   predicate(UseAVX == 0);
18023   match(Set dst (AbsF dst));
18024   ins_cost(150);
18025   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
18026   ins_encode %{
18027     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18028   %}
18029   ins_pipe(pipe_slow);
18030 %}
18031 
18032 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18033   predicate(UseAVX > 0);
18034   match(Set dst (AbsF src));
18035   ins_cost(150);
18036   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18037   ins_encode %{
18038     int vlen_enc = Assembler::AVX_128bit;
18039     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18040               ExternalAddress(float_signmask()), vlen_enc);
18041   %}
18042   ins_pipe(pipe_slow);
18043 %}
18044 
18045 instruct absD_reg(regD dst) %{
18046   predicate(UseAVX == 0);
18047   match(Set dst (AbsD dst));
18048   ins_cost(150);
18049   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
18050             "# abs double by sign masking" %}
18051   ins_encode %{
18052     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18053   %}
18054   ins_pipe(pipe_slow);
18055 %}
18056 
18057 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18058   predicate(UseAVX > 0);
18059   match(Set dst (AbsD src));
18060   ins_cost(150);
18061   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
18062             "# abs double by sign masking" %}
18063   ins_encode %{
18064     int vlen_enc = Assembler::AVX_128bit;
18065     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18066               ExternalAddress(double_signmask()), vlen_enc);
18067   %}
18068   ins_pipe(pipe_slow);
18069 %}
18070 
18071 instruct negF_reg(regF dst) %{
18072   predicate(UseAVX == 0);
18073   match(Set dst (NegF dst));
18074   ins_cost(150);
18075   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
18076   ins_encode %{
18077     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18078   %}
18079   ins_pipe(pipe_slow);
18080 %}
18081 
18082 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18083   predicate(UseAVX > 0);
18084   match(Set dst (NegF src));
18085   ins_cost(150);
18086   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18087   ins_encode %{
18088     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18089                  ExternalAddress(float_signflip()));
18090   %}
18091   ins_pipe(pipe_slow);
18092 %}
18093 
18094 instruct negD_reg(regD dst) %{
18095   predicate(UseAVX == 0);
18096   match(Set dst (NegD dst));
18097   ins_cost(150);
18098   format %{ "xorpd   $dst, [0x8000000000000000]\t"
18099             "# neg double by sign flipping" %}
18100   ins_encode %{
18101     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18102   %}
18103   ins_pipe(pipe_slow);
18104 %}
18105 
18106 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18107   predicate(UseAVX > 0);
18108   match(Set dst (NegD src));
18109   ins_cost(150);
18110   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
18111             "# neg double by sign flipping" %}
18112   ins_encode %{
18113     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18114                  ExternalAddress(double_signflip()));
18115   %}
18116   ins_pipe(pipe_slow);
18117 %}
18118 
// The sqrtss instruction needs its destination register to be pre-initialized for best performance.
// Therefore, only the instruct rule where the input is pre-loaded into the dst register is defined below.
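// A minimal sketch of the alternative (assumption: the usual way to avoid the
// partial-register dependence when dst != src would be to clear dst first):
//
//   __ xorps(dst, dst);          // break the dependence on dst's old value
//   __ sqrtss(dst, src);         // sqrtss writes only the low 32 bits of dst
//
// Requiring dst == src, as the rule below does, sidesteps the issue entirely.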
18121 instruct sqrtF_reg(regF dst) %{
18122   match(Set dst (SqrtF dst));
18123   format %{ "sqrtss  $dst, $dst" %}
18124   ins_encode %{
18125     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18126   %}
18127   ins_pipe(pipe_slow);
18128 %}
18129 
// The sqrtsd instruction needs its destination register to be pre-initialized for best performance.
// Therefore, only the instruct rule where the input is pre-loaded into the dst register is defined below.
18132 instruct sqrtD_reg(regD dst) %{
18133   match(Set dst (SqrtD dst));
18134   format %{ "sqrtsd  $dst, $dst" %}
18135   ins_encode %{
18136     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18137   %}
18138   ins_pipe(pipe_slow);
18139 %}
18140 
18141 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18142   effect(TEMP tmp);
18143   match(Set dst (ConvF2HF src));
18144   ins_cost(125);
18145   format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18146   ins_encode %{
18147     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18148   %}
18149   ins_pipe( pipe_slow );
18150 %}
18151 
18152 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18153   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18154   effect(TEMP ktmp, TEMP rtmp);
18155   match(Set mem (StoreC mem (ConvF2HF src)));
18156   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18157   ins_encode %{
18158     __ movl($rtmp$$Register, 0x1);
18159     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18160     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18161   %}
18162   ins_pipe( pipe_slow );
18163 %}
18164 
18165 instruct vconvF2HF(vec dst, vec src) %{
18166   match(Set dst (VectorCastF2HF src));
18167   format %{ "vector_conv_F2HF $dst $src" %}
18168   ins_encode %{
18169     int vlen_enc = vector_length_encoding(this, $src);
18170     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18171   %}
18172   ins_pipe( pipe_slow );
18173 %}
18174 
18175 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18176   predicate(n->as_StoreVector()->memory_size() >= 16);
18177   match(Set mem (StoreVector mem (VectorCastF2HF src)));
18178   format %{ "vcvtps2ph $mem,$src" %}
18179   ins_encode %{
18180     int vlen_enc = vector_length_encoding(this, $src);
18181     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18182   %}
18183   ins_pipe( pipe_slow );
18184 %}
18185 
18186 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18187   match(Set dst (ConvHF2F src));
18188   format %{ "vcvtph2ps $dst,$src" %}
18189   ins_encode %{
18190     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18191   %}
18192   ins_pipe( pipe_slow );
18193 %}
18194 
18195 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18196   match(Set dst (VectorCastHF2F (LoadVector mem)));
18197   format %{ "vcvtph2ps $dst,$mem" %}
18198   ins_encode %{
18199     int vlen_enc = vector_length_encoding(this);
18200     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18201   %}
18202   ins_pipe( pipe_slow );
18203 %}
18204 
18205 instruct vconvHF2F(vec dst, vec src) %{
18206   match(Set dst (VectorCastHF2F src));
18207   ins_cost(125);
18208   format %{ "vector_conv_HF2F $dst,$src" %}
18209   ins_encode %{
18210     int vlen_enc = vector_length_encoding(this);
18211     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18212   %}
18213   ins_pipe( pipe_slow );
18214 %}
18215 
18216 // ---------------------------------------- VectorReinterpret ------------------------------------
18217 instruct reinterpret_mask(kReg dst) %{
18218   predicate(n->bottom_type()->isa_vectmask() &&
18219             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18220   match(Set dst (VectorReinterpret dst));
18221   ins_cost(125);
18222   format %{ "vector_reinterpret $dst\t!" %}
18223   ins_encode %{
18224     // empty
18225   %}
18226   ins_pipe( pipe_slow );
18227 %}
18228 
18229 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18230   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18231             n->bottom_type()->isa_vectmask() &&
18232             n->in(1)->bottom_type()->isa_vectmask() &&
18233             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst size == src size
18235   match(Set dst (VectorReinterpret src));
18236   effect(TEMP xtmp);
18237   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18238   ins_encode %{
18239      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18240      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18241      assert(src_sz == dst_sz , "src and dst size mismatch");
18242      int vlen_enc = vector_length_encoding(src_sz);
18243      __  evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18244      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18245   %}
18246   ins_pipe( pipe_slow );
18247 %}
18248 
18249 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18250   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18251             n->bottom_type()->isa_vectmask() &&
18252             n->in(1)->bottom_type()->isa_vectmask() &&
18253             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18254              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst size == src size
18256   match(Set dst (VectorReinterpret src));
18257   effect(TEMP xtmp);
18258   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18259   ins_encode %{
18260      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18261      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18262      assert(src_sz == dst_sz , "src and dst size mismatch");
18263      int vlen_enc = vector_length_encoding(src_sz);
18264      __  evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18265      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18266   %}
18267   ins_pipe( pipe_slow );
18268 %}
18269 
18270 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18271   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18272             n->bottom_type()->isa_vectmask() &&
18273             n->in(1)->bottom_type()->isa_vectmask() &&
18274             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18275              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst size == src size
18277   match(Set dst (VectorReinterpret src));
18278   effect(TEMP xtmp);
18279   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18280   ins_encode %{
18281      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18282      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18283      assert(src_sz == dst_sz , "src and dst size mismatch");
18284      int vlen_enc = vector_length_encoding(src_sz);
18285      __  evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18286      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18287   %}
18288   ins_pipe( pipe_slow );
18289 %}
18290 
18291 instruct reinterpret(vec dst) %{
18292   predicate(!n->bottom_type()->isa_vectmask() &&
18293             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18294   match(Set dst (VectorReinterpret dst));
18295   ins_cost(125);
18296   format %{ "vector_reinterpret $dst\t!" %}
18297   ins_encode %{
18298     // empty
18299   %}
18300   ins_pipe( pipe_slow );
18301 %}
18302 
18303 instruct reinterpret_expand(vec dst, vec src) %{
18304   predicate(UseAVX == 0 &&
18305             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18306   match(Set dst (VectorReinterpret src));
18307   ins_cost(125);
18308   effect(TEMP dst);
18309   format %{ "vector_reinterpret_expand $dst,$src" %}
18310   ins_encode %{
18311     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18312     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18313 
18314     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18315     if (src_vlen_in_bytes == 4) {
18316       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18317     } else {
18318       assert(src_vlen_in_bytes == 8, "");
18319       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18320     }
18321     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18322   %}
18323   ins_pipe( pipe_slow );
18324 %}
18325 
18326 instruct vreinterpret_expand4(legVec dst, vec src) %{
18327   predicate(UseAVX > 0 &&
18328             !n->bottom_type()->isa_vectmask() &&
18329             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18330             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18331   match(Set dst (VectorReinterpret src));
18332   ins_cost(125);
18333   format %{ "vector_reinterpret_expand $dst,$src" %}
18334   ins_encode %{
18335     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18336   %}
18337   ins_pipe( pipe_slow );
18338 %}
18339 
18340 
18341 instruct vreinterpret_expand(legVec dst, vec src) %{
18342   predicate(UseAVX > 0 &&
18343             !n->bottom_type()->isa_vectmask() &&
18344             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18345             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18346   match(Set dst (VectorReinterpret src));
18347   ins_cost(125);
18348   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18349   ins_encode %{
18350     switch (Matcher::vector_length_in_bytes(this, $src)) {
18351       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18352       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18353       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18354       default: ShouldNotReachHere();
18355     }
18356   %}
18357   ins_pipe( pipe_slow );
18358 %}
18359 
18360 instruct reinterpret_shrink(vec dst, legVec src) %{
18361   predicate(!n->bottom_type()->isa_vectmask() &&
18362             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18363   match(Set dst (VectorReinterpret src));
18364   ins_cost(125);
18365   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18366   ins_encode %{
18367     switch (Matcher::vector_length_in_bytes(this)) {
18368       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18369       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18370       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18371       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18372       default: ShouldNotReachHere();
18373     }
18374   %}
18375   ins_pipe( pipe_slow );
18376 %}
18377 
18378 // ----------------------------------------------------------------------------------------------------
18379 
18380 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18381   match(Set dst (RoundDoubleMode src rmode));
18382   format %{ "roundsd $dst,$src" %}
18383   ins_cost(150);
18384   ins_encode %{
18385     assert(UseSSE >= 4, "required");
18386     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18387       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18388     }
18389     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18390   %}
18391   ins_pipe(pipe_slow);
18392 %}
18393 
18394 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18395   match(Set dst (RoundDoubleMode con rmode));
18396   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18397   ins_cost(150);
18398   ins_encode %{
18399     assert(UseSSE >= 4, "required");
18400     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18401   %}
18402   ins_pipe(pipe_slow);
18403 %}
18404 
18405 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18406   predicate(Matcher::vector_length(n) < 8);
18407   match(Set dst (RoundDoubleModeV src rmode));
18408   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18409   ins_encode %{
18410     assert(UseAVX > 0, "required");
18411     int vlen_enc = vector_length_encoding(this);
18412     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18413   %}
18414   ins_pipe( pipe_slow );
18415 %}
18416 
18417 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18418   predicate(Matcher::vector_length(n) == 8);
18419   match(Set dst (RoundDoubleModeV src rmode));
18420   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18421   ins_encode %{
18422     assert(UseAVX > 2, "required");
18423     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18424   %}
18425   ins_pipe( pipe_slow );
18426 %}
18427 
18428 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18429   predicate(Matcher::vector_length(n) < 8);
18430   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18431   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18432   ins_encode %{
18433     assert(UseAVX > 0, "required");
18434     int vlen_enc = vector_length_encoding(this);
18435     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18436   %}
18437   ins_pipe( pipe_slow );
18438 %}
18439 
18440 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18441   predicate(Matcher::vector_length(n) == 8);
18442   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18443   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18444   ins_encode %{
18445     assert(UseAVX > 2, "required");
18446     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18447   %}
18448   ins_pipe( pipe_slow );
18449 %}
18450 
18451 instruct onspinwait() %{
18452   match(OnSpinWait);
18453   ins_cost(200);
18454 
18455   format %{
18456     $$template
18457     $$emit$$"pause\t! membar_onspinwait"
18458   %}
18459   ins_encode %{
18460     __ pause();
18461   %}
18462   ins_pipe(pipe_slow);
18463 %}
18464 
18465 // a * b + c
18466 instruct fmaD_reg(regD a, regD b, regD c) %{
18467   match(Set c (FmaD  c (Binary a b)));
18468   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18469   ins_cost(150);
18470   ins_encode %{
18471     assert(UseFMA, "Needs FMA instructions support.");
18472     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18473   %}
18474   ins_pipe( pipe_slow );
18475 %}
18476 
18477 // a * b + c
18478 instruct fmaF_reg(regF a, regF b, regF c) %{
18479   match(Set c (FmaF  c (Binary a b)));
18480   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18481   ins_cost(150);
18482   ins_encode %{
18483     assert(UseFMA, "Needs FMA instructions support.");
18484     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18485   %}
18486   ins_pipe( pipe_slow );
18487 %}
18488 
18489 // ====================VECTOR INSTRUCTIONS=====================================
18490 
18491 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18492 instruct MoveVec2Leg(legVec dst, vec src) %{
18493   match(Set dst src);
18494   format %{ "" %}
18495   ins_encode %{
18496     ShouldNotReachHere();
18497   %}
18498   ins_pipe( fpu_reg_reg );
18499 %}
18500 
18501 instruct MoveLeg2Vec(vec dst, legVec src) %{
18502   match(Set dst src);
18503   format %{ "" %}
18504   ins_encode %{
18505     ShouldNotReachHere();
18506   %}
18507   ins_pipe( fpu_reg_reg );
18508 %}
18509 
18510 // ============================================================================
18511 
18512 // Load vectors generic operand pattern
18513 instruct loadV(vec dst, memory mem) %{
18514   match(Set dst (LoadVector mem));
18515   ins_cost(125);
18516   format %{ "load_vector $dst,$mem" %}
18517   ins_encode %{
18518     BasicType bt = Matcher::vector_element_basic_type(this);
18519     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18520   %}
18521   ins_pipe( pipe_slow );
18522 %}
18523 
18524 // Store vectors generic operand pattern.
18525 instruct storeV(memory mem, vec src) %{
18526   match(Set mem (StoreVector mem src));
18527   ins_cost(145);
18528   format %{ "store_vector $mem,$src\n\t" %}
18529   ins_encode %{
18530     switch (Matcher::vector_length_in_bytes(this, $src)) {
18531       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18532       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18533       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18534       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18535       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18536       default: ShouldNotReachHere();
18537     }
18538   %}
18539   ins_pipe( pipe_slow );
18540 %}
18541 
18542 // ---------------------------------------- Gather ------------------------------------
18543 
18544 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
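// Three cases below: an AVX2-style gather driven by an all-ones vector mask, AVX-512
// gathers predicated by an opmask register (kReg), and dedicated rules further down
// for subword (byte/short) element types.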
18545 
18546 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18547   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18548             Matcher::vector_length_in_bytes(n) <= 32);
18549   match(Set dst (LoadVectorGather mem idx));
18550   effect(TEMP dst, TEMP tmp, TEMP mask);
18551   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18552   ins_encode %{
18553     int vlen_enc = vector_length_encoding(this);
18554     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18555     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18556     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18557     __ lea($tmp$$Register, $mem$$Address);
18558     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18559   %}
18560   ins_pipe( pipe_slow );
18561 %}
18562 
18563 
18564 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18565   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18566             !is_subword_type(Matcher::vector_element_basic_type(n)));
18567   match(Set dst (LoadVectorGather mem idx));
18568   effect(TEMP dst, TEMP tmp, TEMP ktmp);
18569   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18570   ins_encode %{
18571     int vlen_enc = vector_length_encoding(this);
18572     BasicType elem_bt = Matcher::vector_element_basic_type(this);
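    // kxnor of a register with itself yields an all-ones opmask, so every lane is gathered.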
18573     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18574     __ lea($tmp$$Register, $mem$$Address);
18575     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18576   %}
18577   ins_pipe( pipe_slow );
18578 %}
18579 
18580 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18581   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18582             !is_subword_type(Matcher::vector_element_basic_type(n)));
18583   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18584   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
18585   format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18586   ins_encode %{
18587     assert(UseAVX > 2, "sanity");
18588     int vlen_enc = vector_length_encoding(this);
18589     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18590     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18591     // Note: the gather instruction partially updates the opmask register used for
18592     // predication, hence the mask operand is first copied to a temporary.
18593     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18594     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18595     __ lea($tmp$$Register, $mem$$Address);
18596     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18597   %}
18598   ins_pipe( pipe_slow );
18599 %}
18600 
18601 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18602   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18603   match(Set dst (LoadVectorGather mem idx_base));
18604   effect(TEMP tmp, TEMP rtmp);
18605   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18606   ins_encode %{
18607     int vlen_enc = vector_length_encoding(this);
18608     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18609     __ lea($tmp$$Register, $mem$$Address);
18610     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18611   %}
18612   ins_pipe( pipe_slow );
18613 %}
18614 
18615 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18616                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18617   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18618   match(Set dst (LoadVectorGather mem idx_base));
18619   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18620   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18621   ins_encode %{
18622     int vlen_enc = vector_length_encoding(this);
18623     int vector_len = Matcher::vector_length(this);
18624     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18625     __ lea($tmp$$Register, $mem$$Address);
18626     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18627     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18628                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18629   %}
18630   ins_pipe( pipe_slow );
18631 %}
18632 
18633 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18634   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18635   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18636   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18637   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18638   ins_encode %{
18639     int vlen_enc = vector_length_encoding(this);
18640     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18641     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18642     __ lea($tmp$$Register, $mem$$Address);
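    // vgather8b_masked consumes the mask as a bit pattern in a general-purpose register,
    // so copy it out of the opmask register first.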
18643     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18644     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18645   %}
18646   ins_pipe( pipe_slow );
18647 %}
18648 
18649 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18650                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18651   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18652   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18653   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18654   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18655   ins_encode %{
18656     int vlen_enc = vector_length_encoding(this);
18657     int vector_len = Matcher::vector_length(this);
18658     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18659     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18660     __ lea($tmp$$Register, $mem$$Address);
18661     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18662     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18663     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18664                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18665   %}
18666   ins_pipe( pipe_slow );
18667 %}
18668 
18669 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18670   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18671   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18672   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18673   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18674   ins_encode %{
18675     int vlen_enc = vector_length_encoding(this);
18676     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18677     __ lea($tmp$$Register, $mem$$Address);
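    // vpmovmskb produces one mask bit per byte; for short elements keep every other bit
    // (pext with 0x55555555) so that the mask carries one bit per element.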
18678     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18679     if (elem_bt == T_SHORT) {
18680       __ movl($mask_idx$$Register, 0x55555555);
18681       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18682     }
18683     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18684     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18685   %}
18686   ins_pipe( pipe_slow );
18687 %}
18688 
18689 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18690                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18691   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18692   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18693   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18694   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18695   ins_encode %{
18696     int vlen_enc = vector_length_encoding(this);
18697     int vector_len = Matcher::vector_length(this);
18698     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18699     __ lea($tmp$$Register, $mem$$Address);
18700     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
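    // vpmovmskb produces one mask bit per byte; for short elements keep every other bit
    // (pext with 0x55555555) so that the mask carries one bit per element.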
18701     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18702     if (elem_bt == T_SHORT) {
18703       __ movl($mask_idx$$Register, 0x55555555);
18704       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18705     }
18706     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18707     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18708                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18709   %}
18710   ins_pipe( pipe_slow );
18711 %}
18712 
18713 // ====================Scatter=======================================
18714 
18715 // Scatter INT, LONG, FLOAT, DOUBLE
18716 
18717 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18718   predicate(UseAVX > 2);
18719   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18720   effect(TEMP tmp, TEMP ktmp);
18721   format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18722   ins_encode %{
18723     int vlen_enc = vector_length_encoding(this, $src);
18724     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18725 
18726     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18727     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18728 
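    // Load an all-ones opmask so that every lane is scattered.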
18729     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18730     __ lea($tmp$$Register, $mem$$Address);
18731     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18732   %}
18733   ins_pipe( pipe_slow );
18734 %}
18735 
18736 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18737   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18738   effect(TEMP tmp, TEMP ktmp);
18739   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t! using $tmp and $ktmp as TEMP" %}
18740   ins_encode %{
18741     int vlen_enc = vector_length_encoding(this, $src);
18742     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18743     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18744     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18745     // Note: the scatter instruction partially updates the opmask register used for
18746     // predication, hence the mask operand is first copied to a temporary.
18747     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18748     __ lea($tmp$$Register, $mem$$Address);
18749     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18750   %}
18751   ins_pipe( pipe_slow );
18752 %}
18753 
18754 // ====================REPLICATE=======================================
18755 
18756 // Replicate byte scalar to be vector
18757 instruct vReplB_reg(vec dst, rRegI src) %{
18758   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18759   match(Set dst (Replicate src));
18760   format %{ "replicateB $dst,$src" %}
18761   ins_encode %{
18762     uint vlen = Matcher::vector_length(this);
18763     if (UseAVX >= 2) {
18764       int vlen_enc = vector_length_encoding(this);
18765       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18766         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18767         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18768       } else {
18769         __ movdl($dst$$XMMRegister, $src$$Register);
18770         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18771       }
18772     } else {
18773       assert(UseAVX < 2, "");
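      // SSE fallback: duplicate the byte into a word (punpcklbw), broadcast that word
      // across the low eight bytes (pshuflw), and for 16-byte vectors copy the low
      // quadword into the high half (punpcklqdq).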
18774       __ movdl($dst$$XMMRegister, $src$$Register);
18775       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18776       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18777       if (vlen >= 16) {
18778         assert(vlen == 16, "");
18779         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18780       }
18781     }
18782   %}
18783   ins_pipe( pipe_slow );
18784 %}
18785 
18786 instruct ReplB_mem(vec dst, memory mem) %{
18787   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18788   match(Set dst (Replicate (LoadB mem)));
18789   format %{ "replicateB $dst,$mem" %}
18790   ins_encode %{
18791     int vlen_enc = vector_length_encoding(this);
18792     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18793   %}
18794   ins_pipe( pipe_slow );
18795 %}
18796 
18797 // ====================ReplicateS=======================================
18798 
18799 instruct vReplS_reg(vec dst, rRegI src) %{
18800   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18801   match(Set dst (Replicate src));
18802   format %{ "replicateS $dst,$src" %}
18803   ins_encode %{
18804     uint vlen = Matcher::vector_length(this);
18805     int vlen_enc = vector_length_encoding(this);
18806     if (UseAVX >= 2) {
18807       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18808         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18809         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18810       } else {
18811         __ movdl($dst$$XMMRegister, $src$$Register);
18812         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18813       }
18814     } else {
18815       assert(UseAVX < 2, "");
18816       __ movdl($dst$$XMMRegister, $src$$Register);
18817       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18818       if (vlen >= 8) {
18819         assert(vlen == 8, "");
18820         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18821       }
18822     }
18823   %}
18824   ins_pipe( pipe_slow );
18825 %}
18826 
18827 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18828   match(Set dst (Replicate con));
18829   effect(TEMP rtmp);
18830   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18831   ins_encode %{
18832     int vlen_enc = vector_length_encoding(this);
18833     BasicType bt = Matcher::vector_element_basic_type(this);
18834     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18835     __ movl($rtmp$$Register, $con$$constant);
18836     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18837   %}
18838   ins_pipe( pipe_slow );
18839 %}
18840 
18841 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18842   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18843   match(Set dst (Replicate src));
18844   effect(TEMP rtmp);
18845   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18846   ins_encode %{
18847     int vlen_enc = vector_length_encoding(this);
18848     __ evmovw($rtmp$$Register, $src$$XMMRegister);
18849     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18850   %}
18851   ins_pipe( pipe_slow );
18852 %}
18853 
18854 instruct ReplS_mem(vec dst, memory mem) %{
18855   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18856   match(Set dst (Replicate (LoadS mem)));
18857   format %{ "replicateS $dst,$mem" %}
18858   ins_encode %{
18859     int vlen_enc = vector_length_encoding(this);
18860     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18861   %}
18862   ins_pipe( pipe_slow );
18863 %}
18864 
18865 // ====================ReplicateI=======================================
18866 
18867 instruct ReplI_reg(vec dst, rRegI src) %{
18868   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18869   match(Set dst (Replicate src));
18870   format %{ "replicateI $dst,$src" %}
18871   ins_encode %{
18872     uint vlen = Matcher::vector_length(this);
18873     int vlen_enc = vector_length_encoding(this);
18874     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18875       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18876     } else if (VM_Version::supports_avx2()) {
18877       __ movdl($dst$$XMMRegister, $src$$Register);
18878       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18879     } else {
18880       __ movdl($dst$$XMMRegister, $src$$Register);
18881       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18882     }
18883   %}
18884   ins_pipe( pipe_slow );
18885 %}
18886 
18887 instruct ReplI_mem(vec dst, memory mem) %{
18888   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18889   match(Set dst (Replicate (LoadI mem)));
18890   format %{ "replicateI $dst,$mem" %}
18891   ins_encode %{
18892     int vlen_enc = vector_length_encoding(this);
18893     if (VM_Version::supports_avx2()) {
18894       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18895     } else if (VM_Version::supports_avx()) {
18896       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18897     } else {
18898       __ movdl($dst$$XMMRegister, $mem$$Address);
18899       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18900     }
18901   %}
18902   ins_pipe( pipe_slow );
18903 %}
18904 
18905 instruct ReplI_imm(vec dst, immI con) %{
18906   predicate(Matcher::is_non_long_integral_vector(n));
18907   match(Set dst (Replicate con));
18908   format %{ "replicateI $dst,$con" %}
18909   ins_encode %{
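    // The replication count sizes the constant-table entry to the smallest chunk that
    // load_constant_vector can broadcast: 4 bytes with AVX, 8 with SSE3, 16 otherwise.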
18910     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18911                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18912                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
18913     BasicType bt = Matcher::vector_element_basic_type(this);
18914     int vlen = Matcher::vector_length_in_bytes(this);
18915     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18916   %}
18917   ins_pipe( pipe_slow );
18918 %}
18919 
18920 // Replicate scalar zero to be vector
18921 instruct ReplI_zero(vec dst, immI_0 zero) %{
18922   predicate(Matcher::is_non_long_integral_vector(n));
18923   match(Set dst (Replicate zero));
18924   format %{ "replicateI $dst,$zero" %}
18925   ins_encode %{
18926     int vlen_enc = vector_length_encoding(this);
18927     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18928       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18929     } else {
18930       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18931     }
18932   %}
18933   ins_pipe( fpu_reg_reg );
18934 %}
18935 
18936 instruct ReplI_M1(vec dst, immI_M1 con) %{
18937   predicate(Matcher::is_non_long_integral_vector(n));
18938   match(Set dst (Replicate con));
18939   format %{ "vallones $dst" %}
18940   ins_encode %{
18941     int vector_len = vector_length_encoding(this);
18942     __ vallones($dst$$XMMRegister, vector_len);
18943   %}
18944   ins_pipe( pipe_slow );
18945 %}
18946 
18947 // ====================ReplicateL=======================================
18948 
18949 // Replicate long (8 byte) scalar to be vector
18950 instruct ReplL_reg(vec dst, rRegL src) %{
18951   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18952   match(Set dst (Replicate src));
18953   format %{ "replicateL $dst,$src" %}
18954   ins_encode %{
18955     int vlen = Matcher::vector_length(this);
18956     int vlen_enc = vector_length_encoding(this);
18957     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18958       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18959     } else if (VM_Version::supports_avx2()) {
18960       __ movdq($dst$$XMMRegister, $src$$Register);
18961       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18962     } else {
18963       __ movdq($dst$$XMMRegister, $src$$Register);
18964       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18965     }
18966   %}
18967   ins_pipe( pipe_slow );
18968 %}
18969 
18970 instruct ReplL_mem(vec dst, memory mem) %{
18971   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18972   match(Set dst (Replicate (LoadL mem)));
18973   format %{ "replicateL $dst,$mem" %}
18974   ins_encode %{
18975     int vlen_enc = vector_length_encoding(this);
18976     if (VM_Version::supports_avx2()) {
18977       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18978     } else if (VM_Version::supports_sse3()) {
18979       __ movddup($dst$$XMMRegister, $mem$$Address);
18980     } else {
18981       __ movq($dst$$XMMRegister, $mem$$Address);
18982       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18983     }
18984   %}
18985   ins_pipe( pipe_slow );
18986 %}
18987 
18988 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
18989 instruct ReplL_imm(vec dst, immL con) %{
18990   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18991   match(Set dst (Replicate con));
18992   format %{ "replicateL $dst,$con" %}
18993   ins_encode %{
18994     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18995     int vlen = Matcher::vector_length_in_bytes(this);
18996     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18997   %}
18998   ins_pipe( pipe_slow );
18999 %}
19000 
19001 instruct ReplL_zero(vec dst, immL0 zero) %{
19002   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19003   match(Set dst (Replicate zero));
19004   format %{ "replicateL $dst,$zero" %}
19005   ins_encode %{
19006     int vlen_enc = vector_length_encoding(this);
19007     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19008       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19009     } else {
19010       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19011     }
19012   %}
19013   ins_pipe( fpu_reg_reg );
19014 %}
19015 
19016 instruct ReplL_M1(vec dst, immL_M1 con) %{
19017   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19018   match(Set dst (Replicate con));
19019   format %{ "vallones $dst" %}
19020   ins_encode %{
19021     int vector_len = vector_length_encoding(this);
19022     __ vallones($dst$$XMMRegister, vector_len);
19023   %}
19024   ins_pipe( pipe_slow );
19025 %}
19026 
19027 // ====================ReplicateF=======================================
19028 
19029 instruct vReplF_reg(vec dst, vlRegF src) %{
19030   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19031   match(Set dst (Replicate src));
19032   format %{ "replicateF $dst,$src" %}
19033   ins_encode %{
19034     uint vlen = Matcher::vector_length(this);
19035     int vlen_enc = vector_length_encoding(this);
19036     if (vlen <= 4) {
19037       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19038     } else if (VM_Version::supports_avx2()) {
19039       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19040     } else {
19041       assert(vlen == 8, "sanity");
19042       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19043       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19044     }
19045   %}
19046   ins_pipe( pipe_slow );
19047 %}
19048 
19049 instruct ReplF_reg(vec dst, vlRegF src) %{
19050   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19051   match(Set dst (Replicate src));
19052   format %{ "replicateF $dst,$src" %}
19053   ins_encode %{
19054     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19055   %}
19056   ins_pipe( pipe_slow );
19057 %}
19058 
19059 instruct ReplF_mem(vec dst, memory mem) %{
19060   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19061   match(Set dst (Replicate (LoadF mem)));
19062   format %{ "replicateF $dst,$mem" %}
19063   ins_encode %{
19064     int vlen_enc = vector_length_encoding(this);
19065     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19066   %}
19067   ins_pipe( pipe_slow );
19068 %}
19069 
19070 // Replicate float scalar immediate to be vector by loading from const table.
19071 instruct ReplF_imm(vec dst, immF con) %{
19072   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19073   match(Set dst (Replicate con));
19074   format %{ "replicateF $dst,$con" %}
19075   ins_encode %{
19076     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19077                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19078     int vlen = Matcher::vector_length_in_bytes(this);
19079     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19080   %}
19081   ins_pipe( pipe_slow );
19082 %}
19083 
19084 instruct ReplF_zero(vec dst, immF0 zero) %{
19085   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19086   match(Set dst (Replicate zero));
19087   format %{ "replicateF $dst,$zero" %}
19088   ins_encode %{
19089     int vlen_enc = vector_length_encoding(this);
19090     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19091       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19092     } else {
19093       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19094     }
19095   %}
19096   ins_pipe( fpu_reg_reg );
19097 %}
19098 
19099 // ====================ReplicateD=======================================
19100 
19101 // Replicate double (8 bytes) scalar to be vector
19102 instruct vReplD_reg(vec dst, vlRegD src) %{
19103   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19104   match(Set dst (Replicate src));
19105   format %{ "replicateD $dst,$src" %}
19106   ins_encode %{
19107     uint vlen = Matcher::vector_length(this);
19108     int vlen_enc = vector_length_encoding(this);
19109     if (vlen <= 2) {
19110       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19111     } else if (VM_Version::supports_avx2()) {
19112       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19113     } else {
19114       assert(vlen == 4, "sanity");
19115       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19116       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19117     }
19118   %}
19119   ins_pipe( pipe_slow );
19120 %}
19121 
19122 instruct ReplD_reg(vec dst, vlRegD src) %{
19123   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19124   match(Set dst (Replicate src));
19125   format %{ "replicateD $dst,$src" %}
19126   ins_encode %{
19127     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19128   %}
19129   ins_pipe( pipe_slow );
19130 %}
19131 
19132 instruct ReplD_mem(vec dst, memory mem) %{
19133   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19134   match(Set dst (Replicate (LoadD mem)));
19135   format %{ "replicateD $dst,$mem" %}
19136   ins_encode %{
19137     if (Matcher::vector_length(this) >= 4) {
19138       int vlen_enc = vector_length_encoding(this);
19139       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19140     } else {
19141       __ movddup($dst$$XMMRegister, $mem$$Address);
19142     }
19143   %}
19144   ins_pipe( pipe_slow );
19145 %}
19146 
19147 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19148 instruct ReplD_imm(vec dst, immD con) %{
19149   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19150   match(Set dst (Replicate con));
19151   format %{ "replicateD $dst,$con" %}
19152   ins_encode %{
19153     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19154     int vlen = Matcher::vector_length_in_bytes(this);
19155     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19156   %}
19157   ins_pipe( pipe_slow );
19158 %}
19159 
19160 instruct ReplD_zero(vec dst, immD0 zero) %{
19161   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19162   match(Set dst (Replicate zero));
19163   format %{ "replicateD $dst,$zero" %}
19164   ins_encode %{
19165     int vlen_enc = vector_length_encoding(this);
19166     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19167       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19168     } else {
19169       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19170     }
19171   %}
19172   ins_pipe( fpu_reg_reg );
19173 %}
19174 
19175 // ====================VECTOR INSERT=======================================
19176 
19177 instruct insert(vec dst, rRegI val, immU8 idx) %{
19178   predicate(Matcher::vector_length_in_bytes(n) < 32);
19179   match(Set dst (VectorInsert (Binary dst val) idx));
19180   format %{ "vector_insert $dst,$val,$idx" %}
19181   ins_encode %{
19182     assert(UseSSE >= 4, "required");
19183     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19184 
19185     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19186 
19187     assert(is_integral_type(elem_bt), "");
19188     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19189 
19190     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19191   %}
19192   ins_pipe( pipe_slow );
19193 %}
19194 
19195 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19196   predicate(Matcher::vector_length_in_bytes(n) == 32);
19197   match(Set dst (VectorInsert (Binary src val) idx));
19198   effect(TEMP vtmp);
19199   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19200   ins_encode %{
19201     int vlen_enc = Assembler::AVX_256bit;
19202     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19203     int elem_per_lane = 16/type2aelembytes(elem_bt);
19204     int log2epr = log2(elem_per_lane);
19205 
19206     assert(is_integral_type(elem_bt), "sanity");
19207     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19208 
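    // Split the element index into a 128-bit lane selector (y_idx) and the position
    // within that lane (x_idx): extract the lane, insert the value, and put the lane back.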
19209     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19210     uint y_idx = ($idx$$constant >> log2epr) & 1;
19211     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19212     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19213     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19214   %}
19215   ins_pipe( pipe_slow );
19216 %}
19217 
19218 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19219   predicate(Matcher::vector_length_in_bytes(n) == 64);
19220   match(Set dst (VectorInsert (Binary src val) idx));
19221   effect(TEMP vtmp);
19222   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19223   ins_encode %{
19224     assert(UseAVX > 2, "sanity");
19225 
19226     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19227     int elem_per_lane = 16/type2aelembytes(elem_bt);
19228     int log2epr = log2(elem_per_lane);
19229 
19230     assert(is_integral_type(elem_bt), "");
19231     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19232 
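    // As in insert32, but y_idx selects one of the four 128-bit lanes of the 512-bit vector.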
19233     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19234     uint y_idx = ($idx$$constant >> log2epr) & 3;
19235     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19236     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19237     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19238   %}
19239   ins_pipe( pipe_slow );
19240 %}
19241 
19242 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19243   predicate(Matcher::vector_length(n) == 2);
19244   match(Set dst (VectorInsert (Binary dst val) idx));
19245   format %{ "vector_insert $dst,$val,$idx" %}
19246   ins_encode %{
19247     assert(UseSSE >= 4, "required");
19248     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19249     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19250 
19251     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19252   %}
19253   ins_pipe( pipe_slow );
19254 %}
19255 
19256 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19257   predicate(Matcher::vector_length(n) == 4);
19258   match(Set dst (VectorInsert (Binary src val) idx));
19259   effect(TEMP vtmp);
19260   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19261   ins_encode %{
19262     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19263     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19264 
19265     uint x_idx = $idx$$constant & right_n_bits(1);
19266     uint y_idx = ($idx$$constant >> 1) & 1;
19267     int vlen_enc = Assembler::AVX_256bit;
19268     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19269     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19270     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19271   %}
19272   ins_pipe( pipe_slow );
19273 %}
19274 
19275 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19276   predicate(Matcher::vector_length(n) == 8);
19277   match(Set dst (VectorInsert (Binary src val) idx));
19278   effect(TEMP vtmp);
19279   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19280   ins_encode %{
19281     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19282     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19283 
19284     uint x_idx = $idx$$constant & right_n_bits(1);
19285     uint y_idx = ($idx$$constant >> 1) & 3;
19286     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19287     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19288     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19289   %}
19290   ins_pipe( pipe_slow );
19291 %}
19292 
19293 instruct insertF(vec dst, regF val, immU8 idx) %{
19294   predicate(Matcher::vector_length(n) < 8);
19295   match(Set dst (VectorInsert (Binary dst val) idx));
19296   format %{ "vector_insert $dst,$val,$idx" %}
19297   ins_encode %{
19298     assert(UseSSE >= 4, "sanity");
19299 
19300     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19301     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19302 
19303     uint x_idx = $idx$$constant & right_n_bits(2);
19304     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19305   %}
19306   ins_pipe( pipe_slow );
19307 %}
19308 
19309 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19310   predicate(Matcher::vector_length(n) >= 8);
19311   match(Set dst (VectorInsert (Binary src val) idx));
19312   effect(TEMP vtmp);
19313   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19314   ins_encode %{
19315     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19316     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19317 
19318     int vlen = Matcher::vector_length(this);
19319     uint x_idx = $idx$$constant & right_n_bits(2);
19320     if (vlen == 8) {
19321       uint y_idx = ($idx$$constant >> 2) & 1;
19322       int vlen_enc = Assembler::AVX_256bit;
19323       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19324       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19325       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19326     } else {
19327       assert(vlen == 16, "sanity");
19328       uint y_idx = ($idx$$constant >> 2) & 3;
19329       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19330       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19331       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19332     }
19333   %}
19334   ins_pipe( pipe_slow );
19335 %}
19336 
19337 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19338   predicate(Matcher::vector_length(n) == 2);
19339   match(Set dst (VectorInsert (Binary dst val) idx));
19340   effect(TEMP tmp);
19341   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19342   ins_encode %{
19343     assert(UseSSE >= 4, "sanity");
19344     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19345     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19346 
19347     __ movq($tmp$$Register, $val$$XMMRegister);
19348     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19349   %}
19350   ins_pipe( pipe_slow );
19351 %}
19352 
19353 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19354   predicate(Matcher::vector_length(n) == 4);
19355   match(Set dst (VectorInsert (Binary src val) idx));
19356   effect(TEMP vtmp, TEMP tmp);
19357   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19358   ins_encode %{
19359     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19360     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19361 
19362     uint x_idx = $idx$$constant & right_n_bits(1);
19363     uint y_idx = ($idx$$constant >> 1) & 1;
19364     int vlen_enc = Assembler::AVX_256bit;
19365     __ movq($tmp$$Register, $val$$XMMRegister);
19366     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19367     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19368     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19369   %}
19370   ins_pipe( pipe_slow );
19371 %}
19372 
19373 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19374   predicate(Matcher::vector_length(n) == 8);
19375   match(Set dst (VectorInsert (Binary src val) idx));
19376   effect(TEMP tmp, TEMP vtmp);
19377   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19378   ins_encode %{
19379     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19380     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19381 
19382     uint x_idx = $idx$$constant & right_n_bits(1);
19383     uint y_idx = ($idx$$constant >> 1) & 3;
19384     __ movq($tmp$$Register, $val$$XMMRegister);
19385     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19386     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19387     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19388   %}
19389   ins_pipe( pipe_slow );
19390 %}
19391 
19392 // ====================REDUCTION ARITHMETIC=======================================
19393 
19394 // =======================Int Reduction==========================================
19395 
19396 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19397   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19398   match(Set dst (AddReductionVI src1 src2));
19399   match(Set dst (MulReductionVI src1 src2));
19400   match(Set dst (AndReductionV  src1 src2));
19401   match(Set dst ( OrReductionV  src1 src2));
19402   match(Set dst (XorReductionV  src1 src2));
19403   match(Set dst (MinReductionV  src1 src2));
19404   match(Set dst (MaxReductionV  src1 src2));
19405   match(Set dst (UMinReductionV  src1 src2));
19406   match(Set dst (UMaxReductionV  src1 src2));
19407   effect(TEMP vtmp1, TEMP vtmp2);
19408   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19409   ins_encode %{
19410     int opcode = this->ideal_Opcode();
19411     int vlen = Matcher::vector_length(this, $src2);
19412     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19413   %}
19414   ins_pipe( pipe_slow );
19415 %}
19416 
19417 // =======================Long Reduction==========================================
19418 
19419 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19420   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19421   match(Set dst (AddReductionVL src1 src2));
19422   match(Set dst (MulReductionVL src1 src2));
19423   match(Set dst (AndReductionV  src1 src2));
19424   match(Set dst ( OrReductionV  src1 src2));
19425   match(Set dst (XorReductionV  src1 src2));
19426   match(Set dst (MinReductionV  src1 src2));
19427   match(Set dst (MaxReductionV  src1 src2));
19428   match(Set dst (UMinReductionV  src1 src2));
19429   match(Set dst (UMaxReductionV  src1 src2));
19430   effect(TEMP vtmp1, TEMP vtmp2);
19431   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19432   ins_encode %{
19433     int opcode = this->ideal_Opcode();
19434     int vlen = Matcher::vector_length(this, $src2);
19435     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19436   %}
19437   ins_pipe( pipe_slow );
19438 %}
19439 
19440 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19441   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19442   match(Set dst (AddReductionVL src1 src2));
19443   match(Set dst (MulReductionVL src1 src2));
19444   match(Set dst (AndReductionV  src1 src2));
19445   match(Set dst ( OrReductionV  src1 src2));
19446   match(Set dst (XorReductionV  src1 src2));
19447   match(Set dst (MinReductionV  src1 src2));
19448   match(Set dst (MaxReductionV  src1 src2));
19449   match(Set dst (UMinReductionV  src1 src2));
19450   match(Set dst (UMaxReductionV  src1 src2));
19451   effect(TEMP vtmp1, TEMP vtmp2);
19452   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19453   ins_encode %{
19454     int opcode = this->ideal_Opcode();
19455     int vlen = Matcher::vector_length(this, $src2);
19456     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19457   %}
19458   ins_pipe( pipe_slow );
19459 %}
19460 
19461 // =======================Float Reduction==========================================
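// The strictly ordered variants below fold the vector lanes into the scalar accumulator
// in $dst without reassociating; the unordered variants further down take the reduction
// identity in src1 and are free to reassociate.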
19462 
19463 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19464   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19465   match(Set dst (AddReductionVF dst src));
19466   match(Set dst (MulReductionVF dst src));
19467   effect(TEMP dst, TEMP vtmp);
19468   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19469   ins_encode %{
19470     int opcode = this->ideal_Opcode();
19471     int vlen = Matcher::vector_length(this, $src);
19472     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19473   %}
19474   ins_pipe( pipe_slow );
19475 %}
19476 
19477 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19478   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19479   match(Set dst (AddReductionVF dst src));
19480   match(Set dst (MulReductionVF dst src));
19481   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19482   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19483   ins_encode %{
19484     int opcode = this->ideal_Opcode();
19485     int vlen = Matcher::vector_length(this, $src);
19486     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19487   %}
19488   ins_pipe( pipe_slow );
19489 %}
19490 
19491 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19492   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19493   match(Set dst (AddReductionVF dst src));
19494   match(Set dst (MulReductionVF dst src));
19495   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19496   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19497   ins_encode %{
19498     int opcode = this->ideal_Opcode();
19499     int vlen = Matcher::vector_length(this, $src);
19500     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19501   %}
19502   ins_pipe( pipe_slow );
19503 %}
19504 
19505 
19506 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19507   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19508   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19509   // src1 contains reduction identity
19510   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19511   match(Set dst (AddReductionVF src1 src2));
19512   match(Set dst (MulReductionVF src1 src2));
19513   effect(TEMP dst);
19514   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19515   ins_encode %{
19516     int opcode = this->ideal_Opcode();
19517     int vlen = Matcher::vector_length(this, $src2);
19518     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19519   %}
19520   ins_pipe( pipe_slow );
19521 %}
19522 
19523 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19524   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19525   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19526   // src1 contains reduction identity
19527   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19528   match(Set dst (AddReductionVF src1 src2));
19529   match(Set dst (MulReductionVF src1 src2));
19530   effect(TEMP dst, TEMP vtmp);
19531   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19532   ins_encode %{
19533     int opcode = this->ideal_Opcode();
19534     int vlen = Matcher::vector_length(this, $src2);
19535     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19536   %}
19537   ins_pipe( pipe_slow );
19538 %}
19539 
19540 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19541   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19542   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19543   // src1 contains reduction identity
19544   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19545   match(Set dst (AddReductionVF src1 src2));
19546   match(Set dst (MulReductionVF src1 src2));
19547   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19548   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19549   ins_encode %{
19550     int opcode = this->ideal_Opcode();
19551     int vlen = Matcher::vector_length(this, $src2);
19552     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19553   %}
19554   ins_pipe( pipe_slow );
19555 %}
19556 
19557 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19558   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19559   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19560   // src1 contains reduction identity
19561   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19562   match(Set dst (AddReductionVF src1 src2));
19563   match(Set dst (MulReductionVF src1 src2));
19564   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19565   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19566   ins_encode %{
19567     int opcode = this->ideal_Opcode();
19568     int vlen = Matcher::vector_length(this, $src2);
19569     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19570   %}
19571   ins_pipe( pipe_slow );
19572 %}
19573 
19574 // =======================Double Reduction==========================================
19575 
19576 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19577   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19578   match(Set dst (AddReductionVD dst src));
19579   match(Set dst (MulReductionVD dst src));
19580   effect(TEMP dst, TEMP vtmp);
19581   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19582   ins_encode %{
19583     int opcode = this->ideal_Opcode();
19584     int vlen = Matcher::vector_length(this, $src);
19585     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19586   %}
19587   ins_pipe( pipe_slow );
19588 %}
19589 
19590 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19591   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19592   match(Set dst (AddReductionVD dst src));
19593   match(Set dst (MulReductionVD dst src));
19594   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19595   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19596   ins_encode %{
19597     int opcode = this->ideal_Opcode();
19598     int vlen = Matcher::vector_length(this, $src);
19599     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19600   %}
19601   ins_pipe( pipe_slow );
19602 %}
19603 
19604 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19605   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19606   match(Set dst (AddReductionVD dst src));
19607   match(Set dst (MulReductionVD dst src));
19608   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19609   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19610   ins_encode %{
19611     int opcode = this->ideal_Opcode();
19612     int vlen = Matcher::vector_length(this, $src);
19613     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19614   %}
19615   ins_pipe( pipe_slow );
19616 %}
19617 
19618 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19619   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19620   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19621   // src1 contains reduction identity
19622   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19623   match(Set dst (AddReductionVD src1 src2));
19624   match(Set dst (MulReductionVD src1 src2));
19625   effect(TEMP dst);
19626   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19627   ins_encode %{
19628     int opcode = this->ideal_Opcode();
19629     int vlen = Matcher::vector_length(this, $src2);
19630     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19631   %}
19632   ins_pipe( pipe_slow );
19633 %}
19634 
19635 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19636   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19637   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19638   // src1 contains reduction identity
19639   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19640   match(Set dst (AddReductionVD src1 src2));
19641   match(Set dst (MulReductionVD src1 src2));
19642   effect(TEMP dst, TEMP vtmp);
19643   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19644   ins_encode %{
19645     int opcode = this->ideal_Opcode();
19646     int vlen = Matcher::vector_length(this, $src2);
19647     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19648   %}
19649   ins_pipe( pipe_slow );
19650 %}
19651 
19652 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19653   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19654   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19655   // src1 contains reduction identity
19656   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19657   match(Set dst (AddReductionVD src1 src2));
19658   match(Set dst (MulReductionVD src1 src2));
19659   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19660   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19661   ins_encode %{
19662     int opcode = this->ideal_Opcode();
19663     int vlen = Matcher::vector_length(this, $src2);
19664     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19665   %}
19666   ins_pipe( pipe_slow );
19667 %}
19668 
19669 // =======================Byte Reduction==========================================
19670 
19671 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19672   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19673   match(Set dst (AddReductionVI src1 src2));
19674   match(Set dst (AndReductionV  src1 src2));
19675   match(Set dst ( OrReductionV  src1 src2));
19676   match(Set dst (XorReductionV  src1 src2));
19677   match(Set dst (MinReductionV  src1 src2));
19678   match(Set dst (MaxReductionV  src1 src2));
19679   match(Set dst (UMinReductionV  src1 src2));
19680   match(Set dst (UMaxReductionV  src1 src2));
19681   effect(TEMP vtmp1, TEMP vtmp2);
19682   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19683   ins_encode %{
19684     int opcode = this->ideal_Opcode();
19685     int vlen = Matcher::vector_length(this, $src2);
19686     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19687   %}
19688   ins_pipe( pipe_slow );
19689 %}
19690 
19691 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19692   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19693   match(Set dst (AddReductionVI src1 src2));
19694   match(Set dst (AndReductionV  src1 src2));
19695   match(Set dst ( OrReductionV  src1 src2));
19696   match(Set dst (XorReductionV  src1 src2));
19697   match(Set dst (MinReductionV  src1 src2));
19698   match(Set dst (MaxReductionV  src1 src2));
19699   match(Set dst (UMinReductionV  src1 src2));
19700   match(Set dst (UMaxReductionV  src1 src2));
19701   effect(TEMP vtmp1, TEMP vtmp2);
19702   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19703   ins_encode %{
19704     int opcode = this->ideal_Opcode();
19705     int vlen = Matcher::vector_length(this, $src2);
19706     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19707   %}
19708   ins_pipe( pipe_slow );
19709 %}
19710 
19711 // =======================Short Reduction==========================================
19712 
19713 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19714   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19715   match(Set dst (AddReductionVI src1 src2));
19716   match(Set dst (MulReductionVI src1 src2));
19717   match(Set dst (AndReductionV  src1 src2));
19718   match(Set dst ( OrReductionV  src1 src2));
19719   match(Set dst (XorReductionV  src1 src2));
19720   match(Set dst (MinReductionV  src1 src2));
19721   match(Set dst (MaxReductionV  src1 src2));
19722   match(Set dst (UMinReductionV  src1 src2));
19723   match(Set dst (UMaxReductionV  src1 src2));
19724   effect(TEMP vtmp1, TEMP vtmp2);
19725   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19726   ins_encode %{
19727     int opcode = this->ideal_Opcode();
19728     int vlen = Matcher::vector_length(this, $src2);
19729     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19730   %}
19731   ins_pipe( pipe_slow );
19732 %}
19733 
19734 // =======================Mul Reduction==========================================
19735 
19736 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19737   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19738             Matcher::vector_length(n->in(2)) <= 32); // src2
19739   match(Set dst (MulReductionVI src1 src2));
19740   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19741   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19742   ins_encode %{
19743     int opcode = this->ideal_Opcode();
19744     int vlen = Matcher::vector_length(this, $src2);
19745     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19746   %}
19747   ins_pipe( pipe_slow );
19748 %}
19749 
19750 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19751   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19752             Matcher::vector_length(n->in(2)) == 64); // src2
19753   match(Set dst (MulReductionVI src1 src2));
19754   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19755   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19756   ins_encode %{
19757     int opcode = this->ideal_Opcode();
19758     int vlen = Matcher::vector_length(this, $src2);
19759     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19760   %}
19761   ins_pipe( pipe_slow );
19762 %}
19763 
19764 //--------------------Min/Max Float Reduction --------------------
19765 // Float Min/Max Reduction
19766 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19767                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19768   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19769             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19770              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19771             Matcher::vector_length(n->in(2)) == 2);
19772   match(Set dst (MinReductionV src1 src2));
19773   match(Set dst (MaxReductionV src1 src2));
19774   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19775   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19776   ins_encode %{
19777     assert(UseAVX > 0, "sanity");
19778 
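    // The extra XMM temporaries feed the blend-based sequence that preserves
    // Java's NaN and signed-zero ordering for min/max on pre-AVX10.2 CPUs.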
19779     int opcode = this->ideal_Opcode();
19780     int vlen = Matcher::vector_length(this, $src2);
19781     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19782                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19783   %}
19784   ins_pipe( pipe_slow );
19785 %}
19786 
19787 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19788                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19789   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19790             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19791              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19792             Matcher::vector_length(n->in(2)) >= 4);
19793   match(Set dst (MinReductionV src1 src2));
19794   match(Set dst (MaxReductionV src1 src2));
19795   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19796   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19797   ins_encode %{
19798     assert(UseAVX > 0, "sanity");
19799 
19800     int opcode = this->ideal_Opcode();
19801     int vlen = Matcher::vector_length(this, $src2);
19802     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19803                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19804   %}
19805   ins_pipe( pipe_slow );
19806 %}
19807 
19808 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19809                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19810   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19811             Matcher::vector_length(n->in(2)) == 2);
19812   match(Set dst (MinReductionV dst src));
19813   match(Set dst (MaxReductionV dst src));
19814   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19815   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19816   ins_encode %{
19817     assert(UseAVX > 0, "sanity");
19818 
19819     int opcode = this->ideal_Opcode();
19820     int vlen = Matcher::vector_length(this, $src);
19821     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19822                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19823   %}
19824   ins_pipe( pipe_slow );
19825 %}
19826 
19827 
19828 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19829                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19830   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19831             Matcher::vector_length(n->in(2)) >= 4);
19832   match(Set dst (MinReductionV dst src));
19833   match(Set dst (MaxReductionV dst src));
19834   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19835   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19836   ins_encode %{
19837     assert(UseAVX > 0, "sanity");
19838 
19839     int opcode = this->ideal_Opcode();
19840     int vlen = Matcher::vector_length(this, $src);
19841     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19842                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19843   %}
19844   ins_pipe( pipe_slow );
19845 %}
19846 
19847 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19848   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19849             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19850              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19851             Matcher::vector_length(n->in(2)) == 2);
19852   match(Set dst (MinReductionV src1 src2));
19853   match(Set dst (MaxReductionV src1 src2));
19854   effect(TEMP dst, TEMP xtmp1);
19855   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19856   ins_encode %{
19857     int opcode = this->ideal_Opcode();
19858     int vlen = Matcher::vector_length(this, $src2);
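    // The AVX10.2 form needs only one temporary; the tmp/atmp/btmp slots of
    // the pre-AVX10.2 sequence are passed as xnoreg.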
19859     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19860                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19861   %}
19862   ins_pipe( pipe_slow );
19863 %}
19864 
19865 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19866   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19867             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19868              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19869             Matcher::vector_length(n->in(2)) >= 4);
19870   match(Set dst (MinReductionV src1 src2));
19871   match(Set dst (MaxReductionV src1 src2));
19872   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19873   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19874   ins_encode %{
19875     int opcode = this->ideal_Opcode();
19876     int vlen = Matcher::vector_length(this, $src2);
19877     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19878                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19879   %}
19880   ins_pipe( pipe_slow );
19881 %}
19882 
19883 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
19884   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19885             Matcher::vector_length(n->in(2)) == 2);
19886   match(Set dst (MinReductionV dst src));
19887   match(Set dst (MaxReductionV dst src));
19888   effect(TEMP dst, TEMP xtmp1);
19889   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19890   ins_encode %{
19891     int opcode = this->ideal_Opcode();
19892     int vlen = Matcher::vector_length(this, $src);
19893     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19894                          $xtmp1$$XMMRegister);
19895   %}
19896   ins_pipe( pipe_slow );
19897 %}
19898 
19899 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19900   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19901             Matcher::vector_length(n->in(2)) >= 4);
19902   match(Set dst (MinReductionV dst src));
19903   match(Set dst (MaxReductionV dst src));
19904   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19905   format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19906   ins_encode %{
19907     int opcode = this->ideal_Opcode();
19908     int vlen = Matcher::vector_length(this, $src);
19909     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19910                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19911   %}
19912   ins_pipe( pipe_slow );
19913 %}
19914 
19915 //--------------------Min/Max Double Reduction --------------------
19916 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19917                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19918   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19919             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19920              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19921             Matcher::vector_length(n->in(2)) == 2);
19922   match(Set dst (MinReductionV src1 src2));
19923   match(Set dst (MaxReductionV src1 src2));
19924   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19925   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19926   ins_encode %{
19927     assert(UseAVX > 0, "sanity");
19928 
19929     int opcode = this->ideal_Opcode();
19930     int vlen = Matcher::vector_length(this, $src2);
19931     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19932                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19933   %}
19934   ins_pipe( pipe_slow );
19935 %}
19936 
19937 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19938                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19939   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19940             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19941              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19942             Matcher::vector_length(n->in(2)) >= 4);
19943   match(Set dst (MinReductionV src1 src2));
19944   match(Set dst (MaxReductionV src1 src2));
19945   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19946   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19947   ins_encode %{
19948     assert(UseAVX > 0, "sanity");
19949 
19950     int opcode = this->ideal_Opcode();
19951     int vlen = Matcher::vector_length(this, $src2);
19952     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19953                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19954   %}
19955   ins_pipe( pipe_slow );
19956 %}
19957 
19958 
19959 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19960                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19961   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19962             Matcher::vector_length(n->in(2)) == 2);
19963   match(Set dst (MinReductionV dst src));
19964   match(Set dst (MaxReductionV dst src));
19965   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19966   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19967   ins_encode %{
19968     assert(UseAVX > 0, "sanity");
19969 
19970     int opcode = this->ideal_Opcode();
19971     int vlen = Matcher::vector_length(this, $src);
19972     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19973                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19974   %}
19975   ins_pipe( pipe_slow );
19976 %}
19977 
19978 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19979                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19980   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19981             Matcher::vector_length(n->in(2)) >= 4);
19982   match(Set dst (MinReductionV dst src));
19983   match(Set dst (MaxReductionV dst src));
19984   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19985   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19986   ins_encode %{
19987     assert(UseAVX > 0, "sanity");
19988 
19989     int opcode = this->ideal_Opcode();
19990     int vlen = Matcher::vector_length(this, $src);
19991     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19992                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19993   %}
19994   ins_pipe( pipe_slow );
19995 %}
19996 
19997 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
19998   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19999             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20000              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20001             Matcher::vector_length(n->in(2)) == 2);
20002   match(Set dst (MinReductionV src1 src2));
20003   match(Set dst (MaxReductionV src1 src2));
20004   effect(TEMP dst, TEMP xtmp1);
20005   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20006   ins_encode %{
20007     int opcode = this->ideal_Opcode();
20008     int vlen = Matcher::vector_length(this, $src2);
20009     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20010                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
20011   %}
20012   ins_pipe( pipe_slow );
20013 %}
20014 
20015 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20016   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20017             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20018              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20019             Matcher::vector_length(n->in(2)) >= 4);
20020   match(Set dst (MinReductionV src1 src2));
20021   match(Set dst (MaxReductionV src1 src2));
20022   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20023   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20024   ins_encode %{
20025     int opcode = this->ideal_Opcode();
20026     int vlen = Matcher::vector_length(this, $src2);
20027     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20028                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20029   %}
20030   ins_pipe( pipe_slow );
20031 %}
20032 
20033 
20034 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20035   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20036             Matcher::vector_length(n->in(2)) == 2);
20037   match(Set dst (MinReductionV dst src));
20038   match(Set dst (MaxReductionV dst src));
20039   effect(TEMP dst, TEMP xtmp1);
20040   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20041   ins_encode %{
20042     int opcode = this->ideal_Opcode();
20043     int vlen = Matcher::vector_length(this, $src);
20044     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20045                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20046   %}
20047   ins_pipe( pipe_slow );
20048 %}
20049 
20050 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20051   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20052             Matcher::vector_length(n->in(2)) >= 4);
20053   match(Set dst (MinReductionV dst src));
20054   match(Set dst (MaxReductionV dst src));
20055   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20056   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20057   ins_encode %{
20058     int opcode = this->ideal_Opcode();
20059     int vlen = Matcher::vector_length(this, $src);
20060     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20061                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20062   %}
20063   ins_pipe( pipe_slow );
20064 %}
20065 
20066 // ====================VECTOR ARITHMETIC=======================================
20067 
20068 // --------------------------------- ADD --------------------------------------
20069 
20070 // Bytes vector add
20071 instruct vaddB(vec dst, vec src) %{
20072   predicate(UseAVX == 0);
20073   match(Set dst (AddVB dst src));
20074   format %{ "paddb   $dst,$src\t! add packedB" %}
20075   ins_encode %{
20076     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20077   %}
20078   ins_pipe( pipe_slow );
20079 %}
20080 
20081 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20082   predicate(UseAVX > 0);
20083   match(Set dst (AddVB src1 src2));
20084   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
20085   ins_encode %{
20086     int vlen_enc = vector_length_encoding(this);
20087     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20088   %}
20089   ins_pipe( pipe_slow );
20090 %}
20091 
20092 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20093   predicate((UseAVX > 0) &&
20094             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20095   match(Set dst (AddVB src (LoadVector mem)));
20096   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
20097   ins_encode %{
20098     int vlen_enc = vector_length_encoding(this);
20099     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20100   %}
20101   ins_pipe( pipe_slow );
20102 %}
20103 
20104 // Shorts/Chars vector add
20105 instruct vaddS(vec dst, vec src) %{
20106   predicate(UseAVX == 0);
20107   match(Set dst (AddVS dst src));
20108   format %{ "paddw   $dst,$src\t! add packedS" %}
20109   ins_encode %{
20110     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20111   %}
20112   ins_pipe( pipe_slow );
20113 %}
20114 
20115 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20116   predicate(UseAVX > 0);
20117   match(Set dst (AddVS src1 src2));
20118   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
20119   ins_encode %{
20120     int vlen_enc = vector_length_encoding(this);
20121     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20122   %}
20123   ins_pipe( pipe_slow );
20124 %}
20125 
20126 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20127   predicate((UseAVX > 0) &&
20128             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20129   match(Set dst (AddVS src (LoadVector mem)));
20130   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
20131   ins_encode %{
20132     int vlen_enc = vector_length_encoding(this);
20133     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20134   %}
20135   ins_pipe( pipe_slow );
20136 %}
20137 
20138 // Integers vector add
20139 instruct vaddI(vec dst, vec src) %{
20140   predicate(UseAVX == 0);
20141   match(Set dst (AddVI dst src));
20142   format %{ "paddd   $dst,$src\t! add packedI" %}
20143   ins_encode %{
20144     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20145   %}
20146   ins_pipe( pipe_slow );
20147 %}
20148 
20149 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20150   predicate(UseAVX > 0);
20151   match(Set dst (AddVI src1 src2));
20152   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
20153   ins_encode %{
20154     int vlen_enc = vector_length_encoding(this);
20155     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20156   %}
20157   ins_pipe( pipe_slow );
20158 %}
20159 
20160 
20161 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20162   predicate((UseAVX > 0) &&
20163             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20164   match(Set dst (AddVI src (LoadVector mem)));
20165   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
20166   ins_encode %{
20167     int vlen_enc = vector_length_encoding(this);
20168     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20169   %}
20170   ins_pipe( pipe_slow );
20171 %}
20172 
20173 // Longs vector add
20174 instruct vaddL(vec dst, vec src) %{
20175   predicate(UseAVX == 0);
20176   match(Set dst (AddVL dst src));
20177   format %{ "paddq   $dst,$src\t! add packedL" %}
20178   ins_encode %{
20179     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20180   %}
20181   ins_pipe( pipe_slow );
20182 %}
20183 
20184 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20185   predicate(UseAVX > 0);
20186   match(Set dst (AddVL src1 src2));
20187   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
20188   ins_encode %{
20189     int vlen_enc = vector_length_encoding(this);
20190     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20191   %}
20192   ins_pipe( pipe_slow );
20193 %}
20194 
20195 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20196   predicate((UseAVX > 0) &&
20197             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20198   match(Set dst (AddVL src (LoadVector mem)));
20199   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
20200   ins_encode %{
20201     int vlen_enc = vector_length_encoding(this);
20202     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20203   %}
20204   ins_pipe( pipe_slow );
20205 %}
20206 
20207 // Floats vector add
20208 instruct vaddF(vec dst, vec src) %{
20209   predicate(UseAVX == 0);
20210   match(Set dst (AddVF dst src));
20211   format %{ "addps   $dst,$src\t! add packedF" %}
20212   ins_encode %{
20213     __ addps($dst$$XMMRegister, $src$$XMMRegister);
20214   %}
20215   ins_pipe( pipe_slow );
20216 %}
20217 
20218 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20219   predicate(UseAVX > 0);
20220   match(Set dst (AddVF src1 src2));
20221   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
20222   ins_encode %{
20223     int vlen_enc = vector_length_encoding(this);
20224     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20225   %}
20226   ins_pipe( pipe_slow );
20227 %}
20228 
20229 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20230   predicate((UseAVX > 0) &&
20231             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20232   match(Set dst (AddVF src (LoadVector mem)));
20233   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
20234   ins_encode %{
20235     int vlen_enc = vector_length_encoding(this);
20236     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20237   %}
20238   ins_pipe( pipe_slow );
20239 %}
20240 
20241 // Doubles vector add
20242 instruct vaddD(vec dst, vec src) %{
20243   predicate(UseAVX == 0);
20244   match(Set dst (AddVD dst src));
20245   format %{ "addpd   $dst,$src\t! add packedD" %}
20246   ins_encode %{
20247     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20248   %}
20249   ins_pipe( pipe_slow );
20250 %}
20251 
20252 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20253   predicate(UseAVX > 0);
20254   match(Set dst (AddVD src1 src2));
20255   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
20256   ins_encode %{
20257     int vlen_enc = vector_length_encoding(this);
20258     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20259   %}
20260   ins_pipe( pipe_slow );
20261 %}
20262 
20263 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20264   predicate((UseAVX > 0) &&
20265             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20266   match(Set dst (AddVD src (LoadVector mem)));
20267   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
20268   ins_encode %{
20269     int vlen_enc = vector_length_encoding(this);
20270     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20271   %}
20272   ins_pipe( pipe_slow );
20273 %}
20274 
20275 // --------------------------------- SUB --------------------------------------
20276 
20277 // Bytes vector sub
20278 instruct vsubB(vec dst, vec src) %{
20279   predicate(UseAVX == 0);
20280   match(Set dst (SubVB dst src));
20281   format %{ "psubb   $dst,$src\t! sub packedB" %}
20282   ins_encode %{
20283     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20284   %}
20285   ins_pipe( pipe_slow );
20286 %}
20287 
20288 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20289   predicate(UseAVX > 0);
20290   match(Set dst (SubVB src1 src2));
20291   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20292   ins_encode %{
20293     int vlen_enc = vector_length_encoding(this);
20294     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20295   %}
20296   ins_pipe( pipe_slow );
20297 %}
20298 
20299 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20300   predicate((UseAVX > 0) &&
20301             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20302   match(Set dst (SubVB src (LoadVector mem)));
20303   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20304   ins_encode %{
20305     int vlen_enc = vector_length_encoding(this);
20306     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20307   %}
20308   ins_pipe( pipe_slow );
20309 %}
20310 
20311 // Shorts/Chars vector sub
20312 instruct vsubS(vec dst, vec src) %{
20313   predicate(UseAVX == 0);
20314   match(Set dst (SubVS dst src));
20315   format %{ "psubw   $dst,$src\t! sub packedS" %}
20316   ins_encode %{
20317     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20318   %}
20319   ins_pipe( pipe_slow );
20320 %}
20321 
20322 
20323 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20324   predicate(UseAVX > 0);
20325   match(Set dst (SubVS src1 src2));
20326   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20327   ins_encode %{
20328     int vlen_enc = vector_length_encoding(this);
20329     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20330   %}
20331   ins_pipe( pipe_slow );
20332 %}
20333 
20334 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20335   predicate((UseAVX > 0) &&
20336             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20337   match(Set dst (SubVS src (LoadVector mem)));
20338   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20339   ins_encode %{
20340     int vlen_enc = vector_length_encoding(this);
20341     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20342   %}
20343   ins_pipe( pipe_slow );
20344 %}
20345 
20346 // Integers vector sub
20347 instruct vsubI(vec dst, vec src) %{
20348   predicate(UseAVX == 0);
20349   match(Set dst (SubVI dst src));
20350   format %{ "psubd   $dst,$src\t! sub packedI" %}
20351   ins_encode %{
20352     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20353   %}
20354   ins_pipe( pipe_slow );
20355 %}
20356 
20357 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20358   predicate(UseAVX > 0);
20359   match(Set dst (SubVI src1 src2));
20360   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20361   ins_encode %{
20362     int vlen_enc = vector_length_encoding(this);
20363     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20364   %}
20365   ins_pipe( pipe_slow );
20366 %}
20367 
20368 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20369   predicate((UseAVX > 0) &&
20370             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20371   match(Set dst (SubVI src (LoadVector mem)));
20372   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20373   ins_encode %{
20374     int vlen_enc = vector_length_encoding(this);
20375     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20376   %}
20377   ins_pipe( pipe_slow );
20378 %}
20379 
20380 // Longs vector sub
20381 instruct vsubL(vec dst, vec src) %{
20382   predicate(UseAVX == 0);
20383   match(Set dst (SubVL dst src));
20384   format %{ "psubq   $dst,$src\t! sub packedL" %}
20385   ins_encode %{
20386     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20387   %}
20388   ins_pipe( pipe_slow );
20389 %}
20390 
20391 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20392   predicate(UseAVX > 0);
20393   match(Set dst (SubVL src1 src2));
20394   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20395   ins_encode %{
20396     int vlen_enc = vector_length_encoding(this);
20397     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20398   %}
20399   ins_pipe( pipe_slow );
20400 %}
20401 
20402 
20403 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20404   predicate((UseAVX > 0) &&
20405             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20406   match(Set dst (SubVL src (LoadVector mem)));
20407   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20408   ins_encode %{
20409     int vlen_enc = vector_length_encoding(this);
20410     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20411   %}
20412   ins_pipe( pipe_slow );
20413 %}
20414 
20415 // Floats vector sub
20416 instruct vsubF(vec dst, vec src) %{
20417   predicate(UseAVX == 0);
20418   match(Set dst (SubVF dst src));
20419   format %{ "subps   $dst,$src\t! sub packedF" %}
20420   ins_encode %{
20421     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20422   %}
20423   ins_pipe( pipe_slow );
20424 %}
20425 
20426 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20427   predicate(UseAVX > 0);
20428   match(Set dst (SubVF src1 src2));
20429   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20430   ins_encode %{
20431     int vlen_enc = vector_length_encoding(this);
20432     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20433   %}
20434   ins_pipe( pipe_slow );
20435 %}
20436 
20437 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20438   predicate((UseAVX > 0) &&
20439             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20440   match(Set dst (SubVF src (LoadVector mem)));
20441   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20442   ins_encode %{
20443     int vlen_enc = vector_length_encoding(this);
20444     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20445   %}
20446   ins_pipe( pipe_slow );
20447 %}
20448 
20449 // Doubles vector sub
20450 instruct vsubD(vec dst, vec src) %{
20451   predicate(UseAVX == 0);
20452   match(Set dst (SubVD dst src));
20453   format %{ "subpd   $dst,$src\t! sub packedD" %}
20454   ins_encode %{
20455     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20456   %}
20457   ins_pipe( pipe_slow );
20458 %}
20459 
20460 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20461   predicate(UseAVX > 0);
20462   match(Set dst (SubVD src1 src2));
20463   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20464   ins_encode %{
20465     int vlen_enc = vector_length_encoding(this);
20466     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20467   %}
20468   ins_pipe( pipe_slow );
20469 %}
20470 
20471 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20472   predicate((UseAVX > 0) &&
20473             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20474   match(Set dst (SubVD src (LoadVector mem)));
20475   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20476   ins_encode %{
20477     int vlen_enc = vector_length_encoding(this);
20478     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20479   %}
20480   ins_pipe( pipe_slow );
20481 %}
20482 
20483 // --------------------------------- MUL --------------------------------------
20484 
20485 // Byte vector mul
20486 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20487   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20488   match(Set dst (MulVB src1 src2));
20489   effect(TEMP dst, TEMP xtmp);
20490   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20491   ins_encode %{
20492     assert(UseSSE > 3, "required");
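    // Sign-extend the byte lanes to words, multiply, then clear the high
    // byte of each word and pack the low bytes back into the result.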
20493     __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20494     __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20495     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20496     __ psllw($dst$$XMMRegister, 8);
20497     __ psrlw($dst$$XMMRegister, 8);
20498     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20499   %}
20500   ins_pipe( pipe_slow );
20501 %}
20502 
20503 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20504   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20505   match(Set dst (MulVB src1 src2));
20506   effect(TEMP dst, TEMP xtmp);
20507   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20508   ins_encode %{
20509     assert(UseSSE > 3, "required");
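    // The low byte of a 16-bit product depends only on the low bytes of its
    // inputs, so odd and even byte positions are multiplied in separate
    // 16-bit passes and merged at the end.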
20510     // Odd-index elements
20511     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20512     __ psrlw($dst$$XMMRegister, 8);
20513     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20514     __ psrlw($xtmp$$XMMRegister, 8);
20515     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20516     __ psllw($dst$$XMMRegister, 8);
20517     // Even-index elements
20518     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20519     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20520     __ psllw($xtmp$$XMMRegister, 8);
20521     __ psrlw($xtmp$$XMMRegister, 8);
20522     // Combine
20523     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20524   %}
20525   ins_pipe( pipe_slow );
20526 %}
20527 
20528 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20529   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20530   match(Set dst (MulVB src1 src2));
20531   effect(TEMP xtmp1, TEMP xtmp2);
20532   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20533   ins_encode %{
20534     int vlen_enc = vector_length_encoding(this);
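    // Same odd/even byte strategy as vmulB above, using three-operand AVX forms.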
20535     // Odd-index elements
20536     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20537     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20538     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20539     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20540     // Even-index elements
20541     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20542     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20543     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20544     // Combine
20545     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20546   %}
20547   ins_pipe( pipe_slow );
20548 %}
20549 
20550 // Shorts/Chars vector mul
20551 instruct vmulS(vec dst, vec src) %{
20552   predicate(UseAVX == 0);
20553   match(Set dst (MulVS dst src));
20554   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20555   ins_encode %{
20556     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20557   %}
20558   ins_pipe( pipe_slow );
20559 %}
20560 
20561 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20562   predicate(UseAVX > 0);
20563   match(Set dst (MulVS src1 src2));
20564   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20565   ins_encode %{
20566     int vlen_enc = vector_length_encoding(this);
20567     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20568   %}
20569   ins_pipe( pipe_slow );
20570 %}
20571 
20572 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20573   predicate((UseAVX > 0) &&
20574             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20575   match(Set dst (MulVS src (LoadVector mem)));
20576   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20577   ins_encode %{
20578     int vlen_enc = vector_length_encoding(this);
20579     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20580   %}
20581   ins_pipe( pipe_slow );
20582 %}
20583 
20584 // Integers vector mul
20585 instruct vmulI(vec dst, vec src) %{
20586   predicate(UseAVX == 0);
20587   match(Set dst (MulVI dst src));
20588   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20589   ins_encode %{
20590     assert(UseSSE > 3, "required");
20591     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20592   %}
20593   ins_pipe( pipe_slow );
20594 %}
20595 
20596 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20597   predicate(UseAVX > 0);
20598   match(Set dst (MulVI src1 src2));
20599   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20600   ins_encode %{
20601     int vlen_enc = vector_length_encoding(this);
20602     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20603   %}
20604   ins_pipe( pipe_slow );
20605 %}
20606 
20607 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20608   predicate((UseAVX > 0) &&
20609             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20610   match(Set dst (MulVI src (LoadVector mem)));
20611   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20612   ins_encode %{
20613     int vlen_enc = vector_length_encoding(this);
20614     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20615   %}
20616   ins_pipe( pipe_slow );
20617 %}
20618 
20619 // Longs vector mul
20620 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20621   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20622              VM_Version::supports_avx512dq()) ||
20623             VM_Version::supports_avx512vldq());
20624   match(Set dst (MulVL src1 src2));
20625   ins_cost(500);
20626   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20627   ins_encode %{
20628     assert(UseAVX > 2, "required");
20629     int vlen_enc = vector_length_encoding(this);
20630     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20631   %}
20632   ins_pipe( pipe_slow );
20633 %}
20634 
20635 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20636   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20637              VM_Version::supports_avx512dq()) ||
20638             (Matcher::vector_length_in_bytes(n) > 8 &&
20639              VM_Version::supports_avx512vldq()));
20640   match(Set dst (MulVL src (LoadVector mem)));
20641   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20642   ins_cost(500);
20643   ins_encode %{
20644     assert(UseAVX > 2, "required");
20645     int vlen_enc = vector_length_encoding(this);
20646     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20647   %}
20648   ins_pipe( pipe_slow );
20649 %}
20650 
20651 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20652   predicate(UseAVX == 0);
20653   match(Set dst (MulVL src1 src2));
20654   ins_cost(500);
20655   effect(TEMP dst, TEMP xtmp);
20656   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20657   ins_encode %{
20658     assert(VM_Version::supports_sse4_1(), "required");
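    // 64x64-bit multiply from 32-bit halves (mod 2^64):
    //   a*b = lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32)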
20659     // Get the lo-hi products; only the lower 32 bits are of concern
20660     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20661     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20662     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20663     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20664     __ psllq($dst$$XMMRegister, 32);
20665     // Get the lo-lo products
20666     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20667     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20668     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20669   %}
20670   ins_pipe( pipe_slow );
20671 %}
20672 
20673 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20674   predicate(UseAVX > 0 &&
20675             ((Matcher::vector_length_in_bytes(n) == 64 &&
20676               !VM_Version::supports_avx512dq()) ||
20677              (Matcher::vector_length_in_bytes(n) < 64 &&
20678               !VM_Version::supports_avx512vldq())));
20679   match(Set dst (MulVL src1 src2));
20680   effect(TEMP xtmp1, TEMP xtmp2);
20681   ins_cost(500);
20682   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20683   ins_encode %{
20684     int vlen_enc = vector_length_encoding(this);
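    // Same lo/hi decomposition as vmulL above, expressed with AVX forms.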
20685     // Get the lo-hi products; only the lower 32 bits are of concern
20686     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20687     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20688     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20689     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20690     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20691     // Get the lo-lo products
20692     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20693     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20694   %}
20695   ins_pipe( pipe_slow );
20696 %}
20697 
20698 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20699   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20700   match(Set dst (MulVL src1 src2));
20701   ins_cost(100);
20702   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20703   ins_encode %{
20704     int vlen_enc = vector_length_encoding(this);
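    // Both operands are known zero-extended 32-bit values, so a single
    // vpmuludq of the low halves yields the full 64-bit product.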
20705     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20706   %}
20707   ins_pipe( pipe_slow );
20708 %}
20709 
20710 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20711   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20712   match(Set dst (MulVL src1 src2));
20713   ins_cost(100);
20714   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20715   ins_encode %{
20716     int vlen_enc = vector_length_encoding(this);
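    // Both operands are known sign-extended 32-bit values, so vpmuldq of the
    // low halves yields the full signed 64-bit product.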
20717     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20718   %}
20719   ins_pipe( pipe_slow );
20720 %}
20721 
20722 // Floats vector mul
20723 instruct vmulF(vec dst, vec src) %{
20724   predicate(UseAVX == 0);
20725   match(Set dst (MulVF dst src));
20726   format %{ "mulps   $dst,$src\t! mul packedF" %}
20727   ins_encode %{
20728     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20729   %}
20730   ins_pipe( pipe_slow );
20731 %}
20732 
20733 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20734   predicate(UseAVX > 0);
20735   match(Set dst (MulVF src1 src2));
20736   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
20737   ins_encode %{
20738     int vlen_enc = vector_length_encoding(this);
20739     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20740   %}
20741   ins_pipe( pipe_slow );
20742 %}
20743 
20744 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20745   predicate((UseAVX > 0) &&
20746             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20747   match(Set dst (MulVF src (LoadVector mem)));
20748   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
20749   ins_encode %{
20750     int vlen_enc = vector_length_encoding(this);
20751     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20752   %}
20753   ins_pipe( pipe_slow );
20754 %}
20755 
20756 // Doubles vector mul
20757 instruct vmulD(vec dst, vec src) %{
20758   predicate(UseAVX == 0);
20759   match(Set dst (MulVD dst src));
20760   format %{ "mulpd   $dst,$src\t! mul packedD" %}
20761   ins_encode %{
20762     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20763   %}
20764   ins_pipe( pipe_slow );
20765 %}
20766 
20767 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20768   predicate(UseAVX > 0);
20769   match(Set dst (MulVD src1 src2));
20770   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
20771   ins_encode %{
20772     int vlen_enc = vector_length_encoding(this);
20773     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20774   %}
20775   ins_pipe( pipe_slow );
20776 %}
20777 
20778 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20779   predicate((UseAVX > 0) &&
20780             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20781   match(Set dst (MulVD src (LoadVector mem)));
20782   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
20783   ins_encode %{
20784     int vlen_enc = vector_length_encoding(this);
20785     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20786   %}
20787   ins_pipe( pipe_slow );
20788 %}
20789 
20790 // --------------------------------- DIV --------------------------------------
20791 
20792 // Floats vector div
20793 instruct vdivF(vec dst, vec src) %{
20794   predicate(UseAVX == 0);
20795   match(Set dst (DivVF dst src));
20796   format %{ "divps   $dst,$src\t! div packedF" %}
20797   ins_encode %{
20798     __ divps($dst$$XMMRegister, $src$$XMMRegister);
20799   %}
20800   ins_pipe( pipe_slow );
20801 %}
20802 
20803 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20804   predicate(UseAVX > 0);
20805   match(Set dst (DivVF src1 src2));
20806   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
20807   ins_encode %{
20808     int vlen_enc = vector_length_encoding(this);
20809     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20810   %}
20811   ins_pipe( pipe_slow );
20812 %}
20813 
20814 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20815   predicate((UseAVX > 0) &&
20816             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20817   match(Set dst (DivVF src (LoadVector mem)));
20818   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
20819   ins_encode %{
20820     int vlen_enc = vector_length_encoding(this);
20821     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20822   %}
20823   ins_pipe( pipe_slow );
20824 %}
20825 
20826 // Doubles vector div
20827 instruct vdivD(vec dst, vec src) %{
20828   predicate(UseAVX == 0);
20829   match(Set dst (DivVD dst src));
20830   format %{ "divpd   $dst,$src\t! div packedD" %}
20831   ins_encode %{
20832     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20833   %}
20834   ins_pipe( pipe_slow );
20835 %}
20836 
20837 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20838   predicate(UseAVX > 0);
20839   match(Set dst (DivVD src1 src2));
20840   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
20841   ins_encode %{
20842     int vlen_enc = vector_length_encoding(this);
20843     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20844   %}
20845   ins_pipe( pipe_slow );
20846 %}
20847 
20848 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20849   predicate((UseAVX > 0) &&
20850             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20851   match(Set dst (DivVD src (LoadVector mem)));
20852   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
20853   ins_encode %{
20854     int vlen_enc = vector_length_encoding(this);
20855     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20856   %}
20857   ins_pipe( pipe_slow );
20858 %}
20859 
20860 // ------------------------------ MinMax ---------------------------------------
20861 
20862 // Byte, Short, Int vector Min/Max
20863 instruct minmax_reg_sse(vec dst, vec src) %{
20864   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20865             UseAVX == 0);
20866   match(Set dst (MinV dst src));
20867   match(Set dst (MaxV dst src));
20868   format %{ "vector_minmax  $dst,$src\t!  " %}
20869   ins_encode %{
20870     assert(UseSSE >= 4, "required");
20871 
20872     int opcode = this->ideal_Opcode();
20873     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20874     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20875   %}
20876   ins_pipe( pipe_slow );
20877 %}
20878 
20879 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20880   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20881             UseAVX > 0);
20882   match(Set dst (MinV src1 src2));
20883   match(Set dst (MaxV src1 src2));
20884   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
20885   ins_encode %{
20886     int opcode = this->ideal_Opcode();
20887     int vlen_enc = vector_length_encoding(this);
20888     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20889 
20890     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20891   %}
20892   ins_pipe( pipe_slow );
20893 %}
20894 
20895 // Long vector Min/Max
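// Note (illustrative): before AVX-512 there is no packed 64-bit min/max
// instruction, so the SSE path below has to compare and blend instead. The
// SSE4.1 blendv instructions take their mask implicitly in xmm0, hence the
// fixed rxmm0 temp in the rule below.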
20896 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20897   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20898             UseAVX == 0);
20899   match(Set dst (MinV dst src));
20900   match(Set dst (MaxV dst src));
20901   effect(TEMP dst, TEMP tmp);
20902   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
20903   ins_encode %{
20904     assert(UseSSE >= 4, "required");
20905 
20906     int opcode = this->ideal_Opcode();
20907     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20908     assert(elem_bt == T_LONG, "sanity");
20909 
20910     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20911   %}
20912   ins_pipe( pipe_slow );
20913 %}
20914 
20915 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20916   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20917             UseAVX > 0 && !VM_Version::supports_avx512vl());
20918   match(Set dst (MinV src1 src2));
20919   match(Set dst (MaxV src1 src2));
20920   effect(TEMP dst);
20921   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20922   ins_encode %{
20923     int vlen_enc = vector_length_encoding(this);
20924     int opcode = this->ideal_Opcode();
20925     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20926     assert(elem_bt == T_LONG, "sanity");
20927 
20928     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20929   %}
20930   ins_pipe( pipe_slow );
20931 %}
20932 
20933 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20934   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20935             Matcher::vector_element_basic_type(n) == T_LONG);
20936   match(Set dst (MinV src1 src2));
20937   match(Set dst (MaxV src1 src2));
20938   format %{ "vector_minmaxL  $dst,$src1,src2\t! " %}
20939   ins_encode %{
20940     assert(UseAVX > 2, "required");
20941 
20942     int vlen_enc = vector_length_encoding(this);
20943     int opcode = this->ideal_Opcode();
20944     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20945     assert(elem_bt == T_LONG, "sanity");
20946 
20947     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20948   %}
20949   ins_pipe( pipe_slow );
20950 %}
20951 
20952 // Float/Double vector Min/Max
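// Note: plain minps/maxps cannot be used directly, because Java's
// Math.min/max semantics differ from the x86 instructions for NaN and signed
// zeros: min/maxps return the second operand when either input is NaN and
// treat -0.0 as equal to +0.0, whereas Java must return NaN and must order
// -0.0 below +0.0. The macro-assembler helpers used below blend the operands
// to produce the Java result.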
20953 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20954   predicate(VM_Version::supports_avx10_2() &&
20955             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20956   match(Set dst (MinV a b));
20957   match(Set dst (MaxV a b));
20958   format %{ "vector_minmaxFP  $dst, $a, $b" %}
20959   ins_encode %{
20960     int vlen_enc = vector_length_encoding(this);
20961     int opcode = this->ideal_Opcode();
20962     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20963     __ vminmax_fp_avx10_2(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20964   %}
20965   ins_pipe( pipe_slow );
20966 %}
20967 
20968 // Float/Double vector Min/Max
20969 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20970   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20971             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20972             UseAVX > 0);
20973   match(Set dst (MinV a b));
20974   match(Set dst (MaxV a b));
20975   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20976   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20977   ins_encode %{
20978     assert(UseAVX > 0, "required");
20979 
20980     int opcode = this->ideal_Opcode();
20981     int vlen_enc = vector_length_encoding(this);
20982     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20983 
20984     __ vminmax_fp(opcode, elem_bt,
20985                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20986                   $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20987   %}
20988   ins_pipe( pipe_slow );
20989 %}
20990 
20991 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20992   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20993             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20994   match(Set dst (MinV a b));
20995   match(Set dst (MaxV a b));
20996   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20997   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20998   ins_encode %{
20999     assert(UseAVX > 2, "required");
21000 
21001     int opcode = this->ideal_Opcode();
21002     int vlen_enc = vector_length_encoding(this);
21003     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21004 
21005     __ evminmax_fp(opcode, elem_bt,
21006                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21007                    $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
21008   %}
21009   ins_pipe( pipe_slow );
21010 %}
21011 
21012 // ------------------------------ Unsigned vector Min/Max ----------------------
21013 
21014 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21015   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21016   match(Set dst (UMinV a b));
21017   match(Set dst (UMaxV a b));
21018   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21019   ins_encode %{
21020     int opcode = this->ideal_Opcode();
21021     int vlen_enc = vector_length_encoding(this);
21022     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21023     assert(is_integral_type(elem_bt), "");
21024     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21025   %}
21026   ins_pipe( pipe_slow );
21027 %}
21028 
21029 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21030   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21031   match(Set dst (UMinV a (LoadVector b)));
21032   match(Set dst (UMaxV a (LoadVector b)));
21033   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21034   ins_encode %{
21035     int opcode = this->ideal_Opcode();
21036     int vlen_enc = vector_length_encoding(this);
21037     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21038     assert(is_integral_type(elem_bt), "");
21039     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21040   %}
21041   ins_pipe( pipe_slow );
21042 %}
21043 
21044 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21045   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21046   match(Set dst (UMinV a b));
21047   match(Set dst (UMaxV a b));
21048   effect(TEMP xtmp1, TEMP xtmp2);
21049   format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
21050   ins_encode %{
21051     int opcode = this->ideal_Opcode();
21052     int vlen_enc = vector_length_encoding(this);
21053     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21054   %}
21055   ins_pipe( pipe_slow );
21056 %}
21057 
21058 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21059   match(Set dst (UMinV (Binary dst src2) mask));
21060   match(Set dst (UMaxV (Binary dst src2) mask));
21061   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21062   ins_encode %{
21063     int vlen_enc = vector_length_encoding(this);
21064     BasicType bt = Matcher::vector_element_basic_type(this);
21065     int opc = this->ideal_Opcode();
21066     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21067                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21068   %}
21069   ins_pipe( pipe_slow );
21070 %}
21071 
21072 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21073   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21074   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21075   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21076   ins_encode %{
21077     int vlen_enc = vector_length_encoding(this);
21078     BasicType bt = Matcher::vector_element_basic_type(this);
21079     int opc = this->ideal_Opcode();
21080     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21081                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21082   %}
21083   ins_pipe( pipe_slow );
21084 %}
21085 
21086 // --------------------------------- Signum/CopySign ---------------------------
21087 
21088 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21089   match(Set dst (SignumF dst (Binary zero one)));
21090   effect(KILL cr);
21091   format %{ "signumF $dst, $dst" %}
21092   ins_encode %{
21093     int opcode = this->ideal_Opcode();
21094     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21095   %}
21096   ins_pipe( pipe_slow );
21097 %}
21098 
21099 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21100   match(Set dst (SignumD dst (Binary zero one)));
21101   effect(KILL cr);
21102   format %{ "signumD $dst, $dst" %}
21103   ins_encode %{
21104     int opcode = this->ideal_Opcode();
21105     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21106   %}
21107   ins_pipe( pipe_slow );
21108 %}
21109 
21110 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21111   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21112   match(Set dst (SignumVF src (Binary zero one)));
21113   match(Set dst (SignumVD src (Binary zero one)));
21114   effect(TEMP dst, TEMP xtmp1);
21115   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21116   ins_encode %{
21117     int opcode = this->ideal_Opcode();
21118     int vec_enc = vector_length_encoding(this);
21119     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21120                          $xtmp1$$XMMRegister, vec_enc);
21121   %}
21122   ins_pipe( pipe_slow );
21123 %}
21124 
21125 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21126   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21127   match(Set dst (SignumVF src (Binary zero one)));
21128   match(Set dst (SignumVD src (Binary zero one)));
21129   effect(TEMP dst, TEMP ktmp1);
21130   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21131   ins_encode %{
21132     int opcode = this->ideal_Opcode();
21133     int vec_enc = vector_length_encoding(this);
21134     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21135                           $ktmp1$$KRegister, vec_enc);
21136   %}
21137   ins_pipe( pipe_slow );
21138 %}
21139 
21140 // ---------------------------------------
21141 // For copySign use 0xE4 as the imm8 truth-table selector for vpternlog
21142 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21143 // C (xmm2) is set to 0x7FFFFFFF
21144 // Wherever xmm2 is 0, we want to pick from B (sign)
21145 // Wherever xmm2 is 1, we want to pick from A (src)
21146 //
21147 // A B C Result
21148 // 0 0 0 0
21149 // 0 0 1 0
21150 // 0 1 0 1
21151 // 0 1 1 0
21152 // 1 0 0 0
21153 // 1 0 1 1
21154 // 1 1 0 1
21155 // 1 1 1 1
21156 //
21157 // Result going from high bit to low bit is 0b11100100 = 0xE4
21158 // ---------------------------------------
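// Illustrative check: vpternlog forms a 3-bit index from the operand bits
// (A = bit 2, B = bit 1, C = bit 0, as in the table above) and returns bit
// 'index' of the immediate. The set bits of 0xE4 = 0b11100100 are indices
// {2, 5, 6, 7}, which is exactly r = (c & a) | (~c & b), i.e. C ? A : B.
// With C = 0x7FFFFFFF the sign bit is taken from B and all other bits from A.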
21159 
21160 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21161   match(Set dst (CopySignF dst src));
21162   effect(TEMP tmp1, TEMP tmp2);
21163   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21164   ins_encode %{
21165     __ movl($tmp2$$Register, 0x7FFFFFFF);
21166     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21167     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21168   %}
21169   ins_pipe( pipe_slow );
21170 %}
21171 
21172 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21173   match(Set dst (CopySignD dst (Binary src zero)));
21174   ins_cost(100);
21175   effect(TEMP tmp1, TEMP tmp2);
21176   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21177   ins_encode %{
21178     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21179     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21180     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21181   %}
21182   ins_pipe( pipe_slow );
21183 %}
21184 
21185 //----------------------------- CompressBits/ExpandBits ------------------------
21186 
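// Note: pext gathers the bits of src selected by mask into the low-order
// bits of dst; pdep is the inverse and scatters the low-order bits of src to
// the mask positions. For example, with mask = 0b0110 and src = 0b1010,
// pext yields 0b01, and pdep of 0b01 with the same mask yields 0b0010.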
21187 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21188   predicate(n->bottom_type()->isa_int());
21189   match(Set dst (CompressBits src mask));
21190   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21191   ins_encode %{
21192     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21193   %}
21194   ins_pipe( pipe_slow );
21195 %}
21196 
21197 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21198   predicate(n->bottom_type()->isa_int());
21199   match(Set dst (ExpandBits src mask));
21200   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21201   ins_encode %{
21202     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21203   %}
21204   ins_pipe( pipe_slow );
21205 %}
21206 
21207 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21208   predicate(n->bottom_type()->isa_int());
21209   match(Set dst (CompressBits src (LoadI mask)));
21210   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21211   ins_encode %{
21212     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21213   %}
21214   ins_pipe( pipe_slow );
21215 %}
21216 
21217 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21218   predicate(n->bottom_type()->isa_int());
21219   match(Set dst (ExpandBits src (LoadI mask)));
21220   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21221   ins_encode %{
21222     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21223   %}
21224   ins_pipe( pipe_slow );
21225 %}
21226 
21227 // --------------------------------- Sqrt --------------------------------------
21228 
21229 instruct vsqrtF_reg(vec dst, vec src) %{
21230   match(Set dst (SqrtVF src));
21231   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
21232   ins_encode %{
21233     assert(UseAVX > 0, "required");
21234     int vlen_enc = vector_length_encoding(this);
21235     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21236   %}
21237   ins_pipe( pipe_slow );
21238 %}
21239 
21240 instruct vsqrtF_mem(vec dst, memory mem) %{
21241   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21242   match(Set dst (SqrtVF (LoadVector mem)));
21243   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
21244   ins_encode %{
21245     assert(UseAVX > 0, "required");
21246     int vlen_enc = vector_length_encoding(this);
21247     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21248   %}
21249   ins_pipe( pipe_slow );
21250 %}
21251 
21252 // Floating point vector sqrt
21253 instruct vsqrtD_reg(vec dst, vec src) %{
21254   match(Set dst (SqrtVD src));
21255   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
21256   ins_encode %{
21257     assert(UseAVX > 0, "required");
21258     int vlen_enc = vector_length_encoding(this);
21259     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21260   %}
21261   ins_pipe( pipe_slow );
21262 %}
21263 
21264 instruct vsqrtD_mem(vec dst, memory mem) %{
21265   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21266   match(Set dst (SqrtVD (LoadVector mem)));
21267   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
21268   ins_encode %{
21269     assert(UseAVX > 0, "required");
21270     int vlen_enc = vector_length_encoding(this);
21271     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21272   %}
21273   ins_pipe( pipe_slow );
21274 %}
21275 
21276 // ------------------------------ Shift ---------------------------------------
21277 
21278 // Left and right shift count vectors are the same on x86
21279 // (only lowest bits of xmm reg are used for count).
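// The xmm-count forms of the packed shift instructions take the count from
// the low 64 bits of the register and ignore the rest, so one movdl-loaded
// register can serve as the count for both left and right shifts.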
21280 instruct vshiftcnt(vec dst, rRegI cnt) %{
21281   match(Set dst (LShiftCntV cnt));
21282   match(Set dst (RShiftCntV cnt));
21283   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21284   ins_encode %{
21285     __ movdl($dst$$XMMRegister, $cnt$$Register);
21286   %}
21287   ins_pipe( pipe_slow );
21288 %}
21289 
21290 // Byte vector shift
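// x86 has no packed byte shift instructions, so the rules below widen the
// bytes to words (vextendbw), shift the words, mask the results back down to
// 8 bits and re-pack them with packuswb.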
21291 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21292   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21293   match(Set dst ( LShiftVB src shift));
21294   match(Set dst ( RShiftVB src shift));
21295   match(Set dst (URShiftVB src shift));
21296   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21297   format %{"vector_byte_shift $dst,$src,$shift" %}
21298   ins_encode %{
21299     assert(UseSSE > 3, "required");
21300     int opcode = this->ideal_Opcode();
21301     bool sign = (opcode != Op_URShiftVB);
21302     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21303     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21304     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21305     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21306     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21307   %}
21308   ins_pipe( pipe_slow );
21309 %}
21310 
21311 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21312   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21313             UseAVX <= 1);
21314   match(Set dst ( LShiftVB src shift));
21315   match(Set dst ( RShiftVB src shift));
21316   match(Set dst (URShiftVB src shift));
21317   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21318   format %{"vector_byte_shift $dst,$src,$shift" %}
21319   ins_encode %{
21320     assert(UseSSE > 3, "required");
21321     int opcode = this->ideal_Opcode();
21322     bool sign = (opcode != Op_URShiftVB);
21323     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21324     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21325     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21326     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21327     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21328     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21329     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21330     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21331     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21332   %}
21333   ins_pipe( pipe_slow );
21334 %}
21335 
21336 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21337   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21338             UseAVX > 1);
21339   match(Set dst ( LShiftVB src shift));
21340   match(Set dst ( RShiftVB src shift));
21341   match(Set dst (URShiftVB src shift));
21342   effect(TEMP dst, TEMP tmp);
21343   format %{"vector_byte_shift $dst,$src,$shift" %}
21344   ins_encode %{
21345     int opcode = this->ideal_Opcode();
21346     bool sign = (opcode != Op_URShiftVB);
21347     int vlen_enc = Assembler::AVX_256bit;
21348     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21349     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21350     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21351     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21352     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21353   %}
21354   ins_pipe( pipe_slow );
21355 %}
21356 
21357 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21358   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21359   match(Set dst ( LShiftVB src shift));
21360   match(Set dst ( RShiftVB src shift));
21361   match(Set dst (URShiftVB src shift));
21362   effect(TEMP dst, TEMP tmp);
21363   format %{"vector_byte_shift $dst,$src,$shift" %}
21364   ins_encode %{
21365     assert(UseAVX > 1, "required");
21366     int opcode = this->ideal_Opcode();
21367     bool sign = (opcode != Op_URShiftVB);
21368     int vlen_enc = Assembler::AVX_256bit;
21369     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21370     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21371     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21372     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21373     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21374     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21375     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21376     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21377     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21378   %}
21379   ins_pipe( pipe_slow );
21380 %}
21381 
21382 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21383   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21384   match(Set dst ( LShiftVB src shift));
21385   match(Set dst  (RShiftVB src shift));
21386   match(Set dst (URShiftVB src shift));
21387   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21388   format %{"vector_byte_shift $dst,$src,$shift" %}
21389   ins_encode %{
21390     assert(UseAVX > 2, "required");
21391     int opcode = this->ideal_Opcode();
21392     bool sign = (opcode != Op_URShiftVB);
21393     int vlen_enc = Assembler::AVX_512bit;
21394     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21395     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21396     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21397     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21398     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21399     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21400     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21401     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21402     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21403     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21404     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21405     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21406   %}
21407   ins_pipe( pipe_slow );
21408 %}
21409 
21410 // A logical right shift of a short vector would produce an incorrect Java
21411 // result for negative data, because Java code converts short values to int
21412 // with sign extension before shifting. But char vectors are fine, since
21413 // chars are unsigned values.
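// Illustrative example: for s = (short)-8, Java evaluates (short)(s >>> 3) by
// first widening s to the int 0xFFFFFFF8; the shift gives 0x1FFFFFFF and the
// narrowing cast keeps 0xFFFF (-1), whereas a 16-bit logical shift of 0xFFF8
// would produce 0x1FFF.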
21414 // Shorts/Chars vector shift
21415 instruct vshiftS(vec dst, vec src, vec shift) %{
21416   predicate(!n->as_ShiftV()->is_var_shift());
21417   match(Set dst ( LShiftVS src shift));
21418   match(Set dst ( RShiftVS src shift));
21419   match(Set dst (URShiftVS src shift));
21420   effect(TEMP dst, USE src, USE shift);
21421   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21422   ins_encode %{
21423     int opcode = this->ideal_Opcode();
21424     if (UseAVX > 0) {
21425       int vlen_enc = vector_length_encoding(this);
21426       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21427     } else {
21428       int vlen = Matcher::vector_length(this);
21429       if (vlen == 2) {
21430         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21431         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21432       } else if (vlen == 4) {
21433         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21434         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21435       } else {
21436         assert (vlen == 8, "sanity");
21437         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21438         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21439       }
21440     }
21441   %}
21442   ins_pipe( pipe_slow );
21443 %}
21444 
21445 // Integers vector shift
21446 instruct vshiftI(vec dst, vec src, vec shift) %{
21447   predicate(!n->as_ShiftV()->is_var_shift());
21448   match(Set dst ( LShiftVI src shift));
21449   match(Set dst ( RShiftVI src shift));
21450   match(Set dst (URShiftVI src shift));
21451   effect(TEMP dst, USE src, USE shift);
21452   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21453   ins_encode %{
21454     int opcode = this->ideal_Opcode();
21455     if (UseAVX > 0) {
21456       int vlen_enc = vector_length_encoding(this);
21457       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21458     } else {
21459       int vlen = Matcher::vector_length(this);
21460       if (vlen == 2) {
21461         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21462         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21463       } else {
21464         assert(vlen == 4, "sanity");
21465         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21466         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21467       }
21468     }
21469   %}
21470   ins_pipe( pipe_slow );
21471 %}
21472 
21473 // Integers vector constant shift
21474 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21475   match(Set dst (LShiftVI src (LShiftCntV shift)));
21476   match(Set dst (RShiftVI src (RShiftCntV shift)));
21477   match(Set dst (URShiftVI src (RShiftCntV shift)));
21478   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21479   ins_encode %{
21480     int opcode = this->ideal_Opcode();
21481     if (UseAVX > 0) {
21482       int vector_len = vector_length_encoding(this);
21483       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21484     } else {
21485       int vlen = Matcher::vector_length(this);
21486       if (vlen == 2) {
21487         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21488         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21489       } else {
21490         assert(vlen == 4, "sanity");
21491         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21492         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21493       }
21494     }
21495   %}
21496   ins_pipe( pipe_slow );
21497 %}
21498 
21499 // Longs vector shift
21500 instruct vshiftL(vec dst, vec src, vec shift) %{
21501   predicate(!n->as_ShiftV()->is_var_shift());
21502   match(Set dst ( LShiftVL src shift));
21503   match(Set dst (URShiftVL src shift));
21504   effect(TEMP dst, USE src, USE shift);
21505   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21506   ins_encode %{
21507     int opcode = this->ideal_Opcode();
21508     if (UseAVX > 0) {
21509       int vlen_enc = vector_length_encoding(this);
21510       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21511     } else {
21512       assert(Matcher::vector_length(this) == 2, "");
21513       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21514       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21515     }
21516   %}
21517   ins_pipe( pipe_slow );
21518 %}
21519 
21520 // Longs vector constant shift
21521 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21522   match(Set dst (LShiftVL src (LShiftCntV shift)));
21523   match(Set dst (URShiftVL src (RShiftCntV shift)));
21524   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21525   ins_encode %{
21526     int opcode = this->ideal_Opcode();
21527     if (UseAVX > 0) {
21528       int vector_len = vector_length_encoding(this);
21529       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21530     } else {
21531       assert(Matcher::vector_length(this) == 2, "");
21532       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21533       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21534     }
21535   %}
21536   ins_pipe( pipe_slow );
21537 %}
21538 
21539 // -------------------ArithmeticRightShift -----------------------------------
21540 // Long vector arithmetic right shift
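// Before AVX-512 there is no packed arithmetic right shift for 64-bit lanes,
// so the SSE/AVX2 path below uses the identity sra(x, n) == (srl(x, n) ^ t) - t
// with t = srl(0x8000000000000000, n): the xor/subtract pair re-extends the
// sign bit that the logical shift dropped. AVX-512 provides vpsraq directly.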
21541 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21542   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21543   match(Set dst (RShiftVL src shift));
21544   effect(TEMP dst, TEMP tmp);
21545   format %{ "vshiftq $dst,$src,$shift" %}
21546   ins_encode %{
21547     uint vlen = Matcher::vector_length(this);
21548     if (vlen == 2) {
21549       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21550       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21551       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21552       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21553       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21554       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21555     } else {
21556       assert(vlen == 4, "sanity");
21557       assert(UseAVX > 1, "required");
21558       int vlen_enc = Assembler::AVX_256bit;
21559       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21560       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21561       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21562       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21563       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21564     }
21565   %}
21566   ins_pipe( pipe_slow );
21567 %}
21568 
21569 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21570   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21571   match(Set dst (RShiftVL src shift));
21572   format %{ "vshiftq $dst,$src,$shift" %}
21573   ins_encode %{
21574     int vlen_enc = vector_length_encoding(this);
21575     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21576   %}
21577   ins_pipe( pipe_slow );
21578 %}
21579 
21580 // ------------------- Variable Shift -----------------------------
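// AVX2 provides per-lane variable shifts only for 32-bit and 64-bit lanes
// (vpsllvd/vpsrlvd/vpsravd, vpsllvq/vpsrlvq); AVX-512 adds vpsravq and
// AVX-512BW adds the word forms. Byte (and, without BW, short) variable
// shifts are therefore emulated below by widening to a larger lane size,
// shifting and narrowing back.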
21581 // Byte variable shift
21582 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21583   predicate(Matcher::vector_length(n) <= 8 &&
21584             n->as_ShiftV()->is_var_shift() &&
21585             !VM_Version::supports_avx512bw());
21586   match(Set dst ( LShiftVB src shift));
21587   match(Set dst ( RShiftVB src shift));
21588   match(Set dst (URShiftVB src shift));
21589   effect(TEMP dst, TEMP vtmp);
21590   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21591   ins_encode %{
21592     assert(UseAVX >= 2, "required");
21593 
21594     int opcode = this->ideal_Opcode();
21595     int vlen_enc = Assembler::AVX_128bit;
21596     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21597     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21598   %}
21599   ins_pipe( pipe_slow );
21600 %}
21601 
21602 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21603   predicate(Matcher::vector_length(n) == 16 &&
21604             n->as_ShiftV()->is_var_shift() &&
21605             !VM_Version::supports_avx512bw());
21606   match(Set dst ( LShiftVB src shift));
21607   match(Set dst ( RShiftVB src shift));
21608   match(Set dst (URShiftVB src shift));
21609   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21610   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21611   ins_encode %{
21612     assert(UseAVX >= 2, "required");
21613 
21614     int opcode = this->ideal_Opcode();
21615     int vlen_enc = Assembler::AVX_128bit;
21616     // Shift lower half and get word result in dst
21617     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21618 
21619     // Shift upper half and get word result in vtmp1
21620     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21621     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21622     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21623 
21624     // Merge and down convert the two word results to byte in dst
21625     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21626   %}
21627   ins_pipe( pipe_slow );
21628 %}
21629 
21630 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21631   predicate(Matcher::vector_length(n) == 32 &&
21632             n->as_ShiftV()->is_var_shift() &&
21633             !VM_Version::supports_avx512bw());
21634   match(Set dst ( LShiftVB src shift));
21635   match(Set dst ( RShiftVB src shift));
21636   match(Set dst (URShiftVB src shift));
21637   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21638   format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21639   ins_encode %{
21640     assert(UseAVX >= 2, "required");
21641 
21642     int opcode = this->ideal_Opcode();
21643     int vlen_enc = Assembler::AVX_128bit;
21644     // Process lower 128 bits and get result in dst
21645     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21646     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21647     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21648     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21649     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21650 
21651     // Process higher 128 bits and get result in vtmp3
21652     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21653     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21654     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21655     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21656     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21657     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21658     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21659 
21660     // Merge the two results in dst
21661     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21662   %}
21663   ins_pipe( pipe_slow );
21664 %}
21665 
21666 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21667   predicate(Matcher::vector_length(n) <= 32 &&
21668             n->as_ShiftV()->is_var_shift() &&
21669             VM_Version::supports_avx512bw());
21670   match(Set dst ( LShiftVB src shift));
21671   match(Set dst ( RShiftVB src shift));
21672   match(Set dst (URShiftVB src shift));
21673   effect(TEMP dst, TEMP vtmp);
21674   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21675   ins_encode %{
21676     assert(UseAVX > 2, "required");
21677 
21678     int opcode = this->ideal_Opcode();
21679     int vlen_enc = vector_length_encoding(this);
21680     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21681   %}
21682   ins_pipe( pipe_slow );
21683 %}
21684 
21685 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21686   predicate(Matcher::vector_length(n) == 64 &&
21687             n->as_ShiftV()->is_var_shift() &&
21688             VM_Version::supports_avx512bw());
21689   match(Set dst ( LShiftVB src shift));
21690   match(Set dst ( RShiftVB src shift));
21691   match(Set dst (URShiftVB src shift));
21692   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21693   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21694   ins_encode %{
21695     assert(UseAVX > 2, "required");
21696 
21697     int opcode = this->ideal_Opcode();
21698     int vlen_enc = Assembler::AVX_256bit;
21699     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21700     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21701     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21702     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21703     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21704   %}
21705   ins_pipe( pipe_slow );
21706 %}
21707 
21708 // Short variable shift
21709 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21710   predicate(Matcher::vector_length(n) <= 8 &&
21711             n->as_ShiftV()->is_var_shift() &&
21712             !VM_Version::supports_avx512bw());
21713   match(Set dst ( LShiftVS src shift));
21714   match(Set dst ( RShiftVS src shift));
21715   match(Set dst (URShiftVS src shift));
21716   effect(TEMP dst, TEMP vtmp);
21717   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21718   ins_encode %{
21719     assert(UseAVX >= 2, "required");
21720 
21721     int opcode = this->ideal_Opcode();
21722     bool sign = (opcode != Op_URShiftVS);
21723     int vlen_enc = Assembler::AVX_256bit;
21724     __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
21725     __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
21726     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21727     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21728     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21729     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21730   %}
21731   ins_pipe( pipe_slow );
21732 %}
21733 
21734 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21735   predicate(Matcher::vector_length(n) == 16 &&
21736             n->as_ShiftV()->is_var_shift() &&
21737             !VM_Version::supports_avx512bw());
21738   match(Set dst ( LShiftVS src shift));
21739   match(Set dst ( RShiftVS src shift));
21740   match(Set dst (URShiftVS src shift));
21741   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21742   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21743   ins_encode %{
21744     assert(UseAVX >= 2, "required");
21745 
21746     int opcode = this->ideal_Opcode();
21747     bool sign = (opcode != Op_URShiftVS);
21748     int vlen_enc = Assembler::AVX_256bit;
21749     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21750     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21751     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21752     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21753     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21754 
21755     // Shift upper half, with result in dst using vtmp1 as TEMP
21756     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21757     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21758     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21759     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21760     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21761     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21762 
21763     // Merge lower and upper half result into dst
21764     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21765     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21766   %}
21767   ins_pipe( pipe_slow );
21768 %}
21769 
21770 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21771   predicate(n->as_ShiftV()->is_var_shift() &&
21772             VM_Version::supports_avx512bw());
21773   match(Set dst ( LShiftVS src shift));
21774   match(Set dst ( RShiftVS src shift));
21775   match(Set dst (URShiftVS src shift));
21776   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21777   ins_encode %{
21778     assert(UseAVX > 2, "required");
21779 
21780     int opcode = this->ideal_Opcode();
21781     int vlen_enc = vector_length_encoding(this);
21782     if (!VM_Version::supports_avx512vl()) {
21783       vlen_enc = Assembler::AVX_512bit;
21784     }
21785     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21786   %}
21787   ins_pipe( pipe_slow );
21788 %}
21789 
21790 // Integer variable shift
21791 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21792   predicate(n->as_ShiftV()->is_var_shift());
21793   match(Set dst ( LShiftVI src shift));
21794   match(Set dst ( RShiftVI src shift));
21795   match(Set dst (URShiftVI src shift));
21796   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21797   ins_encode %{
21798     assert(UseAVX >= 2, "required");
21799 
21800     int opcode = this->ideal_Opcode();
21801     int vlen_enc = vector_length_encoding(this);
21802     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21803   %}
21804   ins_pipe( pipe_slow );
21805 %}
21806 
21807 // Long variable shift
21808 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21809   predicate(n->as_ShiftV()->is_var_shift());
21810   match(Set dst ( LShiftVL src shift));
21811   match(Set dst (URShiftVL src shift));
21812   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21813   ins_encode %{
21814     assert(UseAVX >= 2, "required");
21815 
21816     int opcode = this->ideal_Opcode();
21817     int vlen_enc = vector_length_encoding(this);
21818     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21819   %}
21820   ins_pipe( pipe_slow );
21821 %}
21822 
21823 // Long variable arithmetic right shift
21824 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21825   predicate(Matcher::vector_length(n) <= 4 &&
21826             n->as_ShiftV()->is_var_shift() &&
21827             UseAVX == 2);
21828   match(Set dst (RShiftVL src shift));
21829   effect(TEMP dst, TEMP vtmp);
21830   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21831   ins_encode %{
21832     int opcode = this->ideal_Opcode();
21833     int vlen_enc = vector_length_encoding(this);
21834     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21835                  $vtmp$$XMMRegister);
21836   %}
21837   ins_pipe( pipe_slow );
21838 %}
21839 
21840 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21841   predicate(n->as_ShiftV()->is_var_shift() &&
21842             UseAVX > 2);
21843   match(Set dst (RShiftVL src shift));
21844   format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
21845   ins_encode %{
21846     int opcode = this->ideal_Opcode();
21847     int vlen_enc = vector_length_encoding(this);
21848     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21849   %}
21850   ins_pipe( pipe_slow );
21851 %}
21852 
21853 // --------------------------------- AND --------------------------------------
21854 
21855 instruct vand(vec dst, vec src) %{
21856   predicate(UseAVX == 0);
21857   match(Set dst (AndV dst src));
21858   format %{ "pand    $dst,$src\t! and vectors" %}
21859   ins_encode %{
21860     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21861   %}
21862   ins_pipe( pipe_slow );
21863 %}
21864 
21865 instruct vand_reg(vec dst, vec src1, vec src2) %{
21866   predicate(UseAVX > 0);
21867   match(Set dst (AndV src1 src2));
21868   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
21869   ins_encode %{
21870     int vlen_enc = vector_length_encoding(this);
21871     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21872   %}
21873   ins_pipe( pipe_slow );
21874 %}
21875 
21876 instruct vand_mem(vec dst, vec src, memory mem) %{
21877   predicate((UseAVX > 0) &&
21878             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21879   match(Set dst (AndV src (LoadVector mem)));
21880   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
21881   ins_encode %{
21882     int vlen_enc = vector_length_encoding(this);
21883     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21884   %}
21885   ins_pipe( pipe_slow );
21886 %}
21887 
21888 // --------------------------------- OR ---------------------------------------
21889 
21890 instruct vor(vec dst, vec src) %{
21891   predicate(UseAVX == 0);
21892   match(Set dst (OrV dst src));
21893   format %{ "por     $dst,$src\t! or vectors" %}
21894   ins_encode %{
21895     __ por($dst$$XMMRegister, $src$$XMMRegister);
21896   %}
21897   ins_pipe( pipe_slow );
21898 %}
21899 
21900 instruct vor_reg(vec dst, vec src1, vec src2) %{
21901   predicate(UseAVX > 0);
21902   match(Set dst (OrV src1 src2));
21903   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
21904   ins_encode %{
21905     int vlen_enc = vector_length_encoding(this);
21906     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21907   %}
21908   ins_pipe( pipe_slow );
21909 %}
21910 
21911 instruct vor_mem(vec dst, vec src, memory mem) %{
21912   predicate((UseAVX > 0) &&
21913             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21914   match(Set dst (OrV src (LoadVector mem)));
21915   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
21916   ins_encode %{
21917     int vlen_enc = vector_length_encoding(this);
21918     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21919   %}
21920   ins_pipe( pipe_slow );
21921 %}
21922 
21923 // --------------------------------- XOR --------------------------------------
21924 
21925 instruct vxor(vec dst, vec src) %{
21926   predicate(UseAVX == 0);
21927   match(Set dst (XorV dst src));
21928   format %{ "pxor    $dst,$src\t! xor vectors" %}
21929   ins_encode %{
21930     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21931   %}
21932   ins_pipe( pipe_slow );
21933 %}
21934 
21935 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21936   predicate(UseAVX > 0);
21937   match(Set dst (XorV src1 src2));
21938   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
21939   ins_encode %{
21940     int vlen_enc = vector_length_encoding(this);
21941     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21942   %}
21943   ins_pipe( pipe_slow );
21944 %}
21945 
21946 instruct vxor_mem(vec dst, vec src, memory mem) %{
21947   predicate((UseAVX > 0) &&
21948             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21949   match(Set dst (XorV src (LoadVector mem)));
21950   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
21951   ins_encode %{
21952     int vlen_enc = vector_length_encoding(this);
21953     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21954   %}
21955   ins_pipe( pipe_slow );
21956 %}
21957 
21958 // --------------------------------- VectorCast --------------------------------------
21959 
21960 instruct vcastBtoX(vec dst, vec src) %{
21961   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21962   match(Set dst (VectorCastB2X src));
21963   format %{ "vector_cast_b2x $dst,$src\t!" %}
21964   ins_encode %{
21965     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21966     int vlen_enc = vector_length_encoding(this);
21967     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21968   %}
21969   ins_pipe( pipe_slow );
21970 %}
21971 
21972 instruct vcastBtoD(legVec dst, legVec src) %{
21973   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21974   match(Set dst (VectorCastB2X src));
21975   format %{ "vector_cast_b2x $dst,$src\t!" %}
21976   ins_encode %{
21977     int vlen_enc = vector_length_encoding(this);
21978     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21979   %}
21980   ins_pipe( pipe_slow );
21981 %}
21982 
21983 instruct castStoX(vec dst, vec src) %{
21984   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21985             Matcher::vector_length(n->in(1)) <= 8 && // src
21986             Matcher::vector_element_basic_type(n) == T_BYTE);
21987   match(Set dst (VectorCastS2X src));
21988   format %{ "vector_cast_s2x $dst,$src" %}
21989   ins_encode %{
21990     assert(UseAVX > 0, "required");
21991 
21992     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21993     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21994   %}
21995   ins_pipe( pipe_slow );
21996 %}
21997 
21998 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21999   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22000             Matcher::vector_length(n->in(1)) == 16 && // src
22001             Matcher::vector_element_basic_type(n) == T_BYTE);
22002   effect(TEMP dst, TEMP vtmp);
22003   match(Set dst (VectorCastS2X src));
22004   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22005   ins_encode %{
22006     assert(UseAVX > 0, "required");
22007 
22008     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22009     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22010     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22011     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22012   %}
22013   ins_pipe( pipe_slow );
22014 %}
22015 
22016 instruct vcastStoX_evex(vec dst, vec src) %{
22017   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22018             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22019   match(Set dst (VectorCastS2X src));
22020   format %{ "vector_cast_s2x $dst,$src\t!" %}
22021   ins_encode %{
22022     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22023     int src_vlen_enc = vector_length_encoding(this, $src);
22024     int vlen_enc = vector_length_encoding(this);
22025     switch (to_elem_bt) {
22026       case T_BYTE:
22027         if (!VM_Version::supports_avx512vl()) {
22028           vlen_enc = Assembler::AVX_512bit;
22029         }
22030         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22031         break;
22032       case T_INT:
22033         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22034         break;
22035       case T_FLOAT:
22036         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22037         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22038         break;
22039       case T_LONG:
22040         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22041         break;
22042       case T_DOUBLE: {
22043         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22044         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22045         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22046         break;
22047       }
22048       default:
22049         ShouldNotReachHere();
22050     }
22051   %}
22052   ins_pipe( pipe_slow );
22053 %}
22054 
22055 instruct castItoX(vec dst, vec src) %{
22056   predicate(UseAVX <= 2 &&
22057             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22058             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22059   match(Set dst (VectorCastI2X src));
22060   format %{ "vector_cast_i2x $dst,$src" %}
22061   ins_encode %{
22062     assert(UseAVX > 0, "required");
22063 
22064     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22065     int vlen_enc = vector_length_encoding(this, $src);
22066 
22067     if (to_elem_bt == T_BYTE) {
22068       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22069       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22070       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22071     } else {
22072       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22073       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22074       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22075     }
22076   %}
22077   ins_pipe( pipe_slow );
22078 %}
22079 
22080 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22081   predicate(UseAVX <= 2 &&
22082             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22083             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22084   match(Set dst (VectorCastI2X src));
22085   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22086   effect(TEMP dst, TEMP vtmp);
22087   ins_encode %{
22088     assert(UseAVX > 0, "required");
22089 
22090     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22091     int vlen_enc = vector_length_encoding(this, $src);
22092 
22093     if (to_elem_bt == T_BYTE) {
22094       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22095       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22096       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22097       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22098     } else {
22099       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22100       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22101       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22102       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22103     }
22104   %}
22105   ins_pipe( pipe_slow );
22106 %}
22107 
22108 instruct vcastItoX_evex(vec dst, vec src) %{
22109   predicate(UseAVX > 2 ||
22110             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22111   match(Set dst (VectorCastI2X src));
22112   format %{ "vector_cast_i2x $dst,$src\t!" %}
22113   ins_encode %{
22114     assert(UseAVX > 0, "required");
22115 
22116     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22117     int src_vlen_enc = vector_length_encoding(this, $src);
22118     int dst_vlen_enc = vector_length_encoding(this);
22119     switch (dst_elem_bt) {
22120       case T_BYTE:
22121         if (!VM_Version::supports_avx512vl()) {
22122           src_vlen_enc = Assembler::AVX_512bit;
22123         }
22124         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22125         break;
22126       case T_SHORT:
22127         if (!VM_Version::supports_avx512vl()) {
22128           src_vlen_enc = Assembler::AVX_512bit;
22129         }
22130         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22131         break;
22132       case T_FLOAT:
22133         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22134         break;
22135       case T_LONG:
22136         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22137         break;
22138       case T_DOUBLE:
22139         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22140         break;
22141       default:
22142         ShouldNotReachHere();
22143     }
22144   %}
22145   ins_pipe( pipe_slow );
22146 %}
22147 
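// Long-to-byte/short narrowing without AVX-512: shuffles gather the low dword of each
// quadword, a mask clears the upper bits, and pack instructions squeeze the lanes down
// to the target element size. The EVEX rule below uses the single-instruction
// evpmovq* narrowing forms instead.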
22148 instruct vcastLtoBS(vec dst, vec src) %{
22149   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22150             UseAVX <= 2);
22151   match(Set dst (VectorCastL2X src));
22152   format %{ "vector_cast_l2x  $dst,$src" %}
22153   ins_encode %{
22154     assert(UseAVX > 0, "required");
22155 
22156     int vlen = Matcher::vector_length_in_bytes(this, $src);
22157     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
22158     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22159                                                       : ExternalAddress(vector_int_to_short_mask());
22160     if (vlen <= 16) {
22161       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22162       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22163       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22164     } else {
22165       assert(vlen <= 32, "required");
22166       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22167       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22168       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22169       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22170     }
22171     if (to_elem_bt == T_BYTE) {
22172       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22173     }
22174   %}
22175   ins_pipe( pipe_slow );
22176 %}
22177 
22178 instruct vcastLtoX_evex(vec dst, vec src) %{
22179   predicate(UseAVX > 2 ||
22180             (Matcher::vector_element_basic_type(n) == T_INT ||
22181              Matcher::vector_element_basic_type(n) == T_FLOAT ||
22182              Matcher::vector_element_basic_type(n) == T_DOUBLE));
22183   match(Set dst (VectorCastL2X src));
22184   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22185   ins_encode %{
22186     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22187     int vlen = Matcher::vector_length_in_bytes(this, $src);
22188     int vlen_enc = vector_length_encoding(this, $src);
22189     switch (to_elem_bt) {
22190       case T_BYTE:
22191         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22192           vlen_enc = Assembler::AVX_512bit;
22193         }
22194         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22195         break;
22196       case T_SHORT:
22197         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22198           vlen_enc = Assembler::AVX_512bit;
22199         }
22200         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22201         break;
22202       case T_INT:
22203         if (vlen == 8) {
22204           if ($dst$$XMMRegister != $src$$XMMRegister) {
22205             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22206           }
22207         } else if (vlen == 16) {
22208           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22209         } else if (vlen == 32) {
22210           if (UseAVX > 2) {
22211             if (!VM_Version::supports_avx512vl()) {
22212               vlen_enc = Assembler::AVX_512bit;
22213             }
22214             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22215           } else {
22216             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22217             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22218           }
22219         } else { // vlen == 64
22220           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22221         }
22222         break;
22223       case T_FLOAT:
22224         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22225         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22226         break;
22227       case T_DOUBLE:
22228         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22229         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22230         break;
22231 
22232       default: assert(false, "%s", type2name(to_elem_bt));
22233     }
22234   %}
22235   ins_pipe( pipe_slow );
22236 %}
22237 
22238 instruct vcastFtoD_reg(vec dst, vec src) %{
22239   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22240   match(Set dst (VectorCastF2X src));
22241   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22242   ins_encode %{
22243     int vlen_enc = vector_length_encoding(this);
22244     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22245   %}
22246   ins_pipe( pipe_slow );
22247 %}
22248 
22249 
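// Float-to-integral casts. cvttps2dq and friends return the integer-indefinite value
// for NaN and out-of-range inputs, so the pre-AVX10.2 rules need XMM (and, for EVEX,
// opmask) temporaries to fix those lanes up to Java cast semantics (NaN -> 0, clamp to
// the target range). The AVX10.2 rules below need no fixup temporaries.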
22250 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22251   predicate(!VM_Version::supports_avx10_2() &&
22252             !VM_Version::supports_avx512vl() &&
22253             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22254             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22255             is_integral_type(Matcher::vector_element_basic_type(n)));
22256   match(Set dst (VectorCastF2X src));
22257   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22258   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22259   ins_encode %{
22260     int vlen_enc = vector_length_encoding(this, $src);
22261     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register to load addresses wider
    // than 32 bits in the register-indirect addressing mode: stub constants live in the code
    // cache, and ReservedCodeCacheSize is currently capped at 2G. Targets are free to raise
    // that limit, but a code cache larger than 2G is unrealistic in practice, and with the
    // current cap we save a temporary register allocation, which in the limiting case avoids
    // spills in blocks with high register pressure.
22269     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22270                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22271                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22272   %}
22273   ins_pipe( pipe_slow );
22274 %}
22275 
22276 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22277   predicate(!VM_Version::supports_avx10_2() &&
22278             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22279             is_integral_type(Matcher::vector_element_basic_type(n)));
22280   match(Set dst (VectorCastF2X src));
22281   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22282   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22283   ins_encode %{
22284     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22285     if (to_elem_bt == T_LONG) {
22286       int vlen_enc = vector_length_encoding(this);
22287       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22288                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22289                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22290     } else {
22291       int vlen_enc = vector_length_encoding(this, $src);
22292       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22293                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22294                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22295     }
22296   %}
22297   ins_pipe( pipe_slow );
22298 %}
22299 
22300 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22301   predicate(VM_Version::supports_avx10_2() &&
22302             is_integral_type(Matcher::vector_element_basic_type(n)));
22303   match(Set dst (VectorCastF2X src));
22304   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22305   ins_encode %{
22306     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22307     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22308     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22309   %}
22310   ins_pipe( pipe_slow );
22311 %}
22312 
22313 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22314   predicate(VM_Version::supports_avx10_2() &&
22315             is_integral_type(Matcher::vector_element_basic_type(n)));
22316   match(Set dst (VectorCastF2X (LoadVector src)));
22317   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22318   ins_encode %{
22319     int vlen = Matcher::vector_length(this);
22320     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22321     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22322     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22323   %}
22324   ins_pipe( pipe_slow );
22325 %}
22326 
22327 instruct vcastDtoF_reg(vec dst, vec src) %{
22328   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22329   match(Set dst (VectorCastD2X src));
22330   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22331   ins_encode %{
22332     int vlen_enc = vector_length_encoding(this, $src);
22333     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22334   %}
22335   ins_pipe( pipe_slow );
22336 %}
22337 
22338 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22339   predicate(!VM_Version::supports_avx10_2() &&
22340             !VM_Version::supports_avx512vl() &&
22341             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22342             is_integral_type(Matcher::vector_element_basic_type(n)));
22343   match(Set dst (VectorCastD2X src));
22344   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22345   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22346   ins_encode %{
22347     int vlen_enc = vector_length_encoding(this, $src);
22348     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22349     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22350                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22351                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22352   %}
22353   ins_pipe( pipe_slow );
22354 %}
22355 
22356 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22357   predicate(!VM_Version::supports_avx10_2() &&
22358             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22359             is_integral_type(Matcher::vector_element_basic_type(n)));
22360   match(Set dst (VectorCastD2X src));
22361   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22362   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22363   ins_encode %{
22364     int vlen_enc = vector_length_encoding(this, $src);
22365     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22366     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22367                               ExternalAddress(vector_float_signflip());
22368     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22369                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22370   %}
22371   ins_pipe( pipe_slow );
22372 %}
22373 
22374 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22375   predicate(VM_Version::supports_avx10_2() &&
22376             is_integral_type(Matcher::vector_element_basic_type(n)));
22377   match(Set dst (VectorCastD2X src));
22378   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22379   ins_encode %{
22380     int vlen_enc = vector_length_encoding(this, $src);
22381     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22382     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22383   %}
22384   ins_pipe( pipe_slow );
22385 %}
22386 
22387 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22388   predicate(VM_Version::supports_avx10_2() &&
22389             is_integral_type(Matcher::vector_element_basic_type(n)));
22390   match(Set dst (VectorCastD2X (LoadVector src)));
22391   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22392   ins_encode %{
22393     int vlen = Matcher::vector_length(this);
22394     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22395     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22396     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22397   %}
22398   ins_pipe( pipe_slow );
22399 %}
22400 
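// Zero-extending (unsigned) widening casts share a single rule; the masm helper
// dispatches on the source and destination element types.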
22401 instruct vucast(vec dst, vec src) %{
22402   match(Set dst (VectorUCastB2X src));
22403   match(Set dst (VectorUCastS2X src));
22404   match(Set dst (VectorUCastI2X src));
22405   format %{ "vector_ucast $dst,$src\t!" %}
22406   ins_encode %{
22407     assert(UseAVX > 0, "required");
22408 
22409     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22410     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22411     int vlen_enc = vector_length_encoding(this);
22412     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22413   %}
22414   ins_pipe( pipe_slow );
22415 %}
22416 
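// RoundVF/RoundVD implement Math.round semantics per lane. A custom MXCSR control word
// is materialized in the constant pool for the masm helpers to use while rounding.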
22417 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22418   predicate(!VM_Version::supports_avx512vl() &&
22419             Matcher::vector_length_in_bytes(n) < 64 &&
22420             Matcher::vector_element_basic_type(n) == T_INT);
22421   match(Set dst (RoundVF src));
22422   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22423   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22424   ins_encode %{
22425     int vlen_enc = vector_length_encoding(this);
22426     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22427     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22428                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22429                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22430   %}
22431   ins_pipe( pipe_slow );
22432 %}
22433 
22434 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22435   predicate((VM_Version::supports_avx512vl() ||
22436              Matcher::vector_length_in_bytes(n) == 64) &&
22437              Matcher::vector_element_basic_type(n) == T_INT);
22438   match(Set dst (RoundVF src));
22439   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22440   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22441   ins_encode %{
22442     int vlen_enc = vector_length_encoding(this);
22443     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22444     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22445                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22446                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22447   %}
22448   ins_pipe( pipe_slow );
22449 %}
22450 
22451 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22452   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22453   match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22455   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22456   ins_encode %{
22457     int vlen_enc = vector_length_encoding(this);
22458     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22459     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22460                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22461                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22462   %}
22463   ins_pipe( pipe_slow );
22464 %}
22465 
22466 // --------------------------------- VectorMaskCmp --------------------------------------
22467 
22468 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22469   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22470             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22471             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22472             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22473   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22474   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22475   ins_encode %{
22476     int vlen_enc = vector_length_encoding(this, $src1);
22477     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22478     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22479       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22480     } else {
22481       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22482     }
22483   %}
22484   ins_pipe( pipe_slow );
22485 %}
22486 
22487 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22488   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22489             n->bottom_type()->isa_vectmask() == nullptr &&
22490             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22491   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22492   effect(TEMP ktmp);
22493   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22494   ins_encode %{
22495     int vlen_enc = Assembler::AVX_512bit;
22496     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22497     KRegister mask = k0; // The comparison itself is not being masked.
22498     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22499       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22500       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22501     } else {
22502       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22503       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22504     }
22505   %}
22506   ins_pipe( pipe_slow );
22507 %}
22508 
22509 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22510   predicate(n->bottom_type()->isa_vectmask() &&
22511             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22512   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22513   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22514   ins_encode %{
22515     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22516     int vlen_enc = vector_length_encoding(this, $src1);
22517     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22518     KRegister mask = k0; // The comparison itself is not being masked.
22519     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22520       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22521     } else {
22522       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22523     }
22524   %}
22525   ins_pipe( pipe_slow );
22526 %}
22527 
22528 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22529   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22530             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22531             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22532             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22533             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22534             (n->in(2)->get_int() == BoolTest::eq ||
22535              n->in(2)->get_int() == BoolTest::lt ||
22536              n->in(2)->get_int() == BoolTest::gt)); // cond
22537   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22538   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22539   ins_encode %{
22540     int vlen_enc = vector_length_encoding(this, $src1);
22541     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22542     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22543     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22544   %}
22545   ins_pipe( pipe_slow );
22546 %}
22547 
22548 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22549   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22550             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22551             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22552             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22553             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22554             (n->in(2)->get_int() == BoolTest::ne ||
22555              n->in(2)->get_int() == BoolTest::le ||
22556              n->in(2)->get_int() == BoolTest::ge)); // cond
22557   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22558   effect(TEMP dst, TEMP xtmp);
22559   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22560   ins_encode %{
22561     int vlen_enc = vector_length_encoding(this, $src1);
22562     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22563     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22564     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22565   %}
22566   ins_pipe( pipe_slow );
22567 %}
22568 
22569 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22570   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22571             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22572             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22573             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22574             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22575   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22576   effect(TEMP dst, TEMP xtmp);
22577   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22578   ins_encode %{
22579     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22580     int vlen_enc = vector_length_encoding(this, $src1);
22581     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22582     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22583 
22584     if (vlen_enc == Assembler::AVX_128bit) {
22585       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22586     } else {
22587       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22588     }
22589     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22590     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22591     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22592   %}
22593   ins_pipe( pipe_slow );
22594 %}
22595 
22596 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22597   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22598              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22599              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22600   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22601   effect(TEMP ktmp);
22602   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22603   ins_encode %{
22604     assert(UseAVX > 2, "required");
22605 
22606     int vlen_enc = vector_length_encoding(this, $src1);
22607     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22608     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22609     KRegister mask = k0; // The comparison itself is not being masked.
22610     bool merge = false;
22611     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22612 
22613     switch (src1_elem_bt) {
22614       case T_INT: {
22615         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22616         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22617         break;
22618       }
22619       case T_LONG: {
22620         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22621         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22622         break;
22623       }
22624       default: assert(false, "%s", type2name(src1_elem_bt));
22625     }
22626   %}
22627   ins_pipe( pipe_slow );
22628 %}
22629 
22630 
22631 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22632   predicate(n->bottom_type()->isa_vectmask() &&
22633             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22634   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22635   format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22636   ins_encode %{
22637     assert(UseAVX > 2, "required");
22638     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22639 
22640     int vlen_enc = vector_length_encoding(this, $src1);
22641     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22642     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22643     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22644 
    // Compare lane-wise according to src1's element type; the result lands directly in the opmask register.
22646     switch (src1_elem_bt) {
22647       case T_BYTE: {
22648         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22649         break;
22650       }
22651       case T_SHORT: {
22652         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22653         break;
22654       }
22655       case T_INT: {
22656         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22657         break;
22658       }
22659       case T_LONG: {
22660         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22661         break;
22662       }
22663       default: assert(false, "%s", type2name(src1_elem_bt));
22664     }
22665   %}
22666   ins_pipe( pipe_slow );
22667 %}
22668 
22669 // Extract
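// Extract a single lane into a scalar or FP register. For sources wider than 128 bits
// the containing 128-bit lane is first copied to a temporary via get_lane, then the
// element is pulled out with get_elem.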
22670 
22671 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22672   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22673   match(Set dst (ExtractI src idx));
22674   match(Set dst (ExtractS src idx));
22675   match(Set dst (ExtractB src idx));
22676   format %{ "extractI $dst,$src,$idx\t!" %}
22677   ins_encode %{
22678     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22679 
22680     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22681     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22682   %}
22683   ins_pipe( pipe_slow );
22684 %}
22685 
instruct vextractI(rRegI dst, legVec src, immU8 idx, legVec vtmp) %{
22687   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22688             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22689   match(Set dst (ExtractI src idx));
22690   match(Set dst (ExtractS src idx));
22691   match(Set dst (ExtractB src idx));
22692   effect(TEMP vtmp);
22693   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22694   ins_encode %{
22695     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22696 
22697     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22698     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22699     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22700   %}
22701   ins_pipe( pipe_slow );
22702 %}
22703 
22704 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22705   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22706   match(Set dst (ExtractL src idx));
22707   format %{ "extractL $dst,$src,$idx\t!" %}
22708   ins_encode %{
22709     assert(UseSSE >= 4, "required");
22710     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22711 
22712     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22713   %}
22714   ins_pipe( pipe_slow );
22715 %}
22716 
22717 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22718   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22719             Matcher::vector_length(n->in(1)) == 8);  // src
22720   match(Set dst (ExtractL src idx));
22721   effect(TEMP vtmp);
22722   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22723   ins_encode %{
22724     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22725 
22726     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22727     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22728   %}
22729   ins_pipe( pipe_slow );
22730 %}
22731 
22732 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22733   predicate(Matcher::vector_length(n->in(1)) <= 4);
22734   match(Set dst (ExtractF src idx));
22735   effect(TEMP dst, TEMP vtmp);
22736   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22737   ins_encode %{
22738     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22739 
22740     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22741   %}
22742   ins_pipe( pipe_slow );
22743 %}
22744 
22745 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22746   predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22747             Matcher::vector_length(n->in(1)/*src*/) == 16);
22748   match(Set dst (ExtractF src idx));
22749   effect(TEMP vtmp);
22750   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22751   ins_encode %{
22752     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22753 
22754     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22755     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22756   %}
22757   ins_pipe( pipe_slow );
22758 %}
22759 
22760 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22761   predicate(Matcher::vector_length(n->in(1)) == 2); // src
22762   match(Set dst (ExtractD src idx));
22763   format %{ "extractD $dst,$src,$idx\t!" %}
22764   ins_encode %{
22765     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22766 
22767     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22768   %}
22769   ins_pipe( pipe_slow );
22770 %}
22771 
22772 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22773   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22774             Matcher::vector_length(n->in(1)) == 8);  // src
22775   match(Set dst (ExtractD src idx));
22776   effect(TEMP vtmp);
22777   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22778   ins_encode %{
22779     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22780 
22781     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22782     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22783   %}
22784   ins_pipe( pipe_slow );
22785 %}
22786 
22787 // --------------------------------- Vector Blend --------------------------------------
22788 
22789 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22790   predicate(UseAVX == 0);
22791   match(Set dst (VectorBlend (Binary dst src) mask));
22792   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
22793   effect(TEMP tmp);
22794   ins_encode %{
22795     assert(UseSSE >= 4, "required");
22796 
22797     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22798       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22799     }
22800     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22801   %}
22802   ins_pipe( pipe_slow );
22803 %}
22804 
22805 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22806   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22807             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22808             Matcher::vector_length_in_bytes(n) <= 32 &&
22809             is_integral_type(Matcher::vector_element_basic_type(n)));
22810   match(Set dst (VectorBlend (Binary src1 src2) mask));
22811   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22812   ins_encode %{
22813     int vlen_enc = vector_length_encoding(this);
22814     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22815   %}
22816   ins_pipe( pipe_slow );
22817 %}
22818 
22819 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22820   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22821             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22822             Matcher::vector_length_in_bytes(n) <= 32 &&
22823             !is_integral_type(Matcher::vector_element_basic_type(n)));
22824   match(Set dst (VectorBlend (Binary src1 src2) mask));
22825   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22826   ins_encode %{
22827     int vlen_enc = vector_length_encoding(this);
22828     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22829   %}
22830   ins_pipe( pipe_slow );
22831 %}
22832 
22833 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22834   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22835             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22836             Matcher::vector_length_in_bytes(n) <= 32);
22837   match(Set dst (VectorBlend (Binary src1 src2) mask));
22838   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22839   effect(TEMP vtmp, TEMP dst);
22840   ins_encode %{
22841     int vlen_enc = vector_length_encoding(this);
22842     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22843     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22844     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
22845   %}
22846   ins_pipe( pipe_slow );
22847 %}
22848 
22849 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22850   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22851             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22852   match(Set dst (VectorBlend (Binary src1 src2) mask));
22853   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22854   effect(TEMP ktmp);
22855   ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
22858     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22859     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22860   %}
22861   ins_pipe( pipe_slow );
22862 %}
22863 
22864 
22865 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22866   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22867             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22868              VM_Version::supports_avx512bw()));
22869   match(Set dst (VectorBlend (Binary src1 src2) mask));
22870   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22871   ins_encode %{
22872     int vlen_enc = vector_length_encoding(this);
22873     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22874     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22875   %}
22876   ins_pipe( pipe_slow );
22877 %}
22878 
22879 // --------------------------------- ABS --------------------------------------
22880 // a = |a|
22881 instruct vabsB_reg(vec dst, vec src) %{
22882   match(Set dst (AbsVB  src));
22883   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22884   ins_encode %{
22885     uint vlen = Matcher::vector_length(this);
22886     if (vlen <= 16) {
22887       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22888     } else {
22889       int vlen_enc = vector_length_encoding(this);
22890       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22891     }
22892   %}
22893   ins_pipe( pipe_slow );
22894 %}
22895 
22896 instruct vabsS_reg(vec dst, vec src) %{
22897   match(Set dst (AbsVS  src));
22898   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22899   ins_encode %{
22900     uint vlen = Matcher::vector_length(this);
22901     if (vlen <= 8) {
22902       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22903     } else {
22904       int vlen_enc = vector_length_encoding(this);
22905       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22906     }
22907   %}
22908   ins_pipe( pipe_slow );
22909 %}
22910 
22911 instruct vabsI_reg(vec dst, vec src) %{
22912   match(Set dst (AbsVI  src));
22913   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22914   ins_encode %{
22915     uint vlen = Matcher::vector_length(this);
22916     if (vlen <= 4) {
22917       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22918     } else {
22919       int vlen_enc = vector_length_encoding(this);
22920       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22921     }
22922   %}
22923   ins_pipe( pipe_slow );
22924 %}
22925 
22926 instruct vabsL_reg(vec dst, vec src) %{
22927   match(Set dst (AbsVL  src));
22928   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22929   ins_encode %{
22930     assert(UseAVX > 2, "required");
22931     int vlen_enc = vector_length_encoding(this);
22932     if (!VM_Version::supports_avx512vl()) {
22933       vlen_enc = Assembler::AVX_512bit;
22934     }
22935     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22936   %}
22937   ins_pipe( pipe_slow );
22938 %}
22939 
22940 // --------------------------------- ABSNEG --------------------------------------
22941 
22942 instruct vabsnegF(vec dst, vec src) %{
22943   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22944   match(Set dst (AbsVF src));
22945   match(Set dst (NegVF src));
22946   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22947   ins_cost(150);
22948   ins_encode %{
22949     int opcode = this->ideal_Opcode();
22950     int vlen = Matcher::vector_length(this);
22951     if (vlen == 2) {
22952       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22953     } else {
22954       assert(vlen == 8 || vlen == 16, "required");
22955       int vlen_enc = vector_length_encoding(this);
22956       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22957     }
22958   %}
22959   ins_pipe( pipe_slow );
22960 %}
22961 
22962 instruct vabsneg4F(vec dst) %{
22963   predicate(Matcher::vector_length(n) == 4);
22964   match(Set dst (AbsVF dst));
22965   match(Set dst (NegVF dst));
22966   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22967   ins_cost(150);
22968   ins_encode %{
22969     int opcode = this->ideal_Opcode();
22970     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22971   %}
22972   ins_pipe( pipe_slow );
22973 %}
22974 
22975 instruct vabsnegD(vec dst, vec src) %{
22976   match(Set dst (AbsVD  src));
22977   match(Set dst (NegVD  src));
22978   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22979   ins_encode %{
22980     int opcode = this->ideal_Opcode();
22981     uint vlen = Matcher::vector_length(this);
22982     if (vlen == 2) {
22983       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22984     } else {
22985       int vlen_enc = vector_length_encoding(this);
22986       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22987     }
22988   %}
22989   ins_pipe( pipe_slow );
22990 %}
22991 
22992 //------------------------------------- VectorTest --------------------------------------------
22993 
22994 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22995   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22996   match(Set cr (VectorTest src1 src2));
22997   effect(TEMP vtmp);
22998   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
22999   ins_encode %{
23000     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23001     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23002     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23003   %}
23004   ins_pipe( pipe_slow );
23005 %}
23006 
23007 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23008   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23009   match(Set cr (VectorTest src1 src2));
23010   format %{ "vptest_ge16  $src1, $src2\n\t" %}
23011   ins_encode %{
23012     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23013     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23014     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23015   %}
23016   ins_pipe( pipe_slow );
23017 %}
23018 
23019 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23020   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23021              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23022             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23023   match(Set cr (VectorTest src1 src2));
23024   effect(TEMP tmp);
23025   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23026   ins_encode %{
23027     uint masklen = Matcher::vector_length(this, $src1);
23028     __ kmovwl($tmp$$Register, $src1$$KRegister);
23029     __ andl($tmp$$Register, (1 << masklen) - 1);
23030     __ cmpl($tmp$$Register, (1 << masklen) - 1);
23031   %}
23032   ins_pipe( pipe_slow );
23033 %}
23034 
23035 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23036   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23037              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23038             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23039   match(Set cr (VectorTest src1 src2));
23040   effect(TEMP tmp);
23041   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23042   ins_encode %{
23043     uint masklen = Matcher::vector_length(this, $src1);
23044     __ kmovwl($tmp$$Register, $src1$$KRegister);
23045     __ andl($tmp$$Register, (1 << masklen) - 1);
23046   %}
23047   ins_pipe( pipe_slow );
23048 %}
23049 
23050 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23051   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23052             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23053   match(Set cr (VectorTest src1 src2));
23054   format %{ "ktest_ge8  $src1, $src2\n\t" %}
23055   ins_encode %{
23056     uint masklen = Matcher::vector_length(this, $src1);
23057     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23058   %}
23059   ins_pipe( pipe_slow );
23060 %}
23061 
23062 //------------------------------------- LoadMask --------------------------------------------
23063 
23064 instruct loadMask(legVec dst, legVec src) %{
23065   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23066   match(Set dst (VectorLoadMask src));
23067   effect(TEMP dst);
23068   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23069   ins_encode %{
23070     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23071     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23072     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23073   %}
23074   ins_pipe( pipe_slow );
23075 %}
23076 
23077 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23078   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23079   match(Set dst (VectorLoadMask src));
23080   effect(TEMP xtmp);
23081   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23082   ins_encode %{
23083     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23084                         true, Assembler::AVX_512bit);
23085   %}
23086   ins_pipe( pipe_slow );
23087 %}
23088 
instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23090   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23091   match(Set dst (VectorLoadMask src));
23092   effect(TEMP xtmp);
23093   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23094   ins_encode %{
23095     int vlen_enc = vector_length_encoding(in(1));
23096     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23097                         false, vlen_enc);
23098   %}
23099   ins_pipe( pipe_slow );
23100 %}
23101 
23102 //------------------------------------- StoreMask --------------------------------------------
23103 
23104 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23105   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23106   match(Set dst (VectorStoreMask src size));
23107   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23108   ins_encode %{
23109     int vlen = Matcher::vector_length(this);
23110     if (vlen <= 16 && UseAVX <= 2) {
23111       assert(UseSSE >= 3, "required");
23112       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23113     } else {
23114       assert(UseAVX > 0, "required");
23115       int src_vlen_enc = vector_length_encoding(this, $src);
23116       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23117     }
23118   %}
23119   ins_pipe( pipe_slow );
23120 %}
23121 
23122 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23123   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23124   match(Set dst (VectorStoreMask src size));
23125   effect(TEMP_DEF dst, TEMP xtmp);
23126   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23127   ins_encode %{
23128     int vlen_enc = Assembler::AVX_128bit;
23129     int vlen = Matcher::vector_length(this);
23130     if (vlen <= 8) {
23131       assert(UseSSE >= 3, "required");
23132       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23133       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23134       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23135     } else {
23136       assert(UseAVX > 0, "required");
23137       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23138       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23139       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23140     }
23141   %}
23142   ins_pipe( pipe_slow );
23143 %}
23144 
23145 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23146   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23147   match(Set dst (VectorStoreMask src size));
23148   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23149   effect(TEMP_DEF dst, TEMP xtmp);
23150   ins_encode %{
23151     int vlen_enc = Assembler::AVX_128bit;
23152     int vlen = Matcher::vector_length(this);
23153     if (vlen <= 4) {
23154       assert(UseSSE >= 3, "required");
23155       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23156       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23157       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23158       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23159     } else {
23160       assert(UseAVX > 0, "required");
23161       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23162       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23163       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23164       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23165       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23166     }
23167   %}
23168   ins_pipe( pipe_slow );
23169 %}
23170 
23171 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23172   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23173   match(Set dst (VectorStoreMask src size));
23174   effect(TEMP_DEF dst, TEMP xtmp);
23175   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23176   ins_encode %{
23177     assert(UseSSE >= 3, "required");
23178     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23179     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23180     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23181     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23182     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23183   %}
23184   ins_pipe( pipe_slow );
23185 %}
23186 
23187 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23188   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23189   match(Set dst (VectorStoreMask src size));
23190   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23191   effect(TEMP_DEF dst, TEMP vtmp);
23192   ins_encode %{
23193     int vlen_enc = Assembler::AVX_128bit;
23194     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23195     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23196     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23197     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23198     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23199     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23200     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23201   %}
23202   ins_pipe( pipe_slow );
23203 %}
23204 
23205 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23206   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23207   match(Set dst (VectorStoreMask src size));
23208   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23209   ins_encode %{
23210     int src_vlen_enc = vector_length_encoding(this, $src);
23211     int dst_vlen_enc = vector_length_encoding(this);
23212     if (!VM_Version::supports_avx512vl()) {
23213       src_vlen_enc = Assembler::AVX_512bit;
23214     }
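          // Truncate each 0/-1 dword lane to a byte, then map -1 to 1 with vpabsb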
23215     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23216     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23217   %}
23218   ins_pipe( pipe_slow );
23219 %}
23220 
23221 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23222   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23223   match(Set dst (VectorStoreMask src size));
23224   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23225   ins_encode %{
23226     int src_vlen_enc = vector_length_encoding(this, $src);
23227     int dst_vlen_enc = vector_length_encoding(this);
23228     if (!VM_Version::supports_avx512vl()) {
23229       src_vlen_enc = Assembler::AVX_512bit;
23230     }
23231     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23232     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23233   %}
23234   ins_pipe( pipe_slow );
23235 %}
23236 
23237 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23238   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23239   match(Set dst (VectorStoreMask mask size));
23240   effect(TEMP_DEF dst);
23241   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23242   ins_encode %{
23243     assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
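          // Expand the k-mask into int lanes (lanes with a clear mask bit are zeroed),
          // then narrow the dwords down to bytes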
23244     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23245                  false, Assembler::AVX_512bit, noreg);
23246     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23247   %}
23248   ins_pipe( pipe_slow );
23249 %}
23250 
23251 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23252   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23253   match(Set dst (VectorStoreMask mask size));
23254   effect(TEMP_DEF dst);
23255   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23256   ins_encode %{
23257     int dst_vlen_enc = vector_length_encoding(this);
23258     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23259     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23260   %}
23261   ins_pipe( pipe_slow );
23262 %}
23263 
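      // VectorMaskCast keeps the lane count; it is a no-op for kReg masks (one bit per
      // lane regardless of element type) and for vector masks whose length in bytes is
      // unchanged.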
23264 instruct vmaskcast_evex(kReg dst) %{
23265   match(Set dst (VectorMaskCast dst));
23266   ins_cost(0);
23267   format %{ "vector_mask_cast $dst" %}
23268   ins_encode %{
23269     // empty
23270   %}
23271   ins_pipe(empty);
23272 %}
23273 
23274 instruct vmaskcast(vec dst) %{
23275   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23276   match(Set dst (VectorMaskCast dst));
23277   ins_cost(0);
23278   format %{ "vector_mask_cast $dst" %}
23279   ins_encode %{
23280     // empty
23281   %}
23282   ins_pipe(empty);
23283 %}
23284 
23285 instruct vmaskcast_avx(vec dst, vec src) %{
23286   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23287   match(Set dst (VectorMaskCast src));
23288   format %{ "vector_mask_cast $dst, $src" %}
23289   ins_encode %{
23290     int vlen = Matcher::vector_length(this);
23291     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23292     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23293     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23294   %}
23295   ins_pipe(pipe_slow);
23296 %}
23297 
23298 //-------------------------------- Load Iota Indices ----------------------------------
23299 
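      // Load the constant index sequence {0, 1, 2, ...} for the vector's element type.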
23300 instruct loadIotaIndices(vec dst, immI_0 src) %{
23301   match(Set dst (VectorLoadConst src));
23302   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23303   ins_encode %{
23304      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23305      BasicType bt = Matcher::vector_element_basic_type(this);
23306      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23307   %}
23308   ins_pipe( pipe_slow );
23309 %}
23310 
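      // PopulateIndex: dst[i] = src1 + i (the step $src2 must be 1), built by broadcasting
      // src1 and adding the iota index sequence.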
23311 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23312   match(Set dst (PopulateIndex src1 src2));
23313   effect(TEMP dst, TEMP vtmp);
23314   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23315   ins_encode %{
23316      assert($src2$$constant == 1, "required");
23317      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23318      int vlen_enc = vector_length_encoding(this);
23319      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23320      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23321      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23322      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23323   %}
23324   ins_pipe( pipe_slow );
23325 %}
23326 
23327 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23328   match(Set dst (PopulateIndex src1 src2));
23329   effect(TEMP dst, TEMP vtmp);
23330   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23331   ins_encode %{
23332      assert($src2$$constant == 1, "required");
23333      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23334      int vlen_enc = vector_length_encoding(this);
23335      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23336      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23337      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23338      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23339   %}
23340   ins_pipe( pipe_slow );
23341 %}
23342 
23343 //-------------------------------- Rearrange ----------------------------------
23344 
23345 // LoadShuffle/Rearrange for Byte
23346 instruct rearrangeB(vec dst, vec shuffle) %{
23347   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23348             Matcher::vector_length(n) < 32);
23349   match(Set dst (VectorRearrange dst shuffle));
23350   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23351   ins_encode %{
23352     assert(UseSSE >= 4, "required");
23353     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23354   %}
23355   ins_pipe( pipe_slow );
23356 %}
23357 
23358 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23359   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23360             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23361   match(Set dst (VectorRearrange src shuffle));
23362   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23363   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23364   ins_encode %{
23365     assert(UseAVX >= 2, "required");
23366     // Swap src into vtmp1
23367     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23368     // Shuffle swapped src to get entries from other 128 bit lane
23369     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23370     // Shuffle original src to get entries from its own 128 bit lane
23371     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23372     // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23373     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23374     // Perform the blend
23375     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23376   %}
23377   ins_pipe( pipe_slow );
23378 %}
23379 
23380 
23381 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23382   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23383             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23384   match(Set dst (VectorRearrange src shuffle));
23385   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23386   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23387   ins_encode %{
23388     int vlen_enc = vector_length_encoding(this);
23389     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23390                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23391                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23392   %}
23393   ins_pipe( pipe_slow );
23394 %}
23395 
23396 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23397   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23398             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23399   match(Set dst (VectorRearrange src shuffle));
23400   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23401   ins_encode %{
23402     int vlen_enc = vector_length_encoding(this);
23403     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23404   %}
23405   ins_pipe( pipe_slow );
23406 %}
23407 
23408 // LoadShuffle/Rearrange for Short
23409 
23410 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23411   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23412             !VM_Version::supports_avx512bw());
23413   match(Set dst (VectorLoadShuffle src));
23414   effect(TEMP dst, TEMP vtmp);
23415   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23416   ins_encode %{
23417     // Create a byte shuffle mask from the short shuffle mask, since only a
23418     // byte shuffle instruction is available on these platforms.
23419     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23420     if (UseAVX == 0) {
23421       assert(vlen_in_bytes <= 16, "required");
23422       // Multiply each shuffle by two to get byte index
23423       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23424       __ psllw($vtmp$$XMMRegister, 1);
23425 
23426       // Duplicate to create 2 copies of byte index
23427       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23428       __ psllw($dst$$XMMRegister, 8);
23429       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23430 
23431       // Add one to get alternate byte index
23432       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23433       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23434     } else {
23435       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23436       int vlen_enc = vector_length_encoding(this);
23437       // Multiply each shuffle by two to get byte index
23438       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23439 
23440       // Duplicate to create 2 copies of byte index
23441       __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister,  8, vlen_enc);
23442       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23443 
23444       // Add one to get alternate byte index
23445       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23446     }
23447   %}
23448   ins_pipe( pipe_slow );
23449 %}
23450 
23451 instruct rearrangeS(vec dst, vec shuffle) %{
23452   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23453             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23454   match(Set dst (VectorRearrange dst shuffle));
23455   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23456   ins_encode %{
23457     assert(UseSSE >= 4, "required");
23458     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23459   %}
23460   ins_pipe( pipe_slow );
23461 %}
23462 
23463 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23464   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23465             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23466   match(Set dst (VectorRearrange src shuffle));
23467   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23468   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23469   ins_encode %{
23470     assert(UseAVX >= 2, "required");
23471     // Swap src into vtmp1
23472     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23473     // Shuffle swapped src to get entries from other 128 bit lane
23474     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23475     // Shuffle original src to get entries from its own 128 bit lane
23476     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23477     // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23478     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23479     // Perform the blend
23480     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23481   %}
23482   ins_pipe( pipe_slow );
23483 %}
23484 
23485 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23486   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23487             VM_Version::supports_avx512bw());
23488   match(Set dst (VectorRearrange src shuffle));
23489   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23490   ins_encode %{
23491     int vlen_enc = vector_length_encoding(this);
23492     if (!VM_Version::supports_avx512vl()) {
23493       vlen_enc = Assembler::AVX_512bit;
23494     }
23495     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23496   %}
23497   ins_pipe( pipe_slow );
23498 %}
23499 
23500 // LoadShuffle/Rearrange for Integer and Float
23501 
23502 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23503   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23504             Matcher::vector_length(n) == 4 && UseAVX == 0);
23505   match(Set dst (VectorLoadShuffle src));
23506   effect(TEMP dst, TEMP vtmp);
23507   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23508   ins_encode %{
23509     assert(UseSSE >= 4, "required");
23510 
23511     // Create a byte shuffle mask from the int shuffle mask, since only a
23512     // byte shuffle instruction is available on these platforms.
23513 
23514     // Duplicate and multiply each shuffle by 4
23515     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23516     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23517     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23518     __ psllw($vtmp$$XMMRegister, 2);
23519 
23520     // Duplicate again to create 4 copies of byte index
23521     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23522     __ psllw($dst$$XMMRegister, 8);
23523     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23524 
23525     // Add 3,2,1,0 to get alternate byte index
23526     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23527     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23528   %}
23529   ins_pipe( pipe_slow );
23530 %}
23531 
23532 instruct rearrangeI(vec dst, vec shuffle) %{
23533   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23534             UseAVX == 0);
23535   match(Set dst (VectorRearrange dst shuffle));
23536   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23537   ins_encode %{
23538     assert(UseSSE >= 4, "required");
23539     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23540   %}
23541   ins_pipe( pipe_slow );
23542 %}
23543 
23544 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23545   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23546             UseAVX > 0);
23547   match(Set dst (VectorRearrange src shuffle));
23548   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23549   ins_encode %{
23550     int vlen_enc = vector_length_encoding(this);
23551     BasicType bt = Matcher::vector_element_basic_type(this);
23552     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23553   %}
23554   ins_pipe( pipe_slow );
23555 %}
23556 
23557 // LoadShuffle/Rearrange for Long and Double
23558 
23559 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23560   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23561             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23562   match(Set dst (VectorLoadShuffle src));
23563   effect(TEMP dst, TEMP vtmp);
23564   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23565   ins_encode %{
23566     assert(UseAVX >= 2, "required");
23567 
23568     int vlen_enc = vector_length_encoding(this);
23569     // Create a double word shuffle mask from the long shuffle mask, since only a
23570     // double word shuffle instruction is available on these platforms.
23571 
23572     // Multiply each shuffle by two to get double word index
23573     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23574 
23575     // Duplicate each double word shuffle
23576     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23577     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23578 
23579     // Add one to get alternate double word index
23580     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23581   %}
23582   ins_pipe( pipe_slow );
23583 %}
23584 
23585 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23586   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23587             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23588   match(Set dst (VectorRearrange src shuffle));
23589   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23590   ins_encode %{
23591     assert(UseAVX >= 2, "required");
23592 
23593     int vlen_enc = vector_length_encoding(this);
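          // The shuffle was expanded to double word indices by loadShuffleL, so a dword
          // permute (vpermd) rearranges the long/double lanes correctly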
23594     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23595   %}
23596   ins_pipe( pipe_slow );
23597 %}
23598 
23599 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23600   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23601             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23602   match(Set dst (VectorRearrange src shuffle));
23603   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23604   ins_encode %{
23605     assert(UseAVX > 2, "required");
23606 
23607     int vlen_enc = vector_length_encoding(this);
23608     if (vlen_enc == Assembler::AVX_128bit) {
23609       vlen_enc = Assembler::AVX_256bit;
23610     }
23611     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23612   %}
23613   ins_pipe( pipe_slow );
23614 %}
23615 
23616 // --------------------------------- FMA --------------------------------------
23617 // a * b + c
23618 
23619 instruct vfmaF_reg(vec a, vec b, vec c) %{
23620   match(Set c (FmaVF  c (Binary a b)));
23621   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23622   ins_cost(150);
23623   ins_encode %{
23624     assert(UseFMA, "not enabled");
23625     int vlen_enc = vector_length_encoding(this);
23626     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23627   %}
23628   ins_pipe( pipe_slow );
23629 %}
23630 
23631 instruct vfmaF_mem(vec a, memory b, vec c) %{
23632   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23633   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23634   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23635   ins_cost(150);
23636   ins_encode %{
23637     assert(UseFMA, "not enabled");
23638     int vlen_enc = vector_length_encoding(this);
23639     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23640   %}
23641   ins_pipe( pipe_slow );
23642 %}
23643 
23644 instruct vfmaD_reg(vec a, vec b, vec c) %{
23645   match(Set c (FmaVD  c (Binary a b)));
23646   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23647   ins_cost(150);
23648   ins_encode %{
23649     assert(UseFMA, "not enabled");
23650     int vlen_enc = vector_length_encoding(this);
23651     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23652   %}
23653   ins_pipe( pipe_slow );
23654 %}
23655 
23656 instruct vfmaD_mem(vec a, memory b, vec c) %{
23657   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23658   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23659   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23660   ins_cost(150);
23661   ins_encode %{
23662     assert(UseFMA, "not enabled");
23663     int vlen_enc = vector_length_encoding(this);
23664     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23665   %}
23666   ins_pipe( pipe_slow );
23667 %}
23668 
23669 // --------------------------------- Vector Multiply Add --------------------------------------
23670 
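      // MulAddVS2VI: multiply adjacent pairs of short lanes and add each pair into an
      // int lane (pmaddwd / vpmaddwd).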
23671 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23672   predicate(UseAVX == 0);
23673   match(Set dst (MulAddVS2VI dst src1));
23674   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23675   ins_encode %{
23676     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23677   %}
23678   ins_pipe( pipe_slow );
23679 %}
23680 
23681 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23682   predicate(UseAVX > 0);
23683   match(Set dst (MulAddVS2VI src1 src2));
23684   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23685   ins_encode %{
23686     int vlen_enc = vector_length_encoding(this);
23687     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23688   %}
23689   ins_pipe( pipe_slow );
23690 %}
23691 
23692 // --------------------------------- Vector Multiply Add Add ----------------------------------
23693 
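      // Fuses MulAddVS2VI followed by AddVI into a single AVX-512 VNNI dot-product
      // instruction; the low ins_cost makes this form preferred when VNNI is available.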
23694 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23695   predicate(VM_Version::supports_avx512_vnni());
23696   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23697   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23698   ins_encode %{
23699     assert(UseAVX > 2, "required");
23700     int vlen_enc = vector_length_encoding(this);
23701     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23702   %}
23703   ins_pipe( pipe_slow );
23704   ins_cost(10);
23705 %}
23706 
23707 // --------------------------------- PopCount --------------------------------------
23708 
23709 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23710   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23711   match(Set dst (PopCountVI src));
23712   match(Set dst (PopCountVL src));
23713   format %{ "vector_popcount_integral $dst, $src" %}
23714   ins_encode %{
23715     int opcode = this->ideal_Opcode();
23716     int vlen_enc = vector_length_encoding(this, $src);
23717     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23718     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23719   %}
23720   ins_pipe( pipe_slow );
23721 %}
23722 
23723 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23724   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23725   match(Set dst (PopCountVI src mask));
23726   match(Set dst (PopCountVL src mask));
23727   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23728   ins_encode %{
23729     int vlen_enc = vector_length_encoding(this, $src);
23730     BasicType bt = Matcher::vector_element_basic_type(this, $src);
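          // Copy $src first so that lanes with a clear mask bit keep their original value
          // (the merge-masked popcount below only updates the selected lanes)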
23731     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23732     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23733   %}
23734   ins_pipe( pipe_slow );
23735 %}
23736 
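      // Fallback popcount for targets without the AVX-512 vector popcount instructions.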
23737 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23738   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23739   match(Set dst (PopCountVI src));
23740   match(Set dst (PopCountVL src));
23741   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23742   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23743   ins_encode %{
23744     int opcode = this->ideal_Opcode();
23745     int vlen_enc = vector_length_encoding(this, $src);
23746     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23747     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23748                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23749   %}
23750   ins_pipe( pipe_slow );
23751 %}
23752 
23753 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23754 
23755 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23756   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23757                                               Matcher::vector_length_in_bytes(n->in(1))));
23758   match(Set dst (CountTrailingZerosV src));
23759   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23760   ins_cost(400);
23761   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23762   ins_encode %{
23763     int vlen_enc = vector_length_encoding(this, $src);
23764     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23765     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23766                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23767   %}
23768   ins_pipe( pipe_slow );
23769 %}
23770 
23771 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23772   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23773             VM_Version::supports_avx512cd() &&
23774             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23775   match(Set dst (CountTrailingZerosV src));
23776   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23777   ins_cost(400);
23778   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23779   ins_encode %{
23780     int vlen_enc = vector_length_encoding(this, $src);
23781     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23782     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23783                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23784   %}
23785   ins_pipe( pipe_slow );
23786 %}
23787 
23788 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23789   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23790   match(Set dst (CountTrailingZerosV src));
23791   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23792   ins_cost(400);
23793   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23794   ins_encode %{
23795     int vlen_enc = vector_length_encoding(this, $src);
23796     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23797     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23798                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23799                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23800   %}
23801   ins_pipe( pipe_slow );
23802 %}
23803 
23804 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23805   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23806   match(Set dst (CountTrailingZerosV src));
23807   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23808   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23809   ins_encode %{
23810     int vlen_enc = vector_length_encoding(this, $src);
23811     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23812     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23813                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23814   %}
23815   ins_pipe( pipe_slow );
23816 %}
23817 
23818 
23819 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23820 
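      // vpternlogd evaluates an arbitrary three-input boolean function; $func is the 8-bit
      // truth table giving the result bit for each combination of bits from dst, src2 and
      // src3.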
23821 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23822   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23823   effect(TEMP dst);
23824   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23825   ins_encode %{
23826     int vector_len = vector_length_encoding(this);
23827     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23828   %}
23829   ins_pipe( pipe_slow );
23830 %}
23831 
23832 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23833   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23834   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23835   effect(TEMP dst);
23836   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23837   ins_encode %{
23838     int vector_len = vector_length_encoding(this);
23839     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23840   %}
23841   ins_pipe( pipe_slow );
23842 %}
23843 
23844 // --------------------------------- Rotation Operations ----------------------------------
23845 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23846   match(Set dst (RotateLeftV src shift));
23847   match(Set dst (RotateRightV src shift));
23848   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23849   ins_encode %{
23850     int opcode      = this->ideal_Opcode();
23851     int vector_len  = vector_length_encoding(this);
23852     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23853     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23854   %}
23855   ins_pipe( pipe_slow );
23856 %}
23857 
23858 instruct vprotate_var(vec dst, vec src, vec shift) %{
23859   match(Set dst (RotateLeftV src shift));
23860   match(Set dst (RotateRightV src shift));
23861   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23862   ins_encode %{
23863     int opcode      = this->ideal_Opcode();
23864     int vector_len  = vector_length_encoding(this);
23865     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23866     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23867   %}
23868   ins_pipe( pipe_slow );
23869 %}
23870 
23871 // ---------------------------------- Masked Operations ------------------------------------
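      // Masked vector loads/stores: the AVX forms use a per-lane vector mask, the EVEX
      // forms an opmask (k) register. On loads, lanes with a clear mask bit are zeroed;
      // on stores they are left untouched in memory.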
23872 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23873   predicate(!n->in(3)->bottom_type()->isa_vectmask());
23874   match(Set dst (LoadVectorMasked mem mask));
23875   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23876   ins_encode %{
23877     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23878     int vlen_enc = vector_length_encoding(this);
23879     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23880   %}
23881   ins_pipe( pipe_slow );
23882 %}
23883 
23884 
23885 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23886   predicate(n->in(3)->bottom_type()->isa_vectmask());
23887   match(Set dst (LoadVectorMasked mem mask));
23888   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23889   ins_encode %{
23890     BasicType elmType =  this->bottom_type()->is_vect()->element_basic_type();
23891     int vector_len = vector_length_encoding(this);
23892     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23893   %}
23894   ins_pipe( pipe_slow );
23895 %}
23896 
23897 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23898   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23899   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23900   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23901   ins_encode %{
23902     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23903     int vlen_enc = vector_length_encoding(src_node);
23904     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23905     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23906   %}
23907   ins_pipe( pipe_slow );
23908 %}
23909 
23910 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23911   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23912   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23913   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23914   ins_encode %{
23915     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23916     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23917     int vlen_enc = vector_length_encoding(src_node);
23918     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23919   %}
23920   ins_pipe( pipe_slow );
23921 %}
23922 
23923 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23924   match(Set addr (VerifyVectorAlignment addr mask));
23925   effect(KILL cr);
23926   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23927   ins_encode %{
23928     Label Lskip;
23929     // check if masked bits of addr are zero
23930     __ testq($addr$$Register, $mask$$constant);
23931     __ jccb(Assembler::equal, Lskip);
23932     __ stop("verify_vector_alignment found a misaligned vector memory access");
23933     __ bind(Lskip);
23934   %}
23935   ins_pipe(pipe_slow);
23936 %}
23937 
23938 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23939   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23940   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23941   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23942   ins_encode %{
23943     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23944     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23945 
23946     Label DONE;
23947     int vlen_enc = vector_length_encoding(this, $src1);
23948     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23949 
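          // ktmp2 = ~mask: lanes outside the comparison range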
23950     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23951     __ mov64($dst$$Register, -1L);
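          // ktmp1 = masked lanes where $src1 and $src2 compare equal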
23952     __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
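          // CF is set iff (~mask | equal) covers every lane, i.e. all masked lanes matched;
          // in that case the result stays -1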
23953     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23954     __ jccb(Assembler::carrySet, DONE);
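          // Otherwise return the index of the first lane that differs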
23955     __ kmovql($dst$$Register, $ktmp1$$KRegister);
23956     __ notq($dst$$Register);
23957     __ tzcntq($dst$$Register, $dst$$Register);
23958     __ bind(DONE);
23959   %}
23960   ins_pipe( pipe_slow );
23961 %}
23962 
23963 
23964 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23965   match(Set dst (VectorMaskGen len));
23966   effect(TEMP temp, KILL cr);
23967   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23968   ins_encode %{
23969     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23970   %}
23971   ins_pipe( pipe_slow );
23972 %}
23973 
23974 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23975   match(Set dst (VectorMaskGen len));
23976   format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
23977   effect(TEMP temp);
23978   ins_encode %{
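          // Materialize a k-register with the low $len bits set ($len == 0 yields an empty mask)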
23979     if ($len$$constant > 0) {
23980       __ mov64($temp$$Register, right_n_bits($len$$constant));
23981       __ kmovql($dst$$KRegister, $temp$$Register);
23982     } else {
23983       __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
23984     }
23985   %}
23986   ins_pipe( pipe_slow );
23987 %}
23988 
23989 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23990   predicate(n->in(1)->bottom_type()->isa_vectmask());
23991   match(Set dst (VectorMaskToLong mask));
23992   effect(TEMP dst, KILL cr);
23993   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23994   ins_encode %{
23995     int opcode = this->ideal_Opcode();
23996     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23997     int mask_len = Matcher::vector_length(this, $mask);
23998     int mask_size = mask_len * type2aelembytes(mbt);
23999     int vlen_enc = vector_length_encoding(this, $mask);
24000     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24001                              $dst$$Register, mask_len, mask_size, vlen_enc);
24002   %}
24003   ins_pipe( pipe_slow );
24004 %}
24005 
24006 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24007   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24008   match(Set dst (VectorMaskToLong mask));
24009   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24010   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24011   ins_encode %{
24012     int opcode = this->ideal_Opcode();
24013     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24014     int mask_len = Matcher::vector_length(this, $mask);
24015     int vlen_enc = vector_length_encoding(this, $mask);
24016     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24017                              $dst$$Register, mask_len, mbt, vlen_enc);
24018   %}
24019   ins_pipe( pipe_slow );
24020 %}
24021 
24022 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24023   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24024   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24025   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24026   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24027   ins_encode %{
24028     int opcode = this->ideal_Opcode();
24029     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24030     int mask_len = Matcher::vector_length(this, $mask);
24031     int vlen_enc = vector_length_encoding(this, $mask);
24032     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24033                              $dst$$Register, mask_len, mbt, vlen_enc);
24034   %}
24035   ins_pipe( pipe_slow );
24036 %}
24037 
24038 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24039   predicate(n->in(1)->bottom_type()->isa_vectmask());
24040   match(Set dst (VectorMaskTrueCount mask));
24041   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24042   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24043   ins_encode %{
24044     int opcode = this->ideal_Opcode();
24045     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24046     int mask_len = Matcher::vector_length(this, $mask);
24047     int mask_size = mask_len * type2aelembytes(mbt);
24048     int vlen_enc = vector_length_encoding(this, $mask);
24049     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24050                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24051   %}
24052   ins_pipe( pipe_slow );
24053 %}
24054 
24055 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24056   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24057   match(Set dst (VectorMaskTrueCount mask));
24058   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24059   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24060   ins_encode %{
24061     int opcode = this->ideal_Opcode();
24062     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24063     int mask_len = Matcher::vector_length(this, $mask);
24064     int vlen_enc = vector_length_encoding(this, $mask);
24065     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24066                              $tmp$$Register, mask_len, mbt, vlen_enc);
24067   %}
24068   ins_pipe( pipe_slow );
24069 %}
24070 
24071 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24072   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24073   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24074   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24075   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24076   ins_encode %{
24077     int opcode = this->ideal_Opcode();
24078     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24079     int mask_len = Matcher::vector_length(this, $mask);
24080     int vlen_enc = vector_length_encoding(this, $mask);
24081     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24082                              $tmp$$Register, mask_len, mbt, vlen_enc);
24083   %}
24084   ins_pipe( pipe_slow );
24085 %}
24086 
24087 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24088   predicate(n->in(1)->bottom_type()->isa_vectmask());
24089   match(Set dst (VectorMaskFirstTrue mask));
24090   match(Set dst (VectorMaskLastTrue mask));
24091   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24092   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24093   ins_encode %{
24094     int opcode = this->ideal_Opcode();
24095     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24096     int mask_len = Matcher::vector_length(this, $mask);
24097     int mask_size = mask_len * type2aelembytes(mbt);
24098     int vlen_enc = vector_length_encoding(this, $mask);
24099     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24100                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24101   %}
24102   ins_pipe( pipe_slow );
24103 %}
24104 
24105 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24106   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24107   match(Set dst (VectorMaskFirstTrue mask));
24108   match(Set dst (VectorMaskLastTrue mask));
24109   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24110   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24111   ins_encode %{
24112     int opcode = this->ideal_Opcode();
24113     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24114     int mask_len = Matcher::vector_length(this, $mask);
24115     int vlen_enc = vector_length_encoding(this, $mask);
24116     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24117                              $tmp$$Register, mask_len, mbt, vlen_enc);
24118   %}
24119   ins_pipe( pipe_slow );
24120 %}
24121 
24122 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24123   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24124   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24125   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24126   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24127   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24128   ins_encode %{
24129     int opcode = this->ideal_Opcode();
24130     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24131     int mask_len = Matcher::vector_length(this, $mask);
24132     int vlen_enc = vector_length_encoding(this, $mask);
24133     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24134                              $tmp$$Register, mask_len, mbt, vlen_enc);
24135   %}
24136   ins_pipe( pipe_slow );
24137 %}
24138 
24139 // --------------------------------- Compress/Expand Operations ---------------------------
24140 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24141   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24142   match(Set dst (CompressV src mask));
24143   match(Set dst (ExpandV src mask));
24144   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
24145   format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24146   ins_encode %{
24147     int opcode = this->ideal_Opcode();
24148     int vlen_enc = vector_length_encoding(this);
24149     BasicType bt  = Matcher::vector_element_basic_type(this);
24150     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24151                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24152   %}
24153   ins_pipe( pipe_slow );
24154 %}
24155 
24156 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24157   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24158   match(Set dst (CompressV src mask));
24159   match(Set dst (ExpandV src mask));
24160   format %{ "vector_compress_expand $dst, $src, $mask" %}
24161   ins_encode %{
24162     int opcode = this->ideal_Opcode();
24163     int vector_len = vector_length_encoding(this);
24164     BasicType bt  = Matcher::vector_element_basic_type(this);
24165     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24166   %}
24167   ins_pipe( pipe_slow );
24168 %}
24169 
24170 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24171   match(Set dst (CompressM mask));
24172   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24173   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24174   ins_encode %{
24175     assert(this->in(1)->bottom_type()->isa_vectmask(), "");
24176     int mask_len = Matcher::vector_length(this);
24177     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24178   %}
24179   ins_pipe( pipe_slow );
24180 %}
24181 
24182 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24183 
24184 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24185   predicate(!VM_Version::supports_gfni());
24186   match(Set dst (ReverseV src));
24187   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24188   format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24189   ins_encode %{
24190     int vec_enc = vector_length_encoding(this);
24191     BasicType bt = Matcher::vector_element_basic_type(this);
24192     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24193                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24194   %}
24195   ins_pipe( pipe_slow );
24196 %}
24197 
24198 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24199   predicate(VM_Version::supports_gfni());
24200   match(Set dst (ReverseV src));
24201   effect(TEMP dst, TEMP xtmp);
24202   format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24203   ins_encode %{
24204     int vec_enc = vector_length_encoding(this);
24205     BasicType bt  = Matcher::vector_element_basic_type(this);
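          // 64-bit GF(2) matrix consumed by GF2P8AFFINEQB to reverse the bit order within
          // each byte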
24206     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24207     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24208                                $xtmp$$XMMRegister);
24209   %}
24210   ins_pipe( pipe_slow );
24211 %}
24212 
24213 instruct vreverse_byte_reg(vec dst, vec src) %{
24214   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24215   match(Set dst (ReverseBytesV src));
24216   effect(TEMP dst);
24217   format %{ "vector_reverse_byte $dst, $src" %}
24218   ins_encode %{
24219     int vec_enc = vector_length_encoding(this);
24220     BasicType bt = Matcher::vector_element_basic_type(this);
24221     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24222   %}
24223   ins_pipe( pipe_slow );
24224 %}
24225 
24226 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24227   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24228   match(Set dst (ReverseBytesV src));
24229   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24230   format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24231   ins_encode %{
24232     int vec_enc = vector_length_encoding(this);
24233     BasicType bt = Matcher::vector_element_basic_type(this);
24234     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24235                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24236   %}
24237   ins_pipe( pipe_slow );
24238 %}
24239 
24240 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24241 
24242 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24243   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24244                                               Matcher::vector_length_in_bytes(n->in(1))));
24245   match(Set dst (CountLeadingZerosV src));
24246   format %{ "vector_count_leading_zeros $dst, $src" %}
24247   ins_encode %{
24248      int vlen_enc = vector_length_encoding(this, $src);
24249      BasicType bt = Matcher::vector_element_basic_type(this, $src);
24250      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24251                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24252   %}
24253   ins_pipe( pipe_slow );
24254 %}
24255 
24256 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24257   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24258                                               Matcher::vector_length_in_bytes(n->in(1))));
24259   match(Set dst (CountLeadingZerosV src mask));
24260   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24261   ins_encode %{
24262     int vlen_enc = vector_length_encoding(this, $src);
24263     BasicType bt = Matcher::vector_element_basic_type(this, $src);
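          // Copy $src so that lanes with a clear mask bit keep their original value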
24264     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24265     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24266                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24267   %}
24268   ins_pipe( pipe_slow );
24269 %}
24270 
24271 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24272   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24273             VM_Version::supports_avx512cd() &&
24274             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24275   match(Set dst (CountLeadingZerosV src));
24276   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24277   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24278   ins_encode %{
24279     int vlen_enc = vector_length_encoding(this, $src);
24280     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24281     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24282                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24283   %}
24284   ins_pipe( pipe_slow );
24285 %}
24286 
24287 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24288   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24289   match(Set dst (CountLeadingZerosV src));
24290   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24291   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24292   ins_encode %{
24293     int vlen_enc = vector_length_encoding(this, $src);
24294     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24295     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24296                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24297                                        $rtmp$$Register, true, vlen_enc);
24298   %}
24299   ins_pipe( pipe_slow );
24300 %}
24301 
24302 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24303   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24304             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24305   match(Set dst (CountLeadingZerosV src));
24306   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24307   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24308   ins_encode %{
24309     int vlen_enc = vector_length_encoding(this, $src);
24310     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24311     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24312                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24313   %}
24314   ins_pipe( pipe_slow );
24315 %}
24316 
24317 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24318   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24319             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24320   match(Set dst (CountLeadingZerosV src));
24321   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24322   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24323   ins_encode %{
24324     int vlen_enc = vector_length_encoding(this, $src);
24325     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24326     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24327                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24328   %}
24329   ins_pipe( pipe_slow );
24330 %}
24331 
24332 // ---------------------------------- Vector Masked Operations ------------------------------------
24333 
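      // Predicated vector arithmetic: the kReg mask selects the lanes to update; lanes
      // with a clear mask bit keep the value already in $dst (merge masking).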
24334 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24335   match(Set dst (AddVB (Binary dst src2) mask));
24336   match(Set dst (AddVS (Binary dst src2) mask));
24337   match(Set dst (AddVI (Binary dst src2) mask));
24338   match(Set dst (AddVL (Binary dst src2) mask));
24339   match(Set dst (AddVF (Binary dst src2) mask));
24340   match(Set dst (AddVD (Binary dst src2) mask));
24341   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24342   ins_encode %{
24343     int vlen_enc = vector_length_encoding(this);
24344     BasicType bt = Matcher::vector_element_basic_type(this);
24345     int opc = this->ideal_Opcode();
24346     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24347                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24348   %}
24349   ins_pipe( pipe_slow );
24350 %}
24351 
24352 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24353   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24354   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24355   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24356   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24357   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24358   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24359   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24360   ins_encode %{
24361     int vlen_enc = vector_length_encoding(this);
24362     BasicType bt = Matcher::vector_element_basic_type(this);
24363     int opc = this->ideal_Opcode();
24364     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24365                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24366   %}
24367   ins_pipe( pipe_slow );
24368 %}
24369 
24370 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24371   match(Set dst (XorV (Binary dst src2) mask));
24372   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24373   ins_encode %{
24374     int vlen_enc = vector_length_encoding(this);
24375     BasicType bt = Matcher::vector_element_basic_type(this);
24376     int opc = this->ideal_Opcode();
24377     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24378                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24379   %}
24380   ins_pipe( pipe_slow );
24381 %}
24382 
24383 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24384   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24385   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24386   ins_encode %{
24387     int vlen_enc = vector_length_encoding(this);
24388     BasicType bt = Matcher::vector_element_basic_type(this);
24389     int opc = this->ideal_Opcode();
24390     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24391                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24392   %}
24393   ins_pipe( pipe_slow );
24394 %}
24395 
24396 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24397   match(Set dst (OrV (Binary dst src2) mask));
24398   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24399   ins_encode %{
24400     int vlen_enc = vector_length_encoding(this);
24401     BasicType bt = Matcher::vector_element_basic_type(this);
24402     int opc = this->ideal_Opcode();
24403     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24404                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24405   %}
24406   ins_pipe( pipe_slow );
24407 %}
24408 
24409 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24410   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24411   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24412   ins_encode %{
24413     int vlen_enc = vector_length_encoding(this);
24414     BasicType bt = Matcher::vector_element_basic_type(this);
24415     int opc = this->ideal_Opcode();
24416     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24417                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24418   %}
24419   ins_pipe( pipe_slow );
24420 %}
24421 
24422 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24423   match(Set dst (AndV (Binary dst src2) mask));
24424   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24425   ins_encode %{
24426     int vlen_enc = vector_length_encoding(this);
24427     BasicType bt = Matcher::vector_element_basic_type(this);
24428     int opc = this->ideal_Opcode();
24429     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24430                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24431   %}
24432   ins_pipe( pipe_slow );
24433 %}
24434 
24435 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24436   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24437   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24438   ins_encode %{
24439     int vlen_enc = vector_length_encoding(this);
24440     BasicType bt = Matcher::vector_element_basic_type(this);
24441     int opc = this->ideal_Opcode();
24442     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24443                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24444   %}
24445   ins_pipe( pipe_slow );
24446 %}
24447 
24448 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24449   match(Set dst (SubVB (Binary dst src2) mask));
24450   match(Set dst (SubVS (Binary dst src2) mask));
24451   match(Set dst (SubVI (Binary dst src2) mask));
24452   match(Set dst (SubVL (Binary dst src2) mask));
24453   match(Set dst (SubVF (Binary dst src2) mask));
24454   match(Set dst (SubVD (Binary dst src2) mask));
24455   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24456   ins_encode %{
24457     int vlen_enc = vector_length_encoding(this);
24458     BasicType bt = Matcher::vector_element_basic_type(this);
24459     int opc = this->ideal_Opcode();
24460     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24461                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24462   %}
24463   ins_pipe( pipe_slow );
24464 %}
24465 
24466 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24467   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24468   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24469   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24470   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24471   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24472   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24473   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24474   ins_encode %{
24475     int vlen_enc = vector_length_encoding(this);
24476     BasicType bt = Matcher::vector_element_basic_type(this);
24477     int opc = this->ideal_Opcode();
24478     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24479                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24480   %}
24481   ins_pipe( pipe_slow );
24482 %}
24483 
24484 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24485   match(Set dst (MulVS (Binary dst src2) mask));
24486   match(Set dst (MulVI (Binary dst src2) mask));
24487   match(Set dst (MulVL (Binary dst src2) mask));
24488   match(Set dst (MulVF (Binary dst src2) mask));
24489   match(Set dst (MulVD (Binary dst src2) mask));
24490   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24491   ins_encode %{
24492     int vlen_enc = vector_length_encoding(this);
24493     BasicType bt = Matcher::vector_element_basic_type(this);
24494     int opc = this->ideal_Opcode();
24495     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24496                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24497   %}
24498   ins_pipe( pipe_slow );
24499 %}
24500 
24501 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24502   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24503   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24504   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24505   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24506   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24507   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24508   ins_encode %{
24509     int vlen_enc = vector_length_encoding(this);
24510     BasicType bt = Matcher::vector_element_basic_type(this);
24511     int opc = this->ideal_Opcode();
24512     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24513                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24514   %}
24515   ins_pipe( pipe_slow );
24516 %}
24517 
24518 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24519   match(Set dst (SqrtVF dst mask));
24520   match(Set dst (SqrtVD dst mask));
24521   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24522   ins_encode %{
24523     int vlen_enc = vector_length_encoding(this);
24524     BasicType bt = Matcher::vector_element_basic_type(this);
24525     int opc = this->ideal_Opcode();
24526     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24527                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24528   %}
24529   ins_pipe( pipe_slow );
24530 %}
24531 
24532 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24533   match(Set dst (DivVF (Binary dst src2) mask));
24534   match(Set dst (DivVD (Binary dst src2) mask));
24535   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24536   ins_encode %{
24537     int vlen_enc = vector_length_encoding(this);
24538     BasicType bt = Matcher::vector_element_basic_type(this);
24539     int opc = this->ideal_Opcode();
24540     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24541                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24542   %}
24543   ins_pipe( pipe_slow );
24544 %}
24545 
24546 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24547   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24548   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24549   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24550   ins_encode %{
24551     int vlen_enc = vector_length_encoding(this);
24552     BasicType bt = Matcher::vector_element_basic_type(this);
24553     int opc = this->ideal_Opcode();
24554     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24555                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24556   %}
24557   ins_pipe( pipe_slow );
24558 %}
24559 
24560 
24561 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24562   match(Set dst (RotateLeftV (Binary dst shift) mask));
24563   match(Set dst (RotateRightV (Binary dst shift) mask));
24564   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24565   ins_encode %{
24566     int vlen_enc = vector_length_encoding(this);
24567     BasicType bt = Matcher::vector_element_basic_type(this);
24568     int opc = this->ideal_Opcode();
24569     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24570                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24571   %}
24572   ins_pipe( pipe_slow );
24573 %}
24574 
24575 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24576   match(Set dst (RotateLeftV (Binary dst src2) mask));
24577   match(Set dst (RotateRightV (Binary dst src2) mask));
24578   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24579   ins_encode %{
24580     int vlen_enc = vector_length_encoding(this);
24581     BasicType bt = Matcher::vector_element_basic_type(this);
24582     int opc = this->ideal_Opcode();
24583     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24584                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24585   %}
24586   ins_pipe( pipe_slow );
24587 %}
24588 
24589 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24590   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24591   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24592   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24593   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24594   ins_encode %{
24595     int vlen_enc = vector_length_encoding(this);
24596     BasicType bt = Matcher::vector_element_basic_type(this);
24597     int opc = this->ideal_Opcode();
24598     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24599                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24600   %}
24601   ins_pipe( pipe_slow );
24602 %}
24603 
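// Masked shifts with a register count come in two flavors: the
// *shift_reg_masked rules handle a uniform shift count (is_var_shift() is
// false), while the *shiftv_reg_masked rules handle per-lane variable counts
// (is_var_shift() is true). The final boolean passed to evmasked_op forwards
// that distinction to the macro assembler.
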
24604 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24605   predicate(!n->as_ShiftV()->is_var_shift());
24606   match(Set dst (LShiftVS (Binary dst src2) mask));
24607   match(Set dst (LShiftVI (Binary dst src2) mask));
24608   match(Set dst (LShiftVL (Binary dst src2) mask));
24609   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24610   ins_encode %{
24611     int vlen_enc = vector_length_encoding(this);
24612     BasicType bt = Matcher::vector_element_basic_type(this);
24613     int opc = this->ideal_Opcode();
24614     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24615                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24616   %}
24617   ins_pipe( pipe_slow );
24618 %}
24619 
24620 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24621   predicate(n->as_ShiftV()->is_var_shift());
24622   match(Set dst (LShiftVS (Binary dst src2) mask));
24623   match(Set dst (LShiftVI (Binary dst src2) mask));
24624   match(Set dst (LShiftVL (Binary dst src2) mask));
24625   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24626   ins_encode %{
24627     int vlen_enc = vector_length_encoding(this);
24628     BasicType bt = Matcher::vector_element_basic_type(this);
24629     int opc = this->ideal_Opcode();
24630     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24631                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24632   %}
24633   ins_pipe( pipe_slow );
24634 %}
24635 
24636 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24637   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24638   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24639   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24640   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24641   ins_encode %{
24642     int vlen_enc = vector_length_encoding(this);
24643     BasicType bt = Matcher::vector_element_basic_type(this);
24644     int opc = this->ideal_Opcode();
24645     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24646                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24647   %}
24648   ins_pipe( pipe_slow );
24649 %}
24650 
24651 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24652   predicate(!n->as_ShiftV()->is_var_shift());
24653   match(Set dst (RShiftVS (Binary dst src2) mask));
24654   match(Set dst (RShiftVI (Binary dst src2) mask));
24655   match(Set dst (RShiftVL (Binary dst src2) mask));
24656   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24657   ins_encode %{
24658     int vlen_enc = vector_length_encoding(this);
24659     BasicType bt = Matcher::vector_element_basic_type(this);
24660     int opc = this->ideal_Opcode();
24661     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24662                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24663   %}
24664   ins_pipe( pipe_slow );
24665 %}
24666 
24667 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24668   predicate(n->as_ShiftV()->is_var_shift());
24669   match(Set dst (RShiftVS (Binary dst src2) mask));
24670   match(Set dst (RShiftVI (Binary dst src2) mask));
24671   match(Set dst (RShiftVL (Binary dst src2) mask));
24672   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24673   ins_encode %{
24674     int vlen_enc = vector_length_encoding(this);
24675     BasicType bt = Matcher::vector_element_basic_type(this);
24676     int opc = this->ideal_Opcode();
24677     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24678                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24679   %}
24680   ins_pipe( pipe_slow );
24681 %}
24682 
24683 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24684   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24685   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24686   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24687   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24688   ins_encode %{
24689     int vlen_enc = vector_length_encoding(this);
24690     BasicType bt = Matcher::vector_element_basic_type(this);
24691     int opc = this->ideal_Opcode();
24692     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24693                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24694   %}
24695   ins_pipe( pipe_slow );
24696 %}
24697 
24698 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24699   predicate(!n->as_ShiftV()->is_var_shift());
24700   match(Set dst (URShiftVS (Binary dst src2) mask));
24701   match(Set dst (URShiftVI (Binary dst src2) mask));
24702   match(Set dst (URShiftVL (Binary dst src2) mask));
24703   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24704   ins_encode %{
24705     int vlen_enc = vector_length_encoding(this);
24706     BasicType bt = Matcher::vector_element_basic_type(this);
24707     int opc = this->ideal_Opcode();
24708     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24709                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24710   %}
24711   ins_pipe( pipe_slow );
24712 %}
24713 
24714 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24715   predicate(n->as_ShiftV()->is_var_shift());
24716   match(Set dst (URShiftVS (Binary dst src2) mask));
24717   match(Set dst (URShiftVI (Binary dst src2) mask));
24718   match(Set dst (URShiftVL (Binary dst src2) mask));
24719   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24720   ins_encode %{
24721     int vlen_enc = vector_length_encoding(this);
24722     BasicType bt = Matcher::vector_element_basic_type(this);
24723     int opc = this->ideal_Opcode();
24724     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24725                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24726   %}
24727   ins_pipe( pipe_slow );
24728 %}
24729 
24730 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24731   match(Set dst (MaxV (Binary dst src2) mask));
24732   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24733   ins_encode %{
24734     int vlen_enc = vector_length_encoding(this);
24735     BasicType bt = Matcher::vector_element_basic_type(this);
24736     int opc = this->ideal_Opcode();
24737     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24738                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24739   %}
24740   ins_pipe( pipe_slow );
24741 %}
24742 
24743 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24744   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24745   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24746   ins_encode %{
24747     int vlen_enc = vector_length_encoding(this);
24748     BasicType bt = Matcher::vector_element_basic_type(this);
24749     int opc = this->ideal_Opcode();
24750     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24751                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24752   %}
24753   ins_pipe( pipe_slow );
24754 %}
24755 
24756 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24757   match(Set dst (MinV (Binary dst src2) mask));
24758   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24759   ins_encode %{
24760     int vlen_enc = vector_length_encoding(this);
24761     BasicType bt = Matcher::vector_element_basic_type(this);
24762     int opc = this->ideal_Opcode();
24763     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24764                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24765   %}
24766   ins_pipe( pipe_slow );
24767 %}
24768 
24769 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24770   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24771   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24772   ins_encode %{
24773     int vlen_enc = vector_length_encoding(this);
24774     BasicType bt = Matcher::vector_element_basic_type(this);
24775     int opc = this->ideal_Opcode();
24776     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24777                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24778   %}
24779   ins_pipe( pipe_slow );
24780 %}
24781 
24782 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24783   match(Set dst (VectorRearrange (Binary dst src2) mask));
24784   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24785   ins_encode %{
24786     int vlen_enc = vector_length_encoding(this);
24787     BasicType bt = Matcher::vector_element_basic_type(this);
24788     int opc = this->ideal_Opcode();
24789     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24790                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24791   %}
24792   ins_pipe( pipe_slow );
24793 %}
24794 
24795 instruct vabs_masked(vec dst, kReg mask) %{
24796   match(Set dst (AbsVB dst mask));
24797   match(Set dst (AbsVS dst mask));
24798   match(Set dst (AbsVI dst mask));
24799   match(Set dst (AbsVL dst mask));
24800   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24801   ins_encode %{
24802     int vlen_enc = vector_length_encoding(this);
24803     BasicType bt = Matcher::vector_element_basic_type(this);
24804     int opc = this->ideal_Opcode();
24805     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24806                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24807   %}
24808   ins_pipe( pipe_slow );
24809 %}
24810 
24811 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24812   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24813   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24814   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24815   ins_encode %{
24816     assert(UseFMA, "Needs FMA instructions support.");
24817     int vlen_enc = vector_length_encoding(this);
24818     BasicType bt = Matcher::vector_element_basic_type(this);
24819     int opc = this->ideal_Opcode();
24820     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24821                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24822   %}
24823   ins_pipe( pipe_slow );
24824 %}
24825 
24826 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24827   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24828   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24829   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24830   ins_encode %{
24831     assert(UseFMA, "Needs FMA instructions support.");
24832     int vlen_enc = vector_length_encoding(this);
24833     BasicType bt = Matcher::vector_element_basic_type(this);
24834     int opc = this->ideal_Opcode();
24835     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24836                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24837   %}
24838   ins_pipe( pipe_slow );
24839 %}
24840 
24841 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24842   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24843   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24844   ins_encode %{
24845     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24846     int vlen_enc = vector_length_encoding(this, $src1);
24847     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24848 
24849     // Dispatch on the element type of the operands and emit the matching masked compare.
24850     switch (src1_elem_bt) {
24851       case T_BYTE: {
24852         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24853         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24854         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24855         break;
24856       }
24857       case T_SHORT: {
24858         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24859         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24860         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24861         break;
24862       }
24863       case T_INT: {
24864         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24865         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24866         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24867         break;
24868       }
24869       case T_LONG: {
24870         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24871         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24872         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24873         break;
24874       }
24875       case T_FLOAT: {
24876         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24877         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24878         break;
24879       }
24880       case T_DOUBLE: {
24881         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24882         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24883         break;
24884       }
24885       default: assert(false, "%s", type2name(src1_elem_bt)); break;
24886     }
24887   %}
24888   ins_pipe( pipe_slow );
24889 %}
24890 
24891 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24892   predicate(Matcher::vector_length(n) <= 32);
24893   match(Set dst (MaskAll src));
24894   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24895   ins_encode %{
24896     int mask_len = Matcher::vector_length(this);
24897     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24898   %}
24899   ins_pipe( pipe_slow );
24900 %}
24901 
24902 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24903   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24904   match(Set dst (XorVMask src (MaskAll cnt)));
24905   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24906   format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
24907   ins_encode %{
24908     uint masklen = Matcher::vector_length(this);
24909     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24910   %}
24911   ins_pipe( pipe_slow );
24912 %}
24913 
24914 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24915   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24916             (Matcher::vector_length(n) == 16) ||
24917             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24918   match(Set dst (XorVMask src (MaskAll cnt)));
24919   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24920   ins_encode %{
24921     uint masklen = Matcher::vector_length(this);
24922     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24923   %}
24924   ins_pipe( pipe_slow );
24925 %}
24926 
24927 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2) %{
24928   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24929   match(Set dst (VectorLongToMask src));
24930   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2);
24931   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2" %}
24932   ins_encode %{
24933     int mask_len = Matcher::vector_length(this);
24934     int vec_enc  = vector_length_encoding(mask_len);
24935     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24936                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24937   %}
24938   ins_pipe( pipe_slow );
24939 %}
24940 
24941 
24942 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24943   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24944   match(Set dst (VectorLongToMask src));
24945   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24946   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
24947   ins_encode %{
24948     int mask_len = Matcher::vector_length(this);
24949     assert(mask_len <= 32, "invalid mask length");
24950     int vec_enc  = vector_length_encoding(mask_len);
24951     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24952                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24953   %}
24954   ins_pipe( pipe_slow );
24955 %}
24956 
24957 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24958   predicate(n->bottom_type()->isa_vectmask());
24959   match(Set dst (VectorLongToMask src));
24960   format %{ "long_to_mask_evex $dst, $src\t!" %}
24961   ins_encode %{
24962     __ kmov($dst$$KRegister, $src$$Register);
24963   %}
24964   ins_pipe( pipe_slow );
24965 %}
24966 
24967 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24968   match(Set dst (AndVMask src1 src2));
24969   match(Set dst (OrVMask src1 src2));
24970   match(Set dst (XorVMask src1 src2));
24971   effect(TEMP kscratch);
24972   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24973   ins_encode %{
24974     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24975     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24976     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24977     uint masklen = Matcher::vector_length(this);
24978     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24979     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24980   %}
24981   ins_pipe( pipe_slow );
24982 %}
24983 
24984 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24985   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24986   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24987   ins_encode %{
24988     int vlen_enc = vector_length_encoding(this);
24989     BasicType bt = Matcher::vector_element_basic_type(this);
24990     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24991                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24992   %}
24993   ins_pipe( pipe_slow );
24994 %}
24995 
24996 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24997   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24998   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24999   ins_encode %{
25000     int vlen_enc = vector_length_encoding(this);
25001     BasicType bt = Matcher::vector_element_basic_type(this);
25002     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25003                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25004   %}
25005   ins_pipe( pipe_slow );
25006 %}
25007 
25008 instruct castMM(kReg dst)
25009 %{
25010   match(Set dst (CastVV dst));
25011 
25012   size(0);
25013   format %{ "# castVV of $dst" %}
25014   ins_encode(/* empty encoding */);
25015   ins_cost(0);
25016   ins_pipe(empty);
25017 %}
25018 
25019 instruct castVV(vec dst)
25020 %{
25021   match(Set dst (CastVV dst));
25022 
25023   size(0);
25024   format %{ "# castVV of $dst" %}
25025   ins_encode(/* empty encoding */);
25026   ins_cost(0);
25027   ins_pipe(empty);
25028 %}
25029 
25030 instruct castVVLeg(legVec dst)
25031 %{
25032   match(Set dst (CastVV dst));
25033 
25034   size(0);
25035   format %{ "# castVV of $dst" %}
25036   ins_encode(/* empty encoding */);
25037   ins_cost(0);
25038   ins_pipe(empty);
25039 %}
25040 
25041 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25042 %{
25043   match(Set dst (IsInfiniteF src));
25044   effect(TEMP ktmp, KILL cr);
25045   format %{ "float_class_check $dst, $src" %}
25046   ins_encode %{
25047     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25048     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25049   %}
25050   ins_pipe(pipe_slow);
25051 %}
25052 
25053 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25054 %{
25055   match(Set dst (IsInfiniteD src));
25056   effect(TEMP ktmp, KILL cr);
25057   format %{ "double_class_check $dst, $src" %}
25058   ins_encode %{
25059     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25060     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25061   %}
25062   ins_pipe(pipe_slow);
25063 %}
25064 
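// ---------------------------------- Vector Saturating Operations ------------------------------------

// Byte and short lanes map onto the native saturating SSE/AVX instructions via
// vector_saturating_op, while int and long lanes have no direct hardware
// support, so the *_dq_saturating_* helpers emulate saturation, with separate
// EVEX and AVX flavors chosen by vector length and AVX512VL availability.
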
25065 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25066 %{
25067   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25068             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25069   match(Set dst (SaturatingAddV src1 src2));
25070   match(Set dst (SaturatingSubV src1 src2));
25071   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25072   ins_encode %{
25073     int vlen_enc = vector_length_encoding(this);
25074     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25075     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25076                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25077   %}
25078   ins_pipe(pipe_slow);
25079 %}
25080 
25081 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25082 %{
25083   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25084             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25085   match(Set dst (SaturatingAddV src1 src2));
25086   match(Set dst (SaturatingSubV src1 src2));
25087   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25088   ins_encode %{
25089     int vlen_enc = vector_length_encoding(this);
25090     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25091     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25092                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25093   %}
25094   ins_pipe(pipe_slow);
25095 %}
25096 
25097 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25098 %{
25099   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25100             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25101             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25102   match(Set dst (SaturatingAddV src1 src2));
25103   match(Set dst (SaturatingSubV src1 src2));
25104   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25105   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25106   ins_encode %{
25107     int vlen_enc = vector_length_encoding(this);
25108     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25109     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25110                                         $src1$$XMMRegister, $src2$$XMMRegister,
25111                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25112                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25113   %}
25114   ins_pipe(pipe_slow);
25115 %}
25116 
25117 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25118 %{
25119   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25120             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25121             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25122   match(Set dst (SaturatingAddV src1 src2));
25123   match(Set dst (SaturatingSubV src1 src2));
25124   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25125   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25126   ins_encode %{
25127     int vlen_enc = vector_length_encoding(this);
25128     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25129     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25130                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25131                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25132   %}
25133   ins_pipe(pipe_slow);
25134 %}
25135 
25136 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25137 %{
25138   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25139             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25140             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25141   match(Set dst (SaturatingAddV src1 src2));
25142   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25143   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25144   ins_encode %{
25145     int vlen_enc = vector_length_encoding(this);
25146     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25147     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25148                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25149   %}
25150   ins_pipe(pipe_slow);
25151 %}
25152 
25153 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25154 %{
25155   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25156             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25157             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25158   match(Set dst (SaturatingAddV src1 src2));
25159   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25160   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25161   ins_encode %{
25162     int vlen_enc = vector_length_encoding(this);
25163     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25164     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25165                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25166   %}
25167   ins_pipe(pipe_slow);
25168 %}
25169 
25170 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25171 %{
25172   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25173             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25174             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25175   match(Set dst (SaturatingSubV src1 src2));
25176   effect(TEMP ktmp);
25177   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25178   ins_encode %{
25179     int vlen_enc = vector_length_encoding(this);
25180     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25181     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25182                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25183   %}
25184   ins_pipe(pipe_slow);
25185 %}
25186 
25187 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25188 %{
25189   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25190             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25191             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25192   match(Set dst (SaturatingSubV src1 src2));
25193   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25194   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25195   ins_encode %{
25196     int vlen_enc = vector_length_encoding(this);
25197     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25198     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25199                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25200   %}
25201   ins_pipe(pipe_slow);
25202 %}
25203 
25204 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25205 %{
25206   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25207             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25208   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25209   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25210   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25211   ins_encode %{
25212     int vlen_enc = vector_length_encoding(this);
25213     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25214     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25215                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25216   %}
25217   ins_pipe(pipe_slow);
25218 %}
25219 
25220 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25221 %{
25222   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25223             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25224   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25225   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25226   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25227   ins_encode %{
25228     int vlen_enc = vector_length_encoding(this);
25229     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25230     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25231                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25232   %}
25233   ins_pipe(pipe_slow);
25234 %}
25235 
25236 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25237   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25238             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25239   match(Set dst (SaturatingAddV (Binary dst src) mask));
25240   match(Set dst (SaturatingSubV (Binary dst src) mask));
25241   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25242   ins_encode %{
25243     int vlen_enc = vector_length_encoding(this);
25244     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25245     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25246                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25247   %}
25248   ins_pipe( pipe_slow );
25249 %}
25250 
25251 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25252   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25253             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25254   match(Set dst (SaturatingAddV (Binary dst src) mask));
25255   match(Set dst (SaturatingSubV (Binary dst src) mask));
25256   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25257   ins_encode %{
25258     int vlen_enc = vector_length_encoding(this);
25259     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25260     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25261                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25262   %}
25263   ins_pipe( pipe_slow );
25264 %}
25265 
25266 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25267   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25268             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25269   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25270   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25271   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25272   ins_encode %{
25273     int vlen_enc = vector_length_encoding(this);
25274     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25275     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25276                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25277   %}
25278   ins_pipe( pipe_slow );
25279 %}
25280 
25281 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25282   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25283             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25284   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25285   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25286   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25287   ins_encode %{
25288     int vlen_enc = vector_length_encoding(this);
25289     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25290     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25291                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25292   %}
25293   ins_pipe( pipe_slow );
25294 %}
25295 
25296 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25297 %{
25298   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25299   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25300   ins_encode %{
25301     int vlen_enc = vector_length_encoding(this);
25302     BasicType bt = Matcher::vector_element_basic_type(this);
25303     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25304   %}
25305   ins_pipe(pipe_slow);
25306 %}
25307 
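// ---------------------------------- Half-precision (FP16) Operations --------------------------------

// The scalar and vector rules below implement Float16 arithmetic. A scalar
// half-float lives in the low 16 bits of an XMM register; ReinterpretS2HF and
// ReinterpretHF2S move the raw bits between general purpose and XMM registers,
// and the arithmetic rules emit AVX512-FP16 instructions (vsqrtsh, evfp16ph,
// evfmadd132ph, ...), with AVX10.2 forms used for min/max when available.
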
25308 instruct reinterpretS2HF(regF dst, rRegI src)
25309 %{
25310   match(Set dst (ReinterpretS2HF src));
25311   format %{ "evmovw $dst, $src" %}
25312   ins_encode %{
25313     __ evmovw($dst$$XMMRegister, $src$$Register);
25314   %}
25315   ins_pipe(pipe_slow);
25316 %}
25317 
25318 instruct reinterpretHF2S(rRegI dst, regF src)
25319 %{
25320   match(Set dst (ReinterpretHF2S src));
25321   format %{ "evmovw $dst, $src" %}
25322   ins_encode %{
25323     __ evmovw($dst$$Register, $src$$XMMRegister);
25324     __ narrow_subword_type($dst$$Register, T_SHORT);
25325   %}
25326   ins_pipe(pipe_slow);
25327 %}
25328 
25329 instruct convF2HFAndS2HF(regF dst, regF src)
25330 %{
25331   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25332   format %{ "convF2HFAndS2HF $dst, $src" %}
25333   ins_encode %{
25334     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25335   %}
25336   ins_pipe(pipe_slow);
25337 %}
25338 
25339 instruct convHF2SAndHF2F(regF dst, regF src)
25340 %{
25341   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25342   format %{ "convHF2SAndHF2F $dst, $src" %}
25343   ins_encode %{
25344     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25345   %}
25346   ins_pipe(pipe_slow);
25347 %}
25348 
25349 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25350 %{
25351   match(Set dst (SqrtHF src));
25352   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25353   ins_encode %{
25354     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25355   %}
25356   ins_pipe(pipe_slow);
25357 %}
25358 
25359 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25360 %{
25361   match(Set dst (AddHF src1 src2));
25362   match(Set dst (DivHF src1 src2));
25363   match(Set dst (MulHF src1 src2));
25364   match(Set dst (SubHF src1 src2));
25365   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25366   ins_encode %{
25367     int opcode = this->ideal_Opcode();
25368     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25369   %}
25370   ins_pipe(pipe_slow);
25371 %}
25372 
25373 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25374 %{
25375   predicate(VM_Version::supports_avx10_2());
25376   match(Set dst (MaxHF src1 src2));
25377   match(Set dst (MinHF src1 src2));
25378 
25379   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25380   ins_encode %{
25381     int opcode = this->ideal_Opcode();
25382     __ sminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, k0);
25383   %}
25384   ins_pipe( pipe_slow );
25385 %}
25386 
25387 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25388 %{
25389   predicate(!VM_Version::supports_avx10_2());
25390   match(Set dst (MaxHF src1 src2));
25391   match(Set dst (MinHF src1 src2));
25392   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25393 
25394   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25395   ins_encode %{
25396     int opcode = this->ideal_Opcode();
25397     __ sminmax_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25398                     $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25399   %}
25400   ins_pipe( pipe_slow );
25401 %}
25402 
25403 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25404 %{
25405   match(Set dst (FmaHF src2 (Binary dst src1)));
25406   effect(DEF dst);
25407   format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25408   ins_encode %{
25409     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25410   %}
25411   ins_pipe( pipe_slow );
25412 %}
25413 
25414 
25415 instruct vector_sqrt_HF_reg(vec dst, vec src)
25416 %{
25417   match(Set dst (SqrtVHF src));
25418   format %{ "vector_sqrt_fp16 $dst, $src" %}
25419   ins_encode %{
25420     int vlen_enc = vector_length_encoding(this);
25421     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25422   %}
25423   ins_pipe(pipe_slow);
25424 %}
25425 
25426 instruct vector_sqrt_HF_mem(vec dst, memory src)
25427 %{
25428   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25429   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25430   ins_encode %{
25431     int vlen_enc = vector_length_encoding(this);
25432     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25433   %}
25434   ins_pipe(pipe_slow);
25435 %}
25436 
25437 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25438 %{
25439   match(Set dst (AddVHF src1 src2));
25440   match(Set dst (DivVHF src1 src2));
25441   match(Set dst (MulVHF src1 src2));
25442   match(Set dst (SubVHF src1 src2));
25443   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25444   ins_encode %{
25445     int vlen_enc = vector_length_encoding(this);
25446     int opcode = this->ideal_Opcode();
25447     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25448   %}
25449   ins_pipe(pipe_slow);
25450 %}
25451 
25452 
25453 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25454 %{
25455   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25456   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25457   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25458   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25459   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25460   ins_encode %{
25461     int vlen_enc = vector_length_encoding(this);
25462     int opcode = this->ideal_Opcode();
25463     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25464   %}
25465   ins_pipe(pipe_slow);
25466 %}
25467 
25468 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25469 %{
25470   match(Set dst (FmaVHF src2 (Binary dst src1)));
25471   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25472   ins_encode %{
25473     int vlen_enc = vector_length_encoding(this);
25474     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25475   %}
25476   ins_pipe( pipe_slow );
25477 %}
25478 
25479 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25480 %{
25481   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25482   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25483   ins_encode %{
25484     int vlen_enc = vector_length_encoding(this);
25485     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25486   %}
25487   ins_pipe( pipe_slow );
25488 %}
25489 
25490 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25491 %{
25492   predicate(VM_Version::supports_avx10_2());
25493   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25494   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25495   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25496   ins_encode %{
25497     int vlen_enc = vector_length_encoding(this);
25498     int opcode = this->ideal_Opcode();
25499     __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
25500                             k0, vlen_enc);
25501   %}
25502   ins_pipe( pipe_slow );
25503 %}
25504 
25505 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25506 %{
25507   predicate(VM_Version::supports_avx10_2());
25508   match(Set dst (MinVHF src1 src2));
25509   match(Set dst (MaxVHF src1 src2));
25510   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25511   ins_encode %{
25512     int vlen_enc = vector_length_encoding(this);
25513     int opcode = this->ideal_Opcode();
25514     __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25515                             k0, vlen_enc);
25516   %}
25517   ins_pipe( pipe_slow );
25518 %}
25519 
25520 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25521 %{
25522   predicate(!VM_Version::supports_avx10_2());
25523   match(Set dst (MinVHF src1 src2));
25524   match(Set dst (MaxVHF src1 src2));
25525   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25526   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25527   ins_encode %{
25528     int vlen_enc = vector_length_encoding(this);
25529     int opcode = this->ideal_Opcode();
25530     __ vminmax_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25531                     $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25532   %}
25533   ins_pipe( pipe_slow );
25534 %}
25535 
25536 //----------PEEPHOLE RULES-----------------------------------------------------
25537 // These must follow all instruction definitions as they use the names
25538 // defined in the instructions definitions.
25539 //
25540 // peeppredicate ( rule_predicate );
// // the rule is applied only if this predicate evaluates to true; otherwise it is ignored
25542 //
25543 // peepmatch ( root_instr_name [preceding_instruction]* );
25544 //
25545 // peepprocedure ( procedure_name );
// // provide the name of a procedure that performs the optimization; the
// // procedure should reside in the architecture-dependent peephole file and
// // have the signature MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...),
// // with the arguments being the basic block, the current node index inside the
// // block, the register allocator, the functions that, when invoked, return a
// // new node as defined in peepreplace, and the rule numbers of the nodes
// // appearing in the corresponding peepmatch. The procedure returns true if
// // successful, else returns false
25554 //
// peepconstraint ( instruction_number.operand_name relational_op instruction_number.operand_name
//                  [, ...] );
25558 // // instruction numbers are zero-based using left to right order in peepmatch
25559 //
25560 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25561 // // provide an instruction_number.operand_name for each operand that appears
25562 // // in the replacement instruction's match rule
25563 //
25564 // ---------VM FLAGS---------------------------------------------------------
25565 //
25566 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25567 //
25568 // Each peephole rule is given an identifying number starting with zero and
25569 // increasing by one in the order seen by the parser.  An individual peephole
25570 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25571 // on the command-line.
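//
// For example (the rule number shown here is purely illustrative):
//   java -XX:-OptoPeephole ...       disables all peephole optimizations
//   java -XX:OptoPeepholeAt=3 ...    enables only the peephole rule numbered 3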
25572 //
25573 // ---------CURRENT LIMITATIONS----------------------------------------------
25574 //
25575 // Only transformations inside a basic block (do we need more for peephole)
25576 //
25577 // ---------EXAMPLE----------------------------------------------------------
25578 //
25579 // // pertinent parts of existing instructions in architecture description
25580 // instruct movI(rRegI dst, rRegI src)
25581 // %{
25582 //   match(Set dst (CopyI src));
25583 // %}
25584 //
25585 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25586 // %{
25587 //   match(Set dst (AddI dst src));
25588 //   effect(KILL cr);
25589 // %}
25590 //
25591 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25592 // %{
25593 //   match(Set dst (AddI dst src));
25594 // %}
25595 //
25596 // 1. Simple replacement
25597 // - Only match adjacent instructions in same basic block
25598 // - Only equality constraints
25599 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25600 // - Only one replacement instruction
25601 //
25602 // // Change (inc mov) to lea
25603 // peephole %{
25604 //   // lea should only be emitted when beneficial
25605 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25606 //   // increment preceded by register-register move
25607 //   peepmatch ( incI_rReg movI );
25608 //   // require that the destination register of the increment
25609 //   // match the destination register of the move
25610 //   peepconstraint ( 0.dst == 1.dst );
25611 //   // construct a replacement instruction that sets
25612 //   // the destination to ( move's source register + one )
25613 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25614 // %}
25615 //
25616 // 2. Procedural replacement
// - More flexibility in finding relevant nodes
25618 // - More flexible constraints
25619 // - More flexible transformations
25620 // - May utilise architecture-dependent API more effectively
25621 // - Currently only one replacement instruction due to adlc parsing capabilities
25622 //
25623 // // Change (inc mov) to lea
25624 // peephole %{
25625 //   // lea should only be emitted when beneficial
25626 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of the nodes matched here are passed into the procedure below
25628 //   peepmatch ( incI_rReg movI );
25629 //   // the method that takes the responsibility of transformation
25630 //   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that, when invoked, creates
//   // this node is passed into the procedure above
25633 //   peepreplace ( leaI_rReg_immI() );
25634 // %}
25635 
// These instructions are not matched by the matcher but are used by the peephole rules below
25637 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25638 %{
25639   predicate(false);
25640   match(Set dst (AddI src1 src2));
25641   format %{ "leal    $dst, [$src1 + $src2]" %}
25642   ins_encode %{
25643     Register dst = $dst$$Register;
25644     Register src1 = $src1$$Register;
25645     Register src2 = $src2$$Register;
25646     if (src1 != rbp && src1 != r13) {
25647       __ leal(dst, Address(src1, src2, Address::times_1));
25648     } else {
25649       assert(src2 != rbp && src2 != r13, "");
25650       __ leal(dst, Address(src2, src1, Address::times_1));
25651     }
25652   %}
25653   ins_pipe(ialu_reg_reg);
25654 %}
25655 
25656 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25657 %{
25658   predicate(false);
25659   match(Set dst (AddI src1 src2));
25660   format %{ "leal    $dst, [$src1 + $src2]" %}
25661   ins_encode %{
25662     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25663   %}
25664   ins_pipe(ialu_reg_reg);
25665 %}
25666 
25667 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25668 %{
25669   predicate(false);
25670   match(Set dst (LShiftI src shift));
25671   format %{ "leal    $dst, [$src << $shift]" %}
25672   ins_encode %{
25673     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25674     Register src = $src$$Register;
25675     if (scale == Address::times_2 && src != rbp && src != r13) {
25676       __ leal($dst$$Register, Address(src, src, Address::times_1));
25677     } else {
25678       __ leal($dst$$Register, Address(noreg, src, scale));
25679     }
25680   %}
25681   ins_pipe(ialu_reg_reg);
25682 %}
25683 
25684 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25685 %{
25686   predicate(false);
25687   match(Set dst (AddL src1 src2));
25688   format %{ "leaq    $dst, [$src1 + $src2]" %}
25689   ins_encode %{
25690     Register dst = $dst$$Register;
25691     Register src1 = $src1$$Register;
25692     Register src2 = $src2$$Register;
25693     if (src1 != rbp && src1 != r13) {
25694       __ leaq(dst, Address(src1, src2, Address::times_1));
25695     } else {
25696       assert(src2 != rbp && src2 != r13, "");
25697       __ leaq(dst, Address(src2, src1, Address::times_1));
25698     }
25699   %}
25700   ins_pipe(ialu_reg_reg);
25701 %}
25702 
25703 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25704 %{
25705   predicate(false);
25706   match(Set dst (AddL src1 src2));
25707   format %{ "leaq    $dst, [$src1 + $src2]" %}
25708   ins_encode %{
25709     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25710   %}
25711   ins_pipe(ialu_reg_reg);
25712 %}
25713 
25714 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25715 %{
25716   predicate(false);
25717   match(Set dst (LShiftL src shift));
25718   format %{ "leaq    $dst, [$src << $shift]" %}
25719   ins_encode %{
25720     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25721     Register src = $src$$Register;
25722     if (scale == Address::times_2 && src != rbp && src != r13) {
25723       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25724     } else {
25725       __ leaq($dst$$Register, Address(noreg, src, scale));
25726     }
25727   %}
25728   ins_pipe(ialu_reg_reg);
25729 %}
25730 
// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (supports_fast_2op_lea()), whereas the {inc, dec} rules are generally only
// beneficial on processors with full ALU support for lea
// (VM_Version::supports_fast_3op_lea()) or on Intel Cascade Lake.
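//
// As a schematic illustration (registers chosen arbitrarily, and assuming the
// flags produced by the replaced arithmetic instruction are not consumed
// downstream, since lea does not set flags):
//
//   movl eax, ebx        becomes       leal eax, [ebx + ecx]
//   addl eax, ecx
//
//   movl eax, ebx        becomes       leal eax, [ebx << 2]
//   sall eax, 2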
25737 
25738 peephole
25739 %{
25740   peeppredicate(VM_Version::supports_fast_2op_lea());
25741   peepmatch (addI_rReg);
25742   peepprocedure (lea_coalesce_reg);
25743   peepreplace (leaI_rReg_rReg_peep());
25744 %}
25745 
25746 peephole
25747 %{
25748   peeppredicate(VM_Version::supports_fast_2op_lea());
25749   peepmatch (addI_rReg_imm);
25750   peepprocedure (lea_coalesce_imm);
25751   peepreplace (leaI_rReg_immI_peep());
25752 %}
25753 
25754 peephole
25755 %{
25756   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25757                 VM_Version::is_intel_cascade_lake());
25758   peepmatch (incI_rReg);
25759   peepprocedure (lea_coalesce_imm);
25760   peepreplace (leaI_rReg_immI_peep());
25761 %}
25762 
25763 peephole
25764 %{
25765   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25766                 VM_Version::is_intel_cascade_lake());
25767   peepmatch (decI_rReg);
25768   peepprocedure (lea_coalesce_imm);
25769   peepreplace (leaI_rReg_immI_peep());
25770 %}
25771 
25772 peephole
25773 %{
25774   peeppredicate(VM_Version::supports_fast_2op_lea());
25775   peepmatch (salI_rReg_immI2);
25776   peepprocedure (lea_coalesce_imm);
25777   peepreplace (leaI_rReg_immI2_peep());
25778 %}
25779 
25780 peephole
25781 %{
25782   peeppredicate(VM_Version::supports_fast_2op_lea());
25783   peepmatch (addL_rReg);
25784   peepprocedure (lea_coalesce_reg);
25785   peepreplace (leaL_rReg_rReg_peep());
25786 %}
25787 
25788 peephole
25789 %{
25790   peeppredicate(VM_Version::supports_fast_2op_lea());
25791   peepmatch (addL_rReg_imm);
25792   peepprocedure (lea_coalesce_imm);
25793   peepreplace (leaL_rReg_immL32_peep());
25794 %}
25795 
25796 peephole
25797 %{
25798   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25799                 VM_Version::is_intel_cascade_lake());
25800   peepmatch (incL_rReg);
25801   peepprocedure (lea_coalesce_imm);
25802   peepreplace (leaL_rReg_immL32_peep());
25803 %}
25804 
25805 peephole
25806 %{
25807   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25808                 VM_Version::is_intel_cascade_lake());
25809   peepmatch (decL_rReg);
25810   peepprocedure (lea_coalesce_imm);
25811   peepreplace (leaL_rReg_immL32_peep());
25812 %}
25813 
25814 peephole
25815 %{
25816   peeppredicate(VM_Version::supports_fast_2op_lea());
25817   peepmatch (salL_rReg_immI2);
25818   peepprocedure (lea_coalesce_imm);
25819   peepreplace (leaL_rReg_immI2_peep());
25820 %}
25821 
25822 peephole
25823 %{
25824   peepmatch (leaPCompressedOopOffset);
25825   peepprocedure (lea_remove_redundant);
25826 %}
25827 
25828 peephole
25829 %{
25830   peepmatch (leaP8Narrow);
25831   peepprocedure (lea_remove_redundant);
25832 %}
25833 
25834 peephole
25835 %{
25836   peepmatch (leaP32Narrow);
25837   peepprocedure (lea_remove_redundant);
25838 %}
25839 
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant if the downstream instructions (like JCC or CMOV) only
// use flags that are already set by the previous instruction.
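//
// As a schematic illustration (operands and label chosen arbitrarily): the andl
// below already sets the flags for its result, so the testl of that same result
// adds nothing for the following jne and can be removed:
//
//   andl  eax, ebx                      andl  eax, ebx
//   testl eax, eax       becomes        jne   taken
//   jne   taken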
25842 
// int variant
25844 peephole
25845 %{
25846   peepmatch (testI_reg);
25847   peepprocedure (test_may_remove);
25848 %}
25849 
// long variant
25851 peephole
25852 %{
25853   peepmatch (testL_reg);
25854   peepprocedure (test_may_remove);
25855 %}
25856 
25857 
25858 //----------SMARTSPILL RULES---------------------------------------------------
25859 // These must follow all instruction definitions as they use the names
25860 // defined in the instructions definitions.