//
// Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

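// As a worked reading of the first definition below (illustration only):
// RAX is declared (SOC, SOC, Op_RegI, 0, rax->as_VMReg()), meaning it is
// save-on-call under both the allocator's and the C calling convention, is
// spilled and reloaded as an integer, and carries hardware encoding 0.
// RAX_H names the upper half of the 64-bit register and shares that encoding.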
// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers)
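// For example (illustrative machine encodings, not part of this file): a
// 64-bit immediate move into RAX encodes as 48 B8 <imm64>, needing only
// REX.W, while the same move into R8 encodes as 49 B8 <imm64>, adding REX.B
// to reach the extended registers.  Likewise, byte-register encoding 6 names
// DH without a REX prefix but SIL once any REX prefix is present.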

// Previously set RBX, RSI, and RDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on RSI and RDI as SOE registers.

reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

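// Reading chunk0 below against this heuristic: the scratch registers R10 and
// R11 come first, registers tied to fixed calling sequences (such as RAX and
// RBP) sit near the end, and RSP, which is never handed out as an ordinary
// allocatable register, comes last.  Each register is listed next to its _H
// half so that pairs fall on an even boundary.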
alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   R16,         R16_H,
                   R17,         R17_H,
                   R18,         R18_H,
                   R19,         R19_H,
                   R20,         R20_H,
                   R21,         R21_H,
                   R22,         R22_H,
                   R23,         R23_H,
                   R24,         R24_H,
                   R25,         R25_H,
                   R26,         R26_H,
                   R27,         R27_H,
                   R28,         R28_H,
                   R29,         R29_H,
                   R30,         R30_H,
                   R31,         R31_H,
                   RSP,         RSP_H);

// XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters
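// A practical consequence of the ABI split above (informal note, not ABI
// text): on Windows, values left in XMM6-XMM15 survive calls into C code,
// while on Linux every XMM value that is live across such a call must be
// spilled, because no XMM register is callee-saved there.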

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());
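// Note (background, not stated in this file): K0 is deliberately absent,
// since the EVEX encoding reserves mask value 0 to mean "no masking", so k0
// cannot act as a general write-mask register.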


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

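// The classes below are defined dynamically: each %{ ... %} body returns a
// register mask instead of naming a fixed list.  A sketch of the rationale,
// assuming the usual HotSpot arrangement: the masks (e.g. _PTR_REG_mask) are
// computed once at VM startup, so registers that are only conditionally
// present, notably the APX extended GPRs R16-R31, can be included or left
// out according to the actual CPU features, which a static reg_class could
// not express.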
// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
// The flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
// Class for pre-EVEX float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
// Class for EVEX float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
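// reg_class_dynamic selects between two statically defined register classes
// each time the class is queried: the first (EVEX) class when the trailing
// predicate holds, the second (legacy) class otherwise.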
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1115 
// Class for pre-EVEX double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
// Class for EVEX double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
// Class for pre-EVEX 32-bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
// Class for EVEX 32-bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre-EVEX 64-bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for EVEX 64-bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre-EVEX 128-bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for EVEX 128-bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre-EVEX 256-bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for EVEX 256-bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
// Class for EVEX 512-bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
// Class for restricted 512-bit vector registers (legacy encoding, XMM0-XMM15 only)
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
 1465 bool castLL_is_imm32(const Node* n) {
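  // True when every finite bound of n's long type fits in a signed 32-bit
  // immediate; an unbounded end (min_jlong/max_jlong) needs no check and is
  // exempt. For example, a type of [0, 1L << 40] fails because the upper
  // bound would need an imm64 comparison.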
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
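// R12 is reserved as the heap-base register while compressed oops are in
// use, so it must be kept out of the allocatable masks built below.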
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
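  // r16-r31 are the APX extended GPRs; when UseAPX is off they are stripped
  // from every allocatable mask derived below.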
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
static bool generate_vzeroupper(Compile* C) {
  // Emit vzeroupper when the compiled code uses wide vectors or must clear
  // the upper AVX state before calls.
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());
}
 1629 
static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper encodes in 3 bytes
}
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
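  // movq r10, imm64 encodes in 10 bytes; the indirect callq *r10 in 3.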
 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;
 1678 }
 1679 
 1680 // This could be in MacroAssembler but it's fairly C2 specific
 1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1682   Label exit;
 1683   __ jccb(Assembler::noParity, exit);
 1684   __ pushf();
 1685   //
 1686   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1687   // zero OF,AF,SF for NaN values.
 1688   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1689   // values returns 'less than' result (CF is set).
 1690   // Leave the rest of flags unchanged.
 1691   //
 1692   //    7 6 5 4 3 2 1 0
 1693   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1694   //    0 0 1 0 1 0 1 1   (0x2B)
 1695   //
 1696   __ andq(Address(rsp, 0), 0xffffff2b);
 1697   __ popf();
 1698   __ bind(exit);
 1699 }
 1700 
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  // Materialize the three-way compare result in dst: -1 (below or unordered),
  // 0 (equal) or 1 (above). After a floating point compare the unordered
  // case sets CF, so the 'below' branch is also taken when at least one
  // input is NaN.
 1704   Label done;
 1705   __ movl(dst, -1);
 1706   __ jcc(Assembler::below, done);
 1707   __ setcc(Assembler::notEqual, dst);
 1708   __ bind(done);
 1709 }
 1710 
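// Precision selector shared by the scalar FP helpers below: half (Float16),
// single, and double precision.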
 1711 enum FP_PREC {
 1712   fp_prec_hlf,
 1713   fp_prec_flt,
 1714   fp_prec_dbl
 1715 };
 1716 
 1717 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
 1718                                 XMMRegister p, XMMRegister q) {
 1719   if (pt == fp_prec_hlf) {
 1720     __ evucomish(p, q);
 1721   } else if (pt == fp_prec_flt) {
 1722     __ ucomiss(p, q);
 1723   } else {
 1724     __ ucomisd(p, q);
 1725   }
 1726 }
 1727 
 1728 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
 1729                          XMMRegister dst, XMMRegister src, Register scratch) {
 1730   if (pt == fp_prec_hlf) {
 1731     __ movhlf(dst, src, scratch);
 1732   } else if (pt == fp_prec_flt) {
 1733     __ movflt(dst, src);
 1734   } else {
 1735     __ movdbl(dst, src);
 1736   }
 1737 }
 1738 
 1739 // Math.min()          # Math.max()
 1740 // -----------------------------
 1741 // (v)ucomis[h/s/d]    #
 1742 // ja   -> b           # a
 1743 // jp   -> NaN         # NaN
 1744 // jb   -> a           # b
 1745 // je                  #
 1746 // |-jz -> a | b       # a & b
 1747 // |    -> a           #
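//
// On 'equal' the inputs may still be +0.0 and -0.0, which ucomis compares as
// equal: OR-ing the raw bits (min) yields -0.0 if either input is -0.0,
// while AND-ing them (max) yields +0.0 unless both are -0.0.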
 1748 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1749                             XMMRegister a, XMMRegister b,
 1750                             XMMRegister xmmt, Register rt,
 1751                             bool min, enum FP_PREC pt) {
 1752 
 1753   Label nan, zero, below, above, done;
 1754 
 1755   emit_fp_ucom(masm, pt, a, b);
 1756 
 1757   if (dst->encoding() != (min ? b : a)->encoding()) {
 1758     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1759   } else {
 1760     __ jccb(Assembler::above, done);
 1761   }
 1762 
 1763   __ jccb(Assembler::parity, nan);  // PF=1
 1764   __ jccb(Assembler::below, below); // CF=1
 1765 
 1766   // equal
 1767   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1768   emit_fp_ucom(masm, pt, a, xmmt);
 1769 
 1770   __ jccb(Assembler::equal, zero);
 1771   movfp(masm, pt, dst, a, rt);
 1772 
 1773   __ jmp(done);
 1774 
 1775   __ bind(zero);
 1776   if (min) {
 1777     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1778   } else {
 1779     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1780   }
 1781 
 1782   __ jmp(done);
 1783 
 1784   __ bind(above);
 1785   movfp(masm, pt, dst, min ? b : a, rt);
 1786 
 1787   __ jmp(done);
 1788 
 1789   __ bind(nan);
 1790   if (pt == fp_prec_hlf) {
 1791     __ movl(rt, 0x00007e00); // Float16.NaN
 1792     __ evmovw(dst, rt);
 1793   } else if (pt == fp_prec_flt) {
 1794     __ movl(rt, 0x7fc00000); // Float.NaN
 1795     __ movdl(dst, rt);
 1796   } else {
 1797     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1798     __ movdq(dst, rt);
 1799   }
 1800   __ jmp(done);
 1801 
 1802   __ bind(below);
 1803   movfp(masm, pt, dst, min ? a : b, rt);
 1804 
 1805   __ bind(done);
 1806 }
 1807 
 1808 //=============================================================================
 1809 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1810 
 1811 int ConstantTable::calculate_table_base_offset() const {
 1812   return 0;  // absolute addressing, no offset
 1813 }
 1814 
 1815 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1816 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1817   ShouldNotReachHere();
 1818 }
 1819 
 1820 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1821   // Empty encoding
 1822 }
 1823 
 1824 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1825   return 0;
 1826 }
 1827 
 1828 #ifndef PRODUCT
 1829 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1830   st->print("# MachConstantBaseNode (empty encoding)");
 1831 }
 1832 #endif
 1833 
 1834 
 1835 //=============================================================================
 1836 #ifndef PRODUCT
 1837 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1838   Compile* C = ra_->C;
 1839 
 1840   int framesize = C->output()->frame_size_in_bytes();
 1841   int bangsize = C->output()->bang_size_in_bytes();
 1842   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1843   // Remove wordSize for return addr which is already pushed.
 1844   framesize -= wordSize;
 1845 
 1846   if (C->output()->need_stack_bang(bangsize)) {
 1847     framesize -= wordSize;
 1848     st->print("# stack bang (%d bytes)", bangsize);
 1849     st->print("\n\t");
 1850     st->print("pushq   rbp\t# Save rbp");
 1851     if (PreserveFramePointer) {
 1852         st->print("\n\t");
 1853         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1854     }
 1855     if (framesize) {
 1856       st->print("\n\t");
 1857       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1858     }
 1859   } else {
 1860     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1861     st->print("\n\t");
 1862     framesize -= wordSize;
 1863     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1864     if (PreserveFramePointer) {
 1865       st->print("\n\t");
 1866       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1867       if (framesize > 0) {
 1868         st->print("\n\t");
 1869         st->print("addq    rbp, #%d", framesize);
 1870       }
 1871     }
 1872   }
 1873 
 1874   if (VerifyStackAtCalls) {
 1875     st->print("\n\t");
 1876     framesize -= wordSize;
 1877     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1878 #ifdef ASSERT
 1879     st->print("\n\t");
 1880     st->print("# stack alignment check");
 1881 #endif
 1882   }
 1883   if (C->stub_function() != nullptr) {
 1884     st->print("\n\t");
 1885     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1886     st->print("\n\t");
 1887     st->print("je      fast_entry\t");
 1888     st->print("\n\t");
 1889     st->print("call    #nmethod_entry_barrier_stub\t");
 1890     st->print("\n\tfast_entry:");
 1891   }
 1892   st->cr();
 1893 }
 1894 #endif
 1895 
 1896 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1897   Compile* C = ra_->C;
 1898 
 1899   int framesize = C->output()->frame_size_in_bytes();
 1900   int bangsize = C->output()->bang_size_in_bytes();
 1901 
 1902   if (C->clinit_barrier_on_entry()) {
 1903     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1904     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1905 
 1906     Label L_skip_barrier;
 1907     Register klass = rscratch1;
 1908 
 1909     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1910     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1911 
 1912     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1913 
 1914     __ bind(L_skip_barrier);
 1915   }
 1916 
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
 1918 
 1919   C->output()->set_frame_complete(__ offset());
 1920 
 1921   if (C->has_mach_constant_base_node()) {
 1922     // NOTE: We set the table base offset here because users might be
 1923     // emitted before MachConstantBaseNode.
 1924     ConstantTable& constant_table = C->output()->constant_table();
 1925     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1926   }
 1927 }
 1928 
 1929 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1930 {
 1931   return MachNode::size(ra_); // too many variables; just compute it
 1932                               // the hard way
 1933 }
 1934 
 1935 int MachPrologNode::reloc() const
 1936 {
 1937   return 0; // a large enough number
 1938 }
 1939 
 1940 //=============================================================================
 1941 #ifndef PRODUCT
 1942 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1943 {
 1944   Compile* C = ra_->C;
 1945   if (generate_vzeroupper(C)) {
 1946     st->print("vzeroupper");
 1947     st->cr(); st->print("\t");
 1948   }
 1949 
 1950   int framesize = C->output()->frame_size_in_bytes();
 1951   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove a word each for the return address (already pushed) and RBP
  framesize -= 2*wordSize;
 1955 
 1956   if (framesize) {
 1957     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1958     st->print("\t");
 1959   }
 1960 
 1961   st->print_cr("popq    rbp");
 1962   if (do_polling() && C->is_method_compilation()) {
 1963     st->print("\t");
 1964     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1965                  "ja      #safepoint_stub\t"
 1966                  "# Safepoint: poll for GC");
 1967   }
 1968 }
 1969 #endif
 1970 
 1971 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1972 {
 1973   Compile* C = ra_->C;
 1974 
 1975   if (generate_vzeroupper(C)) {
 1976     // Clear upper bits of YMM registers when current compiled code uses
 1977     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1978     __ vzeroupper();
 1979   }
 1980 
 1981   int framesize = C->output()->frame_size_in_bytes();
 1982   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove a word each for the return address (already pushed) and RBP
  framesize -= 2*wordSize;
 1986 
 1987   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1988 
 1989   if (framesize) {
 1990     __ addq(rsp, framesize);
 1991   }
 1992 
 1993   __ popq(rbp);
 1994 
 1995   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1996     __ reserved_stack_check();
 1997   }
 1998 
 1999   if (do_polling() && C->is_method_compilation()) {
 2000     Label dummy_label;
 2001     Label* code_stub = &dummy_label;
 2002     if (!C->output()->in_scratch_emit_size()) {
 2003       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 2004       C->output()->add_stub(stub);
 2005       code_stub = &stub->entry();
 2006     }
 2007     __ relocate(relocInfo::poll_return_type);
 2008     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 2009   }
 2010 }
 2011 
 2012 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 2013 {
 2014   return MachNode::size(ra_); // too many variables; just compute it
 2015                               // the hard way
 2016 }
 2017 
 2018 int MachEpilogNode::reloc() const
 2019 {
 2020   return 2; // a large enough number
 2021 }
 2022 
 2023 const Pipeline* MachEpilogNode::pipeline() const
 2024 {
 2025   return MachNode::pipeline_class();
 2026 }
 2027 
 2028 //=============================================================================
 2029 
 2030 enum RC {
 2031   rc_bad,
 2032   rc_int,
 2033   rc_kreg,
 2034   rc_float,
 2035   rc_stack
 2036 };
 2037 
 2038 static enum RC rc_class(OptoReg::Name reg)
 2039 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
 2041 
 2042   if (OptoReg::is_stack(reg)) return rc_stack;
 2043 
 2044   VMReg r = OptoReg::as_VMReg(reg);
 2045 
 2046   if (r->is_Register()) return rc_int;
 2047 
 2048   if (r->is_KRegister()) return rc_kreg;
 2049 
 2050   assert(r->is_XMMRegister(), "must be");
 2051   return rc_float;
 2052 }
 2053 
 2054 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2055 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2056                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2057 
 2058 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2059                      int stack_offset, int reg, uint ireg, outputStream* st);
 2060 
 2061 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2062                                       int dst_offset, uint ireg, outputStream* st) {
 2063   if (masm) {
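    // There is no general memory-to-memory vector move. Op_VecD/Op_VecX copy
    // via push/pop; the other cases borrow rax or xmm0 as scratch, parking
    // the old value just below rsp and restoring it afterwards.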
 2064     switch (ireg) {
 2065     case Op_VecS:
 2066       __ movq(Address(rsp, -8), rax);
 2067       __ movl(rax, Address(rsp, src_offset));
 2068       __ movl(Address(rsp, dst_offset), rax);
 2069       __ movq(rax, Address(rsp, -8));
 2070       break;
 2071     case Op_VecD:
 2072       __ pushq(Address(rsp, src_offset));
 2073       __ popq (Address(rsp, dst_offset));
 2074       break;
 2075     case Op_VecX:
 2076       __ pushq(Address(rsp, src_offset));
 2077       __ popq (Address(rsp, dst_offset));
 2078       __ pushq(Address(rsp, src_offset+8));
 2079       __ popq (Address(rsp, dst_offset+8));
 2080       break;
 2081     case Op_VecY:
 2082       __ vmovdqu(Address(rsp, -32), xmm0);
 2083       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2084       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2085       __ vmovdqu(xmm0, Address(rsp, -32));
 2086       break;
 2087     case Op_VecZ:
 2088       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 2089       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 2090       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 2091       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 2092       break;
 2093     default:
 2094       ShouldNotReachHere();
 2095     }
 2096 #ifndef PRODUCT
 2097   } else {
 2098     switch (ireg) {
 2099     case Op_VecS:
 2100       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2101                 "movl    rax, [rsp + #%d]\n\t"
 2102                 "movl    [rsp + #%d], rax\n\t"
 2103                 "movq    rax, [rsp - #8]",
 2104                 src_offset, dst_offset);
 2105       break;
 2106     case Op_VecD:
 2107       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2108                 "popq    [rsp + #%d]",
 2109                 src_offset, dst_offset);
 2110       break;
 2111      case Op_VecX:
 2112       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2113                 "popq    [rsp + #%d]\n\t"
 2114                 "pushq   [rsp + #%d]\n\t"
 2115                 "popq    [rsp + #%d]",
 2116                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2117       break;
 2118     case Op_VecY:
 2119       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2120                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2121                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2122                 "vmovdqu xmm0, [rsp - #32]",
 2123                 src_offset, dst_offset);
 2124       break;
    case Op_VecZ:
      st->print("evmovdquq [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "evmovdquq xmm0, [rsp + #%d]\n\t"
                "evmovdquq [rsp + #%d], xmm0\n\t"
                "evmovdquq xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
 2132     default:
 2133       ShouldNotReachHere();
 2134     }
 2135 #endif
 2136   }
 2137 }
 2138 
 2139 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2140                                        PhaseRegAlloc* ra_,
 2141                                        bool do_size,
 2142                                        outputStream* st) const {
  assert(masm != nullptr || st != nullptr, "sanity");
 2144   // Get registers to move
 2145   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2146   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2147   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2148   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2149 
 2150   enum RC src_second_rc = rc_class(src_second);
 2151   enum RC src_first_rc = rc_class(src_first);
 2152   enum RC dst_second_rc = rc_class(dst_second);
 2153   enum RC dst_first_rc = rc_class(dst_first);
 2154 
 2155   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2156          "must move at least 1 register" );
 2157 
 2158   if (src_first == dst_first && src_second == dst_second) {
 2159     // Self copy, no move
 2160     return 0;
 2161   }
 2162   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2163     uint ireg = ideal_reg();
 2164     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2165     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
 2167       // mem -> mem
 2168       int src_offset = ra_->reg2offset(src_first);
 2169       int dst_offset = ra_->reg2offset(dst_first);
 2170       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
 2172       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
 2174       int stack_offset = ra_->reg2offset(dst_first);
 2175       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
 2177       int stack_offset = ra_->reg2offset(src_first);
 2178       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2179     } else {
 2180       ShouldNotReachHere();
 2181     }
 2182     return 0;
 2183   }
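  // The tests below of the form (reg & 1) == 0 && reg + 1 == reg_second
  // check whether a value occupies an aligned, adjacent pair of 32-bit
  // OptoReg halves, i.e. whether it is a 64-bit quantity that can be moved
  // with a single quadword instruction; otherwise 32-bit moves are used.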
 2184   if (src_first_rc == rc_stack) {
 2185     // mem ->
 2186     if (dst_first_rc == rc_stack) {
 2187       // mem -> mem
 2188       assert(src_second != dst_first, "overlap");
 2189       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2190           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2191         // 64-bit
 2192         int src_offset = ra_->reg2offset(src_first);
 2193         int dst_offset = ra_->reg2offset(dst_first);
 2194         if (masm) {
 2195           __ pushq(Address(rsp, src_offset));
 2196           __ popq (Address(rsp, dst_offset));
 2197 #ifndef PRODUCT
 2198         } else {
 2199           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2200                     "popq    [rsp + #%d]",
 2201                      src_offset, dst_offset);
 2202 #endif
 2203         }
 2204       } else {
 2205         // 32-bit
 2206         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2207         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2208         // No pushl/popl, so:
 2209         int src_offset = ra_->reg2offset(src_first);
 2210         int dst_offset = ra_->reg2offset(dst_first);
 2211         if (masm) {
 2212           __ movq(Address(rsp, -8), rax);
 2213           __ movl(rax, Address(rsp, src_offset));
 2214           __ movl(Address(rsp, dst_offset), rax);
 2215           __ movq(rax, Address(rsp, -8));
 2216 #ifndef PRODUCT
 2217         } else {
 2218           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2219                     "movl    rax, [rsp + #%d]\n\t"
 2220                     "movl    [rsp + #%d], rax\n\t"
 2221                     "movq    rax, [rsp - #8]",
 2222                      src_offset, dst_offset);
 2223 #endif
 2224         }
 2225       }
 2226       return 0;
 2227     } else if (dst_first_rc == rc_int) {
 2228       // mem -> gpr
 2229       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2230           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2231         // 64-bit
 2232         int offset = ra_->reg2offset(src_first);
 2233         if (masm) {
 2234           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2235 #ifndef PRODUCT
 2236         } else {
 2237           st->print("movq    %s, [rsp + #%d]\t# spill",
 2238                      Matcher::regName[dst_first],
 2239                      offset);
 2240 #endif
 2241         }
 2242       } else {
 2243         // 32-bit
 2244         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2245         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2246         int offset = ra_->reg2offset(src_first);
 2247         if (masm) {
 2248           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2249 #ifndef PRODUCT
 2250         } else {
 2251           st->print("movl    %s, [rsp + #%d]\t# spill",
 2252                      Matcher::regName[dst_first],
 2253                      offset);
 2254 #endif
 2255         }
 2256       }
 2257       return 0;
 2258     } else if (dst_first_rc == rc_float) {
      // mem -> xmm
 2260       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2261           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2262         // 64-bit
 2263         int offset = ra_->reg2offset(src_first);
 2264         if (masm) {
 2265           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2266 #ifndef PRODUCT
 2267         } else {
 2268           st->print("%s  %s, [rsp + #%d]\t# spill",
 2269                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2270                      Matcher::regName[dst_first],
 2271                      offset);
 2272 #endif
 2273         }
 2274       } else {
 2275         // 32-bit
 2276         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2277         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2278         int offset = ra_->reg2offset(src_first);
 2279         if (masm) {
 2280           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2281 #ifndef PRODUCT
 2282         } else {
 2283           st->print("movss   %s, [rsp + #%d]\t# spill",
 2284                      Matcher::regName[dst_first],
 2285                      offset);
 2286 #endif
 2287         }
 2288       }
 2289       return 0;
 2290     } else if (dst_first_rc == rc_kreg) {
 2291       // mem -> kreg
 2292       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2293           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2294         // 64-bit
 2295         int offset = ra_->reg2offset(src_first);
 2296         if (masm) {
 2297           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2298 #ifndef PRODUCT
 2299         } else {
 2300           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2301                      Matcher::regName[dst_first],
 2302                      offset);
 2303 #endif
 2304         }
 2305       }
 2306       return 0;
 2307     }
 2308   } else if (src_first_rc == rc_int) {
 2309     // gpr ->
 2310     if (dst_first_rc == rc_stack) {
 2311       // gpr -> mem
 2312       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2313           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2314         // 64-bit
 2315         int offset = ra_->reg2offset(dst_first);
 2316         if (masm) {
 2317           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2318 #ifndef PRODUCT
 2319         } else {
 2320           st->print("movq    [rsp + #%d], %s\t# spill",
 2321                      offset,
 2322                      Matcher::regName[src_first]);
 2323 #endif
 2324         }
 2325       } else {
 2326         // 32-bit
 2327         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2328         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2329         int offset = ra_->reg2offset(dst_first);
 2330         if (masm) {
 2331           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2332 #ifndef PRODUCT
 2333         } else {
 2334           st->print("movl    [rsp + #%d], %s\t# spill",
 2335                      offset,
 2336                      Matcher::regName[src_first]);
 2337 #endif
 2338         }
 2339       }
 2340       return 0;
 2341     } else if (dst_first_rc == rc_int) {
 2342       // gpr -> gpr
 2343       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2344           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2345         // 64-bit
 2346         if (masm) {
 2347           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2348                   as_Register(Matcher::_regEncode[src_first]));
 2349 #ifndef PRODUCT
 2350         } else {
 2351           st->print("movq    %s, %s\t# spill",
 2352                      Matcher::regName[dst_first],
 2353                      Matcher::regName[src_first]);
 2354 #endif
 2355         }
 2356         return 0;
 2357       } else {
 2358         // 32-bit
 2359         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2360         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2361         if (masm) {
 2362           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2363                   as_Register(Matcher::_regEncode[src_first]));
 2364 #ifndef PRODUCT
 2365         } else {
 2366           st->print("movl    %s, %s\t# spill",
 2367                      Matcher::regName[dst_first],
 2368                      Matcher::regName[src_first]);
 2369 #endif
 2370         }
 2371         return 0;
 2372       }
 2373     } else if (dst_first_rc == rc_float) {
 2374       // gpr -> xmm
 2375       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2376           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2377         // 64-bit
 2378         if (masm) {
 2379           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2380 #ifndef PRODUCT
 2381         } else {
 2382           st->print("movdq   %s, %s\t# spill",
 2383                      Matcher::regName[dst_first],
 2384                      Matcher::regName[src_first]);
 2385 #endif
 2386         }
 2387       } else {
 2388         // 32-bit
 2389         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2390         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2391         if (masm) {
 2392           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2393 #ifndef PRODUCT
 2394         } else {
 2395           st->print("movdl   %s, %s\t# spill",
 2396                      Matcher::regName[dst_first],
 2397                      Matcher::regName[src_first]);
 2398 #endif
 2399         }
 2400       }
 2401       return 0;
 2402     } else if (dst_first_rc == rc_kreg) {
 2403       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2404           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2405         // 64-bit
 2406         if (masm) {
 2407           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
 2414         }
 2415       }
 2416       Unimplemented();
 2417       return 0;
 2418     }
 2419   } else if (src_first_rc == rc_float) {
 2420     // xmm ->
 2421     if (dst_first_rc == rc_stack) {
 2422       // xmm -> mem
 2423       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2424           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2425         // 64-bit
 2426         int offset = ra_->reg2offset(dst_first);
 2427         if (masm) {
 2428           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2429 #ifndef PRODUCT
 2430         } else {
 2431           st->print("movsd   [rsp + #%d], %s\t# spill",
 2432                      offset,
 2433                      Matcher::regName[src_first]);
 2434 #endif
 2435         }
 2436       } else {
 2437         // 32-bit
 2438         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2439         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2440         int offset = ra_->reg2offset(dst_first);
 2441         if (masm) {
 2442           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2443 #ifndef PRODUCT
 2444         } else {
 2445           st->print("movss   [rsp + #%d], %s\t# spill",
 2446                      offset,
 2447                      Matcher::regName[src_first]);
 2448 #endif
 2449         }
 2450       }
 2451       return 0;
 2452     } else if (dst_first_rc == rc_int) {
 2453       // xmm -> gpr
 2454       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2455           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2456         // 64-bit
 2457         if (masm) {
 2458           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2459 #ifndef PRODUCT
 2460         } else {
 2461           st->print("movdq   %s, %s\t# spill",
 2462                      Matcher::regName[dst_first],
 2463                      Matcher::regName[src_first]);
 2464 #endif
 2465         }
 2466       } else {
 2467         // 32-bit
 2468         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2469         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2470         if (masm) {
 2471           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2472 #ifndef PRODUCT
 2473         } else {
 2474           st->print("movdl   %s, %s\t# spill",
 2475                      Matcher::regName[dst_first],
 2476                      Matcher::regName[src_first]);
 2477 #endif
 2478         }
 2479       }
 2480       return 0;
 2481     } else if (dst_first_rc == rc_float) {
 2482       // xmm -> xmm
 2483       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2484           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2485         // 64-bit
 2486         if (masm) {
 2487           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2488 #ifndef PRODUCT
 2489         } else {
 2490           st->print("%s  %s, %s\t# spill",
 2491                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2492                      Matcher::regName[dst_first],
 2493                      Matcher::regName[src_first]);
 2494 #endif
 2495         }
 2496       } else {
 2497         // 32-bit
 2498         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2499         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2500         if (masm) {
 2501           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2502 #ifndef PRODUCT
 2503         } else {
 2504           st->print("%s  %s, %s\t# spill",
 2505                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2506                      Matcher::regName[dst_first],
 2507                      Matcher::regName[src_first]);
 2508 #endif
 2509         }
 2510       }
 2511       return 0;
 2512     } else if (dst_first_rc == rc_kreg) {
 2513       assert(false, "Illegal spilling");
 2514       return 0;
 2515     }
 2516   } else if (src_first_rc == rc_kreg) {
 2517     if (dst_first_rc == rc_stack) {
      // kreg -> mem
 2519       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2520           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2521         // 64-bit
 2522         int offset = ra_->reg2offset(dst_first);
 2523         if (masm) {
 2524           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2525 #ifndef PRODUCT
 2526         } else {
          st->print("kmovq   [rsp + #%d], %s\t# spill",
 2528                      offset,
 2529                      Matcher::regName[src_first]);
 2530 #endif
 2531         }
 2532       }
 2533       return 0;
 2534     } else if (dst_first_rc == rc_int) {
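      // kreg -> gpr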
 2535       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2536           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2537         // 64-bit
 2538         if (masm) {
 2539           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2540 #ifndef PRODUCT
 2541         } else {
          st->print("kmovq   %s, %s\t# spill",
 2543                      Matcher::regName[dst_first],
 2544                      Matcher::regName[src_first]);
 2545 #endif
 2546         }
 2547       }
 2548       Unimplemented();
 2549       return 0;
 2550     } else if (dst_first_rc == rc_kreg) {
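      // kreg -> kreg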
 2551       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2552           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2553         // 64-bit
 2554         if (masm) {
 2555           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2556 #ifndef PRODUCT
 2557         } else {
          st->print("kmovq   %s, %s\t# spill",
 2559                      Matcher::regName[dst_first],
 2560                      Matcher::regName[src_first]);
 2561 #endif
 2562         }
 2563       }
 2564       return 0;
 2565     } else if (dst_first_rc == rc_float) {
 2566       assert(false, "Illegal spill");
 2567       return 0;
 2568     }
 2569   }
 2570 
  assert(false, "unhandled spill copy case");
 2572   Unimplemented();
 2573   return 0;
 2574 }
 2575 
 2576 #ifndef PRODUCT
 2577 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2578   implementation(nullptr, ra_, false, st);
 2579 }
 2580 #endif
 2581 
 2582 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2583   implementation(masm, ra_, false, nullptr);
 2584 }
 2585 
 2586 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2587   return MachNode::size(ra_);
 2588 }
 2589 
 2590 //=============================================================================
 2591 #ifndef PRODUCT
 2592 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2593 {
 2594   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2595   int reg = ra_->get_reg_first(this);
 2596   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2597             Matcher::regName[reg], offset);
 2598 }
 2599 #endif
 2600 
 2601 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2602 {
 2603   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2604   int reg = ra_->get_encode(this);
 2605 
 2606   __ lea(as_Register(reg), Address(rsp, offset));
 2607 }
 2608 
 2609 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2610 {
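  // A "leaq reg, [rsp + offset]" is opcode + ModRM + SIB plus a disp8 or
  // disp32, preceded by a 1-byte REX or 2-byte REX2 prefix, giving the
  // 5/8-byte (REX) and 6/9-byte (REX2) sizes below.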
 2611   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2612   if (ra_->get_encode(this) > 15) {
 2613     return (offset < 0x80) ? 6 : 9; // REX2
 2614   } else {
 2615     return (offset < 0x80) ? 5 : 8; // REX
 2616   }
 2617 }
 2618 
 2619 //=============================================================================
 2620 #ifndef PRODUCT
 2621 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2622 {
 2623   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t# Inline cache check");
 2625   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2626 }
 2627 #endif
 2628 
 2629 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2630 {
 2631   __ ic_check(InteriorEntryAlignment);
 2632 }
 2633 
 2634 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2635 {
 2636   return MachNode::size(ra_); // too many variables; just compute it
 2637                               // the hard way
 2638 }
 2639 
 2640 
 2641 //=============================================================================
 2642 
 2643 bool Matcher::supports_vector_calling_convention(void) {
 2644   return EnableVectorSupport;
 2645 }
 2646 
 2647 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2648   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2649 }
 2650 
 2651 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2652   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2653 }
 2654 
 2655 #ifdef ASSERT
 2656 static bool is_ndd_demotable(const MachNode* mdef) {
 2657   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2658 }
 2659 #endif
 2660 
 2661 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2662                                             int oper_index) {
 2663   if (mdef == nullptr) {
 2664     return false;
 2665   }
 2666 
 2667   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2668       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2669     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2670     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2671     return false;
 2672   }
 2673 
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any single address
  // component will not result in NDD demotion by the assembler.
 2677   if (mdef->operand_num_edges(oper_index) != 1) {
 2678     return false;
 2679   }
 2680 
  // A demotion candidate must be register-mask compatible with the definition.
 2682   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2683   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2684     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2685     return false;
 2686   }
 2687 
 2688   switch (oper_index) {
  // The first operand of a MachNode corresponding to an Intel APX NDD
  // selection pattern can share its assigned register with the definition
  // operand if their live ranges do not overlap. In such a scenario we can
  // demote it to a legacy map0/map1 instruction by replacing its 4-byte
  // extended EVEX prefix with the shorter REX/REX2 encoding. Demotion
  // candidates are decorated with a special flag by the instruction selector.
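  // Illustrative example (assuming an NDD add whose destination was biased
  // onto its first source by the allocator): the three-operand form
  //     add rax, rax, rcx     (dst, src1, src2; 4-byte EVEX prefix)
  // can be demoted by the assembler to the legacy two-operand form
  //     add rax, rcx          (REX/REX2 prefix)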
 2695   case 1:
 2696     return is_ndd_demotable_opr1(mdef);
 2697 
  // The definition operand of a commutative operation can be biased towards
  // its second operand.
 2700   case 2:
 2701     return is_ndd_demotable_opr2(mdef);
 2702 
  // The current scheme only selects up to two biasing candidates.
 2704   default:
 2705     assert(false, "unhandled operand index: %s", mdef->Name());
 2706     break;
 2707   }
 2708 
 2709   return false;
 2710 }
 2711 
 2712 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2713   assert(EnableVectorSupport, "sanity");
 2714   int lo = XMM0_num;
 2715   int hi = XMM0b_num;
 2716   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2717   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2718   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2719   return OptoRegPair(hi, lo);
 2720 }
 2721 
 2722 // Is this branch offset short enough that a short branch can be used?
 2723 //
 2724 // NOTE: If the platform does not provide any short branch variants, then
 2725 //       this method should return false for offset 0.
 2726 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86, a branch displacement is calculated relative to the address
  // of the next instruction.
 2730   offset -= br_size;
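  // For example, a 6-byte branch whose target lies 130 bytes past the branch
  // start has displacement 130 - 6 = 124 from the next instruction, which
  // still fits in a signed 8-bit (rel8) short-branch displacement.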
 2731 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly smaller.
 2734   if (rule == jmpConUCF2_rule)
 2735     return (-126 <= offset && offset <= 125);
 2736   return (-128 <= offset && offset <= 127);
 2737 }
 2738 
 2739 #ifdef ASSERT
 2740 // Return whether or not this register is ever used as an argument.
 2741 bool Matcher::can_be_java_arg(int reg)
 2742 {
 2743   return
 2744     reg ==  RDI_num || reg == RDI_H_num ||
 2745     reg ==  RSI_num || reg == RSI_H_num ||
 2746     reg ==  RDX_num || reg == RDX_H_num ||
 2747     reg ==  RCX_num || reg == RCX_H_num ||
 2748     reg ==   R8_num || reg ==  R8_H_num ||
 2749     reg ==   R9_num || reg ==  R9_H_num ||
 2750     reg ==  R12_num || reg == R12_H_num ||
 2751     reg == XMM0_num || reg == XMM0b_num ||
 2752     reg == XMM1_num || reg == XMM1b_num ||
 2753     reg == XMM2_num || reg == XMM2b_num ||
 2754     reg == XMM3_num || reg == XMM3b_num ||
 2755     reg == XMM4_num || reg == XMM4b_num ||
 2756     reg == XMM5_num || reg == XMM5b_num ||
 2757     reg == XMM6_num || reg == XMM6b_num ||
 2758     reg == XMM7_num || reg == XMM7b_num;
 2759 }
 2760 #endif
 2761 
 2762 uint Matcher::int_pressure_limit()
 2763 {
 2764   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2765 }
 2766 
 2767 uint Matcher::float_pressure_limit()
 2768 {
  // After experimenting with different values, the following default threshold
  // works best for LCM's register pressure scheduling on x64.
 2771   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2772   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2773   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2774 }
 2775 
 2776 // Register for DIVI projection of divmodI
 2777 const RegMask& Matcher::divI_proj_mask() {
 2778   return INT_RAX_REG_mask();
 2779 }
 2780 
 2781 // Register for MODI projection of divmodI
 2782 const RegMask& Matcher::modI_proj_mask() {
 2783   return INT_RDX_REG_mask();
 2784 }
 2785 
 2786 // Register for DIVL projection of divmodL
 2787 const RegMask& Matcher::divL_proj_mask() {
 2788   return LONG_RAX_REG_mask();
 2789 }
 2790 
 2791 // Register for MODL projection of divmodL
 2792 const RegMask& Matcher::modL_proj_mask() {
 2793   return LONG_RDX_REG_mask();
 2794 }
 2795 
 2796 %}
 2797 
 2798 source_hpp %{
 2799 // Header information of the source block.
 2800 // Method declarations/definitions which are used outside
 2801 // the ad-scope can conveniently be defined here.
 2802 //
 2803 // To keep related declarations/definitions/uses close together,
 2804 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 2805 
 2806 #include "runtime/vm_version.hpp"
 2807 
 2808 class NativeJump;
 2809 
 2810 class CallStubImpl {
 2811 
 2812   //--------------------------------------------------------------
 2813   //---<  Used for optimization in Compile::shorten_branches  >---
 2814   //--------------------------------------------------------------
 2815 
 2816  public:
 2817   // Size of call trampoline stub.
 2818   static uint size_call_trampoline() {
 2819     return 0; // no call trampolines on this platform
 2820   }
 2821 
 2822   // number of relocations needed by a call trampoline stub
 2823   static uint reloc_call_trampoline() {
 2824     return 0; // no call trampolines on this platform
 2825   }
 2826 };
 2827 
 2828 class HandlerImpl {
 2829 
 2830  public:
 2831 
 2832   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2833 
 2834   static uint size_deopt_handler() {
    // one call (5 bytes) and one short jmp (2 bytes).
 2836     return 7;
 2837   }
 2838 };
 2839 
 2840 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
 2842     case  4: // fall-through
 2843     case  8: // fall-through
 2844     case 16: return Assembler::AVX_128bit;
 2845     case 32: return Assembler::AVX_256bit;
 2846     case 64: return Assembler::AVX_512bit;
 2847 
 2848     default: {
 2849       ShouldNotReachHere();
 2850       return Assembler::AVX_NoVec;
 2851     }
 2852   }
 2853 }
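// For example, vector_length_encoding(16) yields Assembler::AVX_128bit and
// vector_length_encoding(64) yields Assembler::AVX_512bit; 4- and 8-byte
// vectors fall through to the 128-bit encoding.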
 2854 
 2855 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2856   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2857 }
 2858 
 2859 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2860   uint def_idx = use->operand_index(opnd);
 2861   Node* def = use->in(def_idx);
 2862   return vector_length_encoding(def);
 2863 }
 2864 
 2865 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2866   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2867          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2868 }
 2869 
 2870 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2871   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2872            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2873 }
 2874 
 2875 class Node::PD {
 2876 public:
 2877   enum NodeFlags : uint64_t {
 2878     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2879     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2880     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2881     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2882     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2883     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2884     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2885     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2886     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2887     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2888     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2889     Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
 2890     Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
 2891     _last_flag                = Flag_ndd_demotable_opr2
 2892   };
 2893 };
 2894 
 2895 %} // end source_hpp
 2896 
 2897 source %{
 2898 
 2899 #include "opto/addnode.hpp"
 2900 #include "c2_intelJccErratum_x86.hpp"
 2901 
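// Intel JCC erratum mitigation: after the corresponding microcode update,
// jump instructions that cross or end on a 32-byte boundary can no longer be
// held in the decoded icache, costing performance. Affected MachNodes are
// tagged up front so worst-case padding space can be reserved in the buffer.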
 2902 void PhaseOutput::pd_perform_mach_node_analysis() {
 2903   if (VM_Version::has_intel_jcc_erratum()) {
 2904     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2905     _buf_sizes._code += extra_padding;
 2906   }
 2907 }
 2908 
 2909 int MachNode::pd_alignment_required() const {
 2910   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2911     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2912     return IntelJccErratum::largest_jcc_size() + 1;
 2913   } else {
 2914     return 1;
 2915   }
 2916 }
 2917 
 2918 int MachNode::compute_padding(int current_offset) const {
 2919   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2920     Compile* C = Compile::current();
 2921     PhaseOutput* output = C->output();
 2922     Block* block = output->block();
 2923     int index = output->index();
 2924     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2925   } else {
 2926     return 0;
 2927   }
 2928 }
 2929 
 2930 // Emit deopt handler code.
 2931 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2932 
 2933   // Note that the code buffer's insts_mark is always relative to insts.
 2934   // That's why we must use the macroassembler to generate a handler.
 2935   address base = __ start_a_stub(size_deopt_handler());
 2936   if (base == nullptr) {
 2937     ciEnv::current()->record_failure("CodeCache is full");
 2938     return 0;  // CodeBuffer::expand failed
 2939   }
 2940   int offset = __ offset();
 2941 
 2942   Label start;
 2943   __ bind(start);
 2944 
 2945   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2946 
 2947   int entry_offset = __ offset();
 2948 
 2949   __ jmp(start);
 2950 
 2951   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2952   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2953          "out of bounds read in post-call NOP check");
 2954   __ end_a_stub();
 2955   return entry_offset;
 2956 }
 2957 
 2958 static Assembler::Width widthForType(BasicType bt) {
 2959   if (bt == T_BYTE) {
 2960     return Assembler::B;
 2961   } else if (bt == T_SHORT) {
 2962     return Assembler::W;
 2963   } else if (bt == T_INT) {
 2964     return Assembler::D;
 2965   } else {
 2966     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2967     return Assembler::Q;
 2968   }
 2969 }
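// For example, widthForType(T_SHORT) yields Assembler::W and
// widthForType(T_LONG) yields Assembler::Q.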
 2970 
 2971 //=============================================================================
 2972 
 2973   // Float masks come from different places depending on platform.
 2974   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
 2975   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
 2976   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
 2977   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 2978   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
 2979   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
 2980   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 2981   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 2982   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
 2983   static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 2984   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 2985   static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 2986   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
 2987   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
 2988   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
 2989   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 2990   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
 2991   static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
 2992   static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
 2993 
 2994 //=============================================================================
 2995 bool Matcher::match_rule_supported(int opcode) {
 2996   if (!has_match_rule(opcode)) {
 2997     return false; // no match rule present
 2998   }
 2999   switch (opcode) {
 3000     case Op_AbsVL:
 3001     case Op_StoreVectorScatter:
 3002       if (UseAVX < 3) {
 3003         return false;
 3004       }
 3005       break;
 3006     case Op_PopCountI:
 3007     case Op_PopCountL:
 3008       if (!UsePopCountInstruction) {
 3009         return false;
 3010       }
 3011       break;
 3012     case Op_PopCountVI:
 3013       if (UseAVX < 2) {
 3014         return false;
 3015       }
 3016       break;
 3017     case Op_CompressV:
 3018     case Op_ExpandV:
 3019     case Op_PopCountVL:
 3020       if (UseAVX < 2) {
 3021         return false;
 3022       }
 3023       break;
 3024     case Op_MulVI:
 3025       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3026         return false;
 3027       }
 3028       break;
 3029     case Op_MulVL:
 3030       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3031         return false;
 3032       }
 3033       break;
 3034     case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
 3036         return false;
 3037       }
 3038       break;
 3039     case Op_AbsVB:
 3040     case Op_AbsVS:
 3041     case Op_AbsVI:
 3042     case Op_AddReductionVI:
 3043     case Op_AndReductionV:
 3044     case Op_OrReductionV:
 3045     case Op_XorReductionV:
 3046       if (UseSSE < 3) { // requires at least SSSE3
 3047         return false;
 3048       }
 3049       break;
 3050     case Op_MaxHF:
 3051     case Op_MinHF:
 3052       if (!VM_Version::supports_avx512vlbw()) {
 3053         return false;
      } // fallthrough
 3055     case Op_AddHF:
 3056     case Op_DivHF:
 3057     case Op_FmaHF:
 3058     case Op_MulHF:
 3059     case Op_ReinterpretS2HF:
 3060     case Op_ReinterpretHF2S:
 3061     case Op_SubHF:
 3062     case Op_SqrtHF:
 3063       if (!VM_Version::supports_avx512_fp16()) {
 3064         return false;
 3065       }
 3066       break;
 3067     case Op_VectorLoadShuffle:
 3068     case Op_VectorRearrange:
 3069     case Op_MulReductionVI:
 3070       if (UseSSE < 4) { // requires at least SSE4
 3071         return false;
 3072       }
 3073       break;
 3074     case Op_IsInfiniteF:
 3075     case Op_IsInfiniteD:
 3076       if (!VM_Version::supports_avx512dq()) {
 3077         return false;
 3078       }
 3079       break;
 3080     case Op_SqrtVD:
 3081     case Op_SqrtVF:
 3082     case Op_VectorMaskCmp:
 3083     case Op_VectorCastB2X:
 3084     case Op_VectorCastS2X:
 3085     case Op_VectorCastI2X:
 3086     case Op_VectorCastL2X:
 3087     case Op_VectorCastF2X:
 3088     case Op_VectorCastD2X:
 3089     case Op_VectorUCastB2X:
 3090     case Op_VectorUCastS2X:
 3091     case Op_VectorUCastI2X:
 3092     case Op_VectorMaskCast:
 3093       if (UseAVX < 1) { // enabled for AVX only
 3094         return false;
 3095       }
 3096       break;
 3097     case Op_PopulateIndex:
 3098       if (UseAVX < 2) {
 3099         return false;
 3100       }
 3101       break;
 3102     case Op_RoundVF:
 3103       if (UseAVX < 2) { // enabled for AVX2 only
 3104         return false;
 3105       }
 3106       break;
 3107     case Op_RoundVD:
 3108       if (UseAVX < 3) {
 3109         return false;  // enabled for AVX3 only
 3110       }
 3111       break;
 3112     case Op_CompareAndSwapL:
 3113     case Op_CompareAndSwapP:
 3114       break;
 3115     case Op_StrIndexOf:
 3116       if (!UseSSE42Intrinsics) {
 3117         return false;
 3118       }
 3119       break;
 3120     case Op_StrIndexOfChar:
 3121       if (!UseSSE42Intrinsics) {
 3122         return false;
 3123       }
 3124       break;
 3125     case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
 3127         return false;
 3128       }
 3129       break;
 3130     case Op_MulVB:
 3131     case Op_LShiftVB:
 3132     case Op_RShiftVB:
 3133     case Op_URShiftVB:
 3134     case Op_VectorInsert:
 3135     case Op_VectorLoadMask:
 3136     case Op_VectorStoreMask:
 3137     case Op_VectorBlend:
 3138       if (UseSSE < 4) {
 3139         return false;
 3140       }
 3141       break;
 3142     case Op_MaxD:
 3143     case Op_MaxF:
 3144     case Op_MinD:
 3145     case Op_MinF:
 3146       if (UseAVX < 1) { // enabled for AVX only
 3147         return false;
 3148       }
 3149       break;
 3150     case Op_CacheWB:
 3151     case Op_CacheWBPreSync:
 3152     case Op_CacheWBPostSync:
 3153       if (!VM_Version::supports_data_cache_line_flush()) {
 3154         return false;
 3155       }
 3156       break;
 3157     case Op_ExtractB:
 3158     case Op_ExtractL:
 3159     case Op_ExtractI:
 3160     case Op_RoundDoubleMode:
 3161       if (UseSSE < 4) {
 3162         return false;
 3163       }
 3164       break;
 3165     case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
 3167         return false; // 128bit vroundpd is not available
 3168       }
 3169       break;
 3170     case Op_LoadVectorGather:
 3171     case Op_LoadVectorGatherMasked:
 3172       if (UseAVX < 2) {
 3173         return false;
 3174       }
 3175       break;
 3176     case Op_FmaF:
 3177     case Op_FmaD:
 3178     case Op_FmaVD:
 3179     case Op_FmaVF:
 3180       if (!UseFMA) {
 3181         return false;
 3182       }
 3183       break;
 3184     case Op_MacroLogicV:
 3185       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3186         return false;
 3187       }
 3188       break;
 3189 
 3190     case Op_VectorCmpMasked:
 3191     case Op_VectorMaskGen:
 3192       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3193         return false;
 3194       }
 3195       break;
 3196     case Op_VectorMaskFirstTrue:
 3197     case Op_VectorMaskLastTrue:
 3198     case Op_VectorMaskTrueCount:
 3199     case Op_VectorMaskToLong:
 3200       if (UseAVX < 1) {
        return false;
 3202       }
 3203       break;
 3204     case Op_RoundF:
 3205     case Op_RoundD:
 3206       break;
 3207     case Op_CopySignD:
 3208     case Op_CopySignF:
 3209       if (UseAVX < 3)  {
 3210         return false;
 3211       }
 3212       if (!VM_Version::supports_avx512vl()) {
 3213         return false;
 3214       }
 3215       break;
 3216     case Op_CompressBits:
 3217     case Op_ExpandBits:
 3218       if (!VM_Version::supports_bmi2()) {
 3219         return false;
 3220       }
 3221       break;
 3222     case Op_CompressM:
 3223       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3224         return false;
 3225       }
 3226       break;
 3227     case Op_ConvF2HF:
 3228     case Op_ConvHF2F:
 3229       if (!VM_Version::supports_float16()) {
 3230         return false;
 3231       }
 3232       break;
 3233     case Op_VectorCastF2HF:
 3234     case Op_VectorCastHF2F:
 3235       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3236         return false;
 3237       }
 3238       break;
 3239   }
 3240   return true;  // Match rules are supported by default.
 3241 }
 3242 
 3243 //------------------------------------------------------------------------
 3244 
 3245 static inline bool is_pop_count_instr_target(BasicType bt) {
 3246   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3247          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3248 }
 3249 
 3250 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3251   return match_rule_supported_vector(opcode, vlen, bt);
 3252 }
 3253 
 3254 // Identify extra cases that we might want to provide match rules for vector nodes and
 3255 // other intrinsics guarded with vector length (vlen) and element type (bt).
 3256 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3257   if (!match_rule_supported(opcode)) {
 3258     return false;
 3259   }
 3260   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3261   //   * SSE2 supports 128bit vectors for all types;
 3262   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3263   //   * AVX2 supports 256bit vectors for all types;
 3264   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3265   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3266   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3267   // And MaxVectorSize is taken into account as well.
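  // For example, under these rules a 256-bit vector of ints (vlen == 8)
  // requires at least AVX2, while eight doubles (512 bits) require AVX512F.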
 3268   if (!vector_size_supported(bt, vlen)) {
 3269     return false;
 3270   }
 3271   // Special cases which require vector length follow:
 3272   //   * implementation limitations
 3273   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3274   //   * 128bit vroundpd instruction is present only in AVX1
 3275   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
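  // e.g. vlen == 4 with bt == T_INT gives size_in_bits == 4 * 4 * 8 == 128.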
 3276   switch (opcode) {
 3277     case Op_MaxVHF:
 3278     case Op_MinVHF:
 3279       if (!VM_Version::supports_avx512bw()) {
 3280         return false;
      } // fallthrough
 3282     case Op_AddVHF:
 3283     case Op_DivVHF:
 3284     case Op_FmaVHF:
 3285     case Op_MulVHF:
 3286     case Op_SubVHF:
 3287     case Op_SqrtVHF:
 3288       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3289         return false;
 3290       }
 3291       if (!VM_Version::supports_avx512_fp16()) {
 3292         return false;
 3293       }
 3294       break;
 3295     case Op_AbsVF:
 3296     case Op_NegVF:
      if (vlen == 16 && !VM_Version::supports_avx512dq()) {
 3298         return false; // 512bit vandps and vxorps are not available
 3299       }
 3300       break;
 3301     case Op_AbsVD:
 3302     case Op_NegVD:
      if (vlen == 8 && !VM_Version::supports_avx512dq()) {
 3304         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3305       }
 3306       break;
 3307     case Op_RotateRightV:
 3308     case Op_RotateLeftV:
 3309       if (bt != T_INT && bt != T_LONG) {
 3310         return false;
 3311       } // fallthrough
 3312     case Op_MacroLogicV:
 3313       if (!VM_Version::supports_evex() ||
 3314           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3315         return false;
 3316       }
 3317       break;
 3318     case Op_ClearArray:
 3319     case Op_VectorMaskGen:
 3320     case Op_VectorCmpMasked:
 3321       if (!VM_Version::supports_avx512bw()) {
 3322         return false;
 3323       }
 3324       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3325         return false;
 3326       }
 3327       break;
 3328     case Op_LoadVectorMasked:
 3329     case Op_StoreVectorMasked:
 3330       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3331         return false;
 3332       }
 3333       break;
 3334     case Op_UMinV:
 3335     case Op_UMaxV:
 3336       if (UseAVX == 0) {
 3337         return false;
 3338       }
 3339       break;
 3340     case Op_UMinReductionV:
 3341     case Op_UMaxReductionV:
 3342       if (UseAVX == 0) {
 3343         return false;
 3344       }
 3345       if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
 3346         return false;
 3347       }
 3348       if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
 3349         return false;
 3350       }
 3351       break;
 3352     case Op_MaxV:
 3353     case Op_MinV:
 3354       if (UseSSE < 4 && is_integral_type(bt)) {
 3355         return false;
 3356       }
      if (bt == T_FLOAT || bt == T_DOUBLE) {
        // Float/Double intrinsics are enabled for the AVX family currently.
        if (UseAVX == 0) {
          return false;
        }
        if (UseAVX > 2 && !VM_Version::supports_avx512dq() && size_in_bits == 512) { // 512 bit Float/Double intrinsics need AVX512DQ
          return false;
        }
      }
 3366       break;
 3367     case Op_CallLeafVector:
 3368       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3369         return false;
 3370       }
 3371       break;
 3372     case Op_AddReductionVI:
 3373       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3374         return false;
 3375       }
 3376       // fallthrough
 3377     case Op_AndReductionV:
 3378     case Op_OrReductionV:
 3379     case Op_XorReductionV:
 3380       if (is_subword_type(bt) && (UseSSE < 4)) {
 3381         return false;
 3382       }
 3383       break;
 3384     case Op_MinReductionV:
 3385     case Op_MaxReductionV:
 3386       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3387         return false;
 3388       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3389         return false;
 3390       }
 3391       // Float/Double intrinsics enabled for AVX family.
 3392       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3393         return false;
 3394       }
 3395       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3396         return false;
 3397       }
 3398       break;
 3399     case Op_VectorBlend:
 3400       if (UseAVX == 0 && size_in_bits < 128) {
 3401         return false;
 3402       }
 3403       break;
 3404     case Op_VectorTest:
 3405       if (UseSSE < 4) {
 3406         return false; // Implementation limitation
 3407       } else if (size_in_bits < 32) {
 3408         return false; // Implementation limitation
 3409       }
 3410       break;
 3411     case Op_VectorLoadShuffle:
 3412     case Op_VectorRearrange:
      if (vlen == 2) {
 3414         return false; // Implementation limitation due to how shuffle is loaded
 3415       } else if (size_in_bits == 256 && UseAVX < 2) {
 3416         return false; // Implementation limitation
 3417       }
 3418       break;
 3419     case Op_VectorLoadMask:
 3420     case Op_VectorMaskCast:
 3421       if (size_in_bits == 256 && UseAVX < 2) {
 3422         return false; // Implementation limitation
 3423       }
 3424       // fallthrough
 3425     case Op_VectorStoreMask:
 3426       if (vlen == 2) {
 3427         return false; // Implementation limitation
 3428       }
 3429       break;
 3430     case Op_PopulateIndex:
 3431       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3432         return false;
 3433       }
 3434       break;
 3435     case Op_VectorCastB2X:
 3436     case Op_VectorCastS2X:
 3437     case Op_VectorCastI2X:
 3438       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3439         return false;
 3440       }
 3441       break;
 3442     case Op_VectorCastL2X:
 3443       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3444         return false;
 3445       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3446         return false;
 3447       }
 3448       break;
 3449     case Op_VectorCastF2X: {
        // As per JLS section 5.1.3, narrowing conversions to sub-word types
        // happen after an intermediate conversion to integer, and the special
        // handling code needs the AVX2 vpcmpeqd instruction for 256 bit vectors.
 3453         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3454         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3455           return false;
 3456         }
 3457       }
 3458       // fallthrough
 3459     case Op_VectorCastD2X:
 3460       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3461         return false;
 3462       }
 3463       break;
 3464     case Op_VectorCastF2HF:
 3465     case Op_VectorCastHF2F:
      if (!VM_Version::supports_f16c() &&
          (!VM_Version::supports_evex() ||
           (size_in_bits != 512 && !VM_Version::supports_avx512vl()))) {
 3469         return false;
 3470       }
 3471       break;
 3472     case Op_RoundVD:
 3473       if (!VM_Version::supports_avx512dq()) {
 3474         return false;
 3475       }
 3476       break;
 3477     case Op_MulReductionVI:
 3478       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3479         return false;
 3480       }
 3481       break;
 3482     case Op_LoadVectorGatherMasked:
 3483       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3484         return false;
 3485       }
 3486       if (is_subword_type(bt) &&
 3487          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3488           (size_in_bits < 64)                                      ||
 3489           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3490         return false;
 3491       }
 3492       break;
 3493     case Op_StoreVectorScatterMasked:
 3494     case Op_StoreVectorScatter:
 3495       if (is_subword_type(bt)) {
 3496         return false;
 3497       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3498         return false;
 3499       }
 3500       // fallthrough
 3501     case Op_LoadVectorGather:
 3502       if (!is_subword_type(bt) && size_in_bits == 64) {
 3503         return false;
 3504       }
 3505       if (is_subword_type(bt) && size_in_bits < 64) {
 3506         return false;
 3507       }
 3508       break;
 3509     case Op_SaturatingAddV:
 3510     case Op_SaturatingSubV:
 3511       if (UseAVX < 1) {
 3512         return false; // Implementation limitation
 3513       }
 3514       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3515         return false;
 3516       }
 3517       break;
 3518     case Op_SelectFromTwoVector:
      if (size_in_bits < 128) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false;
      }
      if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
        return false;
      }
      break;
 3535     case Op_MaskAll:
 3536       if (!VM_Version::supports_evex()) {
 3537         return false;
 3538       }
 3539       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3540         return false;
 3541       }
 3542       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3543         return false;
 3544       }
 3545       break;
 3546     case Op_VectorMaskCmp:
 3547       if (vlen < 2 || size_in_bits < 32) {
 3548         return false;
 3549       }
 3550       break;
 3551     case Op_CompressM:
 3552       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3553         return false;
 3554       }
 3555       break;
 3556     case Op_CompressV:
 3557     case Op_ExpandV:
 3558       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3559         return false;
 3560       }
      if (size_in_bits < 128) {
        return false;
      }
      break;
 3564     case Op_VectorLongToMask:
 3565       if (UseAVX < 1) {
 3566         return false;
 3567       }
 3568       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3569         return false;
 3570       }
 3571       break;
 3572     case Op_SignumVD:
 3573     case Op_SignumVF:
 3574       if (UseAVX < 1) {
 3575         return false;
 3576       }
 3577       break;
 3578     case Op_PopCountVI:
 3579     case Op_PopCountVL: {
 3580         if (!is_pop_count_instr_target(bt) &&
 3581             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3582           return false;
 3583         }
 3584       }
 3585       break;
 3586     case Op_ReverseV:
 3587     case Op_ReverseBytesV:
 3588       if (UseAVX < 2) {
 3589         return false;
 3590       }
 3591       break;
 3592     case Op_CountTrailingZerosV:
 3593     case Op_CountLeadingZerosV:
 3594       if (UseAVX < 2) {
 3595         return false;
 3596       }
 3597       break;
 3598   }
  return true;  // Match rules are supported by default.
 3600 }
 3601 
 3602 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most unary/binary/ternary masked operations
  // share the IR nodes of their non-masked counterparts, with the mask edge
  // being the differentiator. This routine does a strict check on the existence
  // of masked operation patterns by returning false for all opcodes apart from
  // the ones whose masked instruction patterns are defined in this file.
 3609   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3610     return false;
 3611   }
 3612 
 3613   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3614   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3615     return false;
 3616   }
  switch (opcode) {
 3618     // Unary masked operations
 3619     case Op_AbsVB:
 3620     case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      }
      // fallthrough
 3624     case Op_AbsVI:
 3625     case Op_AbsVL:
 3626       return true;
 3627 
 3628     // Ternary masked operations
 3629     case Op_FmaVF:
 3630     case Op_FmaVD:
 3631       return true;
 3632 
 3633     case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
 3635         return false;
 3636       }
 3637       return true;
 3638 
 3639     // Binary masked operations
 3640     case Op_AddVB:
 3641     case Op_AddVS:
 3642     case Op_SubVB:
 3643     case Op_SubVS:
 3644     case Op_MulVS:
 3645     case Op_LShiftVS:
 3646     case Op_RShiftVS:
 3647     case Op_URShiftVS:
 3648       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3649       if (!VM_Version::supports_avx512bw()) {
 3650         return false;  // Implementation limitation
 3651       }
 3652       return true;
 3653 
 3654     case Op_MulVL:
 3655       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3656       if (!VM_Version::supports_avx512dq()) {
 3657         return false;  // Implementation limitation
 3658       }
 3659       return true;
 3660 
 3661     case Op_AndV:
 3662     case Op_OrV:
 3663     case Op_XorV:
 3664     case Op_RotateRightV:
 3665     case Op_RotateLeftV:
 3666       if (bt != T_INT && bt != T_LONG) {
 3667         return false; // Implementation limitation
 3668       }
 3669       return true;
 3670 
 3671     case Op_VectorLoadMask:
 3672       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3673       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3674         return false;
 3675       }
 3676       return true;
 3677 
 3678     case Op_AddVI:
 3679     case Op_AddVL:
 3680     case Op_AddVF:
 3681     case Op_AddVD:
 3682     case Op_SubVI:
 3683     case Op_SubVL:
 3684     case Op_SubVF:
 3685     case Op_SubVD:
 3686     case Op_MulVI:
 3687     case Op_MulVF:
 3688     case Op_MulVD:
 3689     case Op_DivVF:
 3690     case Op_DivVD:
 3691     case Op_SqrtVF:
 3692     case Op_SqrtVD:
 3693     case Op_LShiftVI:
 3694     case Op_LShiftVL:
 3695     case Op_RShiftVI:
 3696     case Op_RShiftVL:
 3697     case Op_URShiftVI:
 3698     case Op_URShiftVL:
 3699     case Op_LoadVectorMasked:
 3700     case Op_StoreVectorMasked:
 3701     case Op_LoadVectorGatherMasked:
 3702     case Op_StoreVectorScatterMasked:
 3703       return true;
 3704 
 3705     case Op_UMinV:
 3706     case Op_UMaxV:
 3707       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3708         return false;
 3709       } // fallthrough
 3710     case Op_MaxV:
 3711     case Op_MinV:
 3712       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3713         return false; // Implementation limitation
 3714       }
 3715       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3716         return false; // Implementation limitation
 3717       }
 3718       return true;
 3719     case Op_SaturatingAddV:
 3720     case Op_SaturatingSubV:
 3721       if (!is_subword_type(bt)) {
 3722         return false;
 3723       }
 3724       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3725         return false; // Implementation limitation
 3726       }
 3727       return true;
 3728 
 3729     case Op_VectorMaskCmp:
 3730       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3731         return false; // Implementation limitation
 3732       }
 3733       return true;
 3734 
 3735     case Op_VectorRearrange:
 3736       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3737         return false; // Implementation limitation
 3738       }
 3739       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3740         return false; // Implementation limitation
 3741       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3742         return false; // Implementation limitation
 3743       }
 3744       return true;
 3745 
 3746     // Binary Logical operations
 3747     case Op_AndVMask:
 3748     case Op_OrVMask:
 3749     case Op_XorVMask:
 3750       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3751         return false; // Implementation limitation
 3752       }
 3753       return true;
 3754 
 3755     case Op_PopCountVI:
 3756     case Op_PopCountVL:
 3757       if (!is_pop_count_instr_target(bt)) {
 3758         return false;
 3759       }
 3760       return true;
 3761 
 3762     case Op_MaskAll:
 3763       return true;
 3764 
 3765     case Op_CountLeadingZerosV:
      if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
        return true;
      } // fallthrough to default
 3769     default:
 3770       return false;
 3771   }
 3772 }
 3773 
 3774 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3775   return false;
 3776 }
 3777 
 3778 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3779 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3780   switch (elem_bt) {
 3781     case T_BYTE:  return false;
 3782     case T_SHORT: return !VM_Version::supports_avx512bw();
 3783     case T_INT:   return !VM_Version::supports_avx();
 3784     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3785     default:
 3786       ShouldNotReachHere();
 3787       return false;
 3788   }
 3789 }
 3790 
 3791 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3792   // Prefer predicate if the mask type is "TypeVectMask".
 3793   return vt->isa_vectmask() != nullptr;
 3794 }
 3795 
 3796 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3797   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3798   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3799   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3800       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3801     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3802     return new legVecZOper();
 3803   }
 3804   if (legacy) {
 3805     switch (ideal_reg) {
 3806       case Op_VecS: return new legVecSOper();
 3807       case Op_VecD: return new legVecDOper();
 3808       case Op_VecX: return new legVecXOper();
 3809       case Op_VecY: return new legVecYOper();
 3810       case Op_VecZ: return new legVecZOper();
 3811     }
 3812   } else {
 3813     switch (ideal_reg) {
 3814       case Op_VecS: return new vecSOper();
 3815       case Op_VecD: return new vecDOper();
 3816       case Op_VecX: return new vecXOper();
 3817       case Op_VecY: return new vecYOper();
 3818       case Op_VecZ: return new vecZOper();
 3819     }
 3820   }
 3821   ShouldNotReachHere();
 3822   return nullptr;
 3823 }
 3824 
 3825 bool Matcher::is_reg2reg_move(MachNode* m) {
 3826   switch (m->rule()) {
 3827     case MoveVec2Leg_rule:
 3828     case MoveLeg2Vec_rule:
 3829     case MoveF2VL_rule:
 3830     case MoveF2LEG_rule:
 3831     case MoveVL2F_rule:
 3832     case MoveLEG2F_rule:
 3833     case MoveD2VL_rule:
 3834     case MoveD2LEG_rule:
 3835     case MoveVL2D_rule:
 3836     case MoveLEG2D_rule:
 3837       return true;
 3838     default:
 3839       return false;
 3840   }
 3841 }
 3842 
 3843 bool Matcher::is_generic_vector(MachOper* opnd) {
 3844   switch (opnd->opcode()) {
 3845     case VEC:
 3846     case LEGVEC:
 3847       return true;
 3848     default:
 3849       return false;
 3850   }
 3851 }
 3852 
 3853 //------------------------------------------------------------------------
 3854 
 3855 const RegMask* Matcher::predicate_reg_mask(void) {
 3856   return &_VECTMASK_REG_mask;
 3857 }
 3858 
 3859 // Max vector size in bytes. 0 if not supported.
 3860 int Matcher::vector_width_in_bytes(BasicType bt) {
 3861   assert(is_java_primitive(bt), "only primitive type vectors");
 3862   // SSE2 supports 128bit vectors for all types.
 3863   // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types.
 3865   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
 3866   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3867   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3868     size = (UseAVX > 2) ? 64 : 32;
 3869   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3870     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3871   // Use flag to limit vector size.
 3872   size = MIN2(size,(int)MaxVectorSize);
 3873   // Minimum 2 values in vector (or 4 for bytes).
 3874   switch (bt) {
 3875   case T_DOUBLE:
 3876   case T_LONG:
 3877     if (size < 16) return 0;
 3878     break;
 3879   case T_FLOAT:
 3880   case T_INT:
 3881     if (size < 8) return 0;
 3882     break;
  case T_BOOLEAN:
  case T_CHAR:
  case T_BYTE:
  case T_SHORT:
    if (size < 4) return 0;
    break;
 3895   default:
 3896     ShouldNotReachHere();
 3897   }
 3898   return size;
 3899 }
 3900 
 3901 // Limits on vector size (number of elements) loaded into vector.
 3902 int Matcher::max_vector_size(const BasicType bt) {
 3903   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3904 }
 3905 int Matcher::min_vector_size(const BasicType bt) {
 3906   int max_size = max_vector_size(bt);
  // Min size which can be loaded into a vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Support SVML calls on single-element (64-bit) double vectors.
 3910   if (bt == T_DOUBLE) {
 3911     size = 1;
 3912   }
 3913   return MIN2(size,max_size);
 3914 }
 3915 
 3916 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3917   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3918   // by default on Cascade Lake
 3919   if (VM_Version::is_default_intel_cascade_lake()) {
 3920     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3921   }
 3922   return Matcher::max_vector_size(bt);
 3923 }
 3924 
 3925 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3926   return -1;
 3927 }
 3928 
 3929 // Vector ideal reg corresponding to specified size in bytes
 3930 uint Matcher::vector_ideal_reg(int size) {
 3931   assert(MaxVectorSize >= size, "");
  switch (size) {
 3933     case  4: return Op_VecS;
 3934     case  8: return Op_VecD;
 3935     case 16: return Op_VecX;
 3936     case 32: return Op_VecY;
 3937     case 64: return Op_VecZ;
 3938   }
 3939   ShouldNotReachHere();
 3940   return 0;
 3941 }
 3942 
 3943 // Check for shift by small constant as well
 3944 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3945   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3946       shift->in(2)->get_int() <= 3 &&
 3947       // Are there other uses besides address expressions?
 3948       !matcher->is_visited(shift)) {
 3949     address_visited.set(shift->_idx); // Flag as address_visited
 3950     mstack.push(shift->in(2), Matcher::Visit);
 3951     Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses the
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
 3955     if (conv->Opcode() == Op_ConvI2L &&
 3956         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3957         // Are there other uses besides address expressions?
 3958         !matcher->is_visited(conv)) {
 3959       address_visited.set(conv->_idx); // Flag as address_visited
 3960       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3961     } else {
 3962       mstack.push(conv, Matcher::Pre_Visit);
 3963     }
 3964     return true;
 3965   }
 3966   return false;
 3967 }
 3968 
 3969 // This function identifies sub-graphs in which a 'load' node is
 3970 // input to two different nodes, and such that it can be matched
 3971 // with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
 3973 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3974 // refers to the same node.
 3975 //
 3976 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3977 // This is a temporary solution until we make DAGs expressible in ADL.
 3978 template<typename ConType>
 3979 class FusedPatternMatcher {
 3980   Node* _op1_node;
 3981   Node* _mop_node;
 3982   int _con_op;
 3983 
 3984   static int match_next(Node* n, int next_op, int next_op_idx) {
 3985     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3986       return -1;
 3987     }
 3988 
 3989     if (next_op_idx == -1) { // n is commutative, try rotations
 3990       if (n->in(1)->Opcode() == next_op) {
 3991         return 1;
 3992       } else if (n->in(2)->Opcode() == next_op) {
 3993         return 2;
 3994       }
 3995     } else {
 3996       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 3997       if (n->in(next_op_idx)->Opcode() == next_op) {
 3998         return next_op_idx;
 3999       }
 4000     }
 4001     return -1;
 4002   }
 4003 
 4004  public:
 4005   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 4006     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 4007 
 4008   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 4009              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4010              typename ConType::NativeType con_value) {
 4011     if (_op1_node->Opcode() != op1) {
 4012       return false;
 4013     }
 4014     if (_mop_node->outcnt() > 2) {
 4015       return false;
 4016     }
 4017     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4018     if (op1_op2_idx == -1) {
 4019       return false;
 4020     }
 4021     // Memory operation must be the other edge
 4022     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 4023 
 4024     // Check that the mop node is really what we want
 4025     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4026       Node* op2_node = _op1_node->in(op1_op2_idx);
 4027       if (op2_node->outcnt() > 1) {
 4028         return false;
 4029       }
 4030       assert(op2_node->Opcode() == op2, "Should be");
 4031       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4032       if (op2_con_idx == -1) {
 4033         return false;
 4034       }
 4035       // Memory operation must be the other edge
 4036       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4037       // Check that the memory operation is the same node
 4038       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4039         // Now check the constant
 4040         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4041         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4042           return true;
 4043         }
 4044       }
 4045     }
 4046     return false;
 4047   }
 4048 };
 4049 
 4050 static bool is_bmi_pattern(Node* n, Node* m) {
 4051   assert(UseBMI1Instructions, "sanity");
 4052   if (n != nullptr && m != nullptr) {
 4053     if (m->Opcode() == Op_LoadI) {
 4054       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4055       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4056              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4057              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4058     } else if (m->Opcode() == Op_LoadL) {
 4059       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4060       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4061              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4062              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4063     }
 4064   }
 4065   return false;
 4066 }
 4067 
 4068 // Should the matcher clone input 'm' of node 'n'?
 4069 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4070   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4071   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
 4072     mstack.push(m, Visit);
 4073     return true;
 4074   }
 4075   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4076     mstack.push(m, Visit);           // m = ShiftCntV
 4077     return true;
 4078   }
 4079   if (is_encode_and_store_pattern(n, m)) {
 4080     mstack.push(m, Visit);
 4081     return true;
 4082   }
 4083   return false;
 4084 }
 4085 
 4086 // Should the Matcher clone shifts on addressing modes, expecting them
 4087 // to be subsumed into complex addressing expressions or compute them
 4088 // into registers?
 4089 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4090   Node *off = m->in(AddPNode::Offset);
 4091   if (off->is_Con()) {
 4092     address_visited.test_set(m->_idx); // Flag as address_visited
 4093     Node *adr = m->in(AddPNode::Address);
 4094 
 4095     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4096     // AtomicAdd is not an addressing expression.
 4097     // Cheap to find it by looking for screwy base.
 4098     if (adr->is_AddP() &&
 4099         !adr->in(AddPNode::Base)->is_top() &&
 4100         !adr->in(AddPNode::Offset)->is_Con() &&
 4101         off->get_long() == (int) (off->get_long()) && // immL32
 4102         // Are there other uses besides address expressions?
 4103         !is_visited(adr)) {
 4104       address_visited.set(adr->_idx); // Flag as address_visited
 4105       Node *shift = adr->in(AddPNode::Offset);
 4106       if (!clone_shift(shift, this, mstack, address_visited)) {
 4107         mstack.push(shift, Pre_Visit);
 4108       }
 4109       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4110       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4111     } else {
 4112       mstack.push(adr, Pre_Visit);
 4113     }
 4114 
 4115     // Clone X+offset as it also folds into most addressing expressions
 4116     mstack.push(off, Visit);
 4117     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4118     return true;
 4119   } else if (clone_shift(off, this, mstack, address_visited)) {
 4120     address_visited.test_set(m->_idx); // Flag as address_visited
 4121     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4122     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4123     return true;
 4124   }
 4125   return false;
 4126 }
 4127 
 4128 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4129   switch (bt) {
 4130     case BoolTest::eq:
 4131       return Assembler::eq;
 4132     case BoolTest::ne:
 4133       return Assembler::neq;
 4134     case BoolTest::le:
 4135     case BoolTest::ule:
 4136       return Assembler::le;
 4137     case BoolTest::ge:
 4138     case BoolTest::uge:
 4139       return Assembler::nlt;
 4140     case BoolTest::lt:
 4141     case BoolTest::ult:
 4142       return Assembler::lt;
 4143     case BoolTest::gt:
 4144     case BoolTest::ugt:
 4145       return Assembler::nle;
 4146     default : ShouldNotReachHere(); return Assembler::_false;
 4147   }
 4148 }
 4149 
 4150 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4151   switch (bt) {
 4152   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4153   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4154   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4155   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4156   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4157   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4158   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4159   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4160   }
 4161 }
 4162 
 4163 // Helper methods for MachSpillCopyNode::implementation().
 4164 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4165                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4166   assert(ireg == Op_VecS || // 32bit vector
 4167          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4168           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4169          "no non-adjacent vector moves" );
 4170   if (masm) {
 4171     switch (ireg) {
 4172     case Op_VecS: // copy whole register
 4173     case Op_VecD:
 4174     case Op_VecX:
 4175       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4176         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4177       } else {
 4178         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4179      }
 4180       break;
 4181     case Op_VecY:
 4182       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4183         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4184       } else {
 4185         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4186      }
 4187       break;
 4188     case Op_VecZ:
 4189       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4190       break;
 4191     default:
 4192       ShouldNotReachHere();
 4193     }
 4194 #ifndef PRODUCT
 4195   } else {
 4196     switch (ireg) {
 4197     case Op_VecS:
 4198     case Op_VecD:
 4199     case Op_VecX:
 4200       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4201       break;
 4202     case Op_VecY:
 4203     case Op_VecZ:
 4204       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4205       break;
 4206     default:
 4207       ShouldNotReachHere();
 4208     }
 4209 #endif
 4210   }
 4211 }
 4212 
 4213 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4214                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4215   if (masm) {
 4216     if (is_load) {
 4217       switch (ireg) {
 4218       case Op_VecS:
 4219         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4220         break;
 4221       case Op_VecD:
 4222         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4223         break;
 4224       case Op_VecX:
 4225         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4226           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4227         } else {
 4228           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4230         }
 4231         break;
 4232       case Op_VecY:
 4233         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4234           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4235         } else {
 4236           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4238         }
 4239         break;
 4240       case Op_VecZ:
 4241         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4242         break;
 4243       default:
 4244         ShouldNotReachHere();
 4245       }
 4246     } else { // store
 4247       switch (ireg) {
 4248       case Op_VecS:
 4249         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4250         break;
 4251       case Op_VecD:
 4252         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4253         break;
 4254       case Op_VecX:
 4255         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4256           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4257         }
 4258         else {
 4259           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4260         }
 4261         break;
 4262       case Op_VecY:
 4263         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4264           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4265         }
 4266         else {
 4267           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4268         }
 4269         break;
 4270       case Op_VecZ:
 4271         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4272         break;
 4273       default:
 4274         ShouldNotReachHere();
 4275       }
 4276     }
 4277 #ifndef PRODUCT
 4278   } else {
 4279     if (is_load) {
 4280       switch (ireg) {
 4281       case Op_VecS:
 4282         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4283         break;
 4284       case Op_VecD:
 4285         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4286         break;
 4287        case Op_VecX:
 4288         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4289         break;
 4290       case Op_VecY:
 4291       case Op_VecZ:
 4292         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4293         break;
 4294       default:
 4295         ShouldNotReachHere();
 4296       }
 4297     } else { // store
 4298       switch (ireg) {
 4299       case Op_VecS:
 4300         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4301         break;
 4302       case Op_VecD:
 4303         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4304         break;
 4305        case Op_VecX:
 4306         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4307         break;
 4308       case Op_VecY:
 4309       case Op_VecZ:
 4310         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4311         break;
 4312       default:
 4313         ShouldNotReachHere();
 4314       }
 4315     }
 4316 #endif
 4317   }
 4318 }
 4319 
 4320 template <class T>
 4321 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4322   int size = type2aelembytes(bt) * len;
 4323   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4324   for (int i = 0; i < len; i++) {
 4325     int offset = i * type2aelembytes(bt);
 4326     switch (bt) {
 4327       case T_BYTE: val->at(i) = con; break;
 4328       case T_SHORT: {
 4329         jshort c = con;
 4330         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4331         break;
 4332       }
 4333       case T_INT: {
 4334         jint c = con;
 4335         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4336         break;
 4337       }
 4338       case T_LONG: {
 4339         jlong c = con;
 4340         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4341         break;
 4342       }
 4343       case T_FLOAT: {
 4344         jfloat c = con;
 4345         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4346         break;
 4347       }
 4348       case T_DOUBLE: {
 4349         jdouble c = con;
 4350         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4351         break;
 4352       }
 4353       default: assert(false, "%s", type2name(bt));
 4354     }
 4355   }
 4356   return val;
 4357 }
 4358 
 4359 static inline jlong high_bit_set(BasicType bt) {
 4360   switch (bt) {
 4361     case T_BYTE:  return 0x8080808080808080;
 4362     case T_SHORT: return 0x8000800080008000;
 4363     case T_INT:   return 0x8000000080000000;
 4364     case T_LONG:  return 0x8000000000000000;
 4365     default:
 4366       ShouldNotReachHere();
 4367       return 0;
 4368   }
 4369 }
 4370 
 4371 #ifndef PRODUCT
 4372   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4373     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4374   }
 4375 #endif
 4376 
 4377   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4378     __ nop(_count);
 4379   }
 4380 
 4381   uint MachNopNode::size(PhaseRegAlloc*) const {
 4382     return _count;
 4383   }
 4384 
 4385 #ifndef PRODUCT
 4386   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4387     st->print("# breakpoint");
 4388   }
 4389 #endif
 4390 
 4391   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4392     __ int3();
 4393   }
 4394 
 4395   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4396     return MachNode::size(ra_);
 4397   }
 4398 
 4399 %}
 4400 
 4401 //----------ENCODING BLOCK-----------------------------------------------------
 4402 // This block specifies the encoding classes used by the compiler to
 4403 // output byte streams.  Encoding classes are parameterized macros
 4404 // used by Machine Instruction Nodes in order to generate the bit
 4405 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
 4409 // which returns its register number when queried.  CONST_INTER causes
 4410 // an operand to generate a function which returns the value of the
 4411 // constant when queried.  MEMORY_INTER causes an operand to generate
 4412 // four functions which return the Base Register, the Index Register,
 4413 // the Scale Value, and the Offset Value of the operand when queried.
 4414 // COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e., the encoding bits for the instruction)
 4416 // associated with each basic boolean condition for a conditional
 4417 // instruction.
 4418 //
// Instructions specify two basic values for encoding.  A function is
// also available to check whether a constant displacement is an
// oop. Instructions use the ins_encode keyword to specify their encoding
 4422 // classes (which must be a sequence of enc_class names, and their
 4423 // parameters, specified in the encoding block), and they use the
 4424 // opcode keyword to specify, in order, their primary, secondary, and
 4425 // tertiary opcode.  Only the opcode sections which a particular
 4426 // instruction needs for encoding need to be specified.
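
// As an illustration only (hypothetical; not an encoding class used below),
// a minimal enc_class that emits a register-to-register move, and how an
// instruction would reference it, might look like:
//
//   enc_class example_reg_move(rRegI dst, rRegI src) %{
//     __ movl($dst$$Register, $src$$Register);
//   %}
//
//   instruct ... %{ ... ins_encode(example_reg_move(dst, src)); ... %}
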
 4427 encode %{
 4428   enc_class cdql_enc(no_rax_rdx_RegI div)
 4429   %{
 4430     // Full implementation of Java idiv and irem; checks for
 4431     // special case as described in JVM spec., p.243 & p.271.
 4432     //
 4433     //         normal case                           special case
 4434     //
 4435     // input : rax: dividend                         min_int
 4436     //         reg: divisor                          -1
 4437     //
 4438     // output: rax: quotient  (= rax idiv reg)       min_int
 4439     //         rdx: remainder (= rax irem reg)       0
 4440     //
    //  Code sequence:
 4442     //
 4443     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4444     //    5:   75 07/08                jne    e <normal>
 4445     //    7:   33 d2                   xor    %edx,%edx
 4446     //  [div >= 8 -> offset + 1]
 4447     //  [REX_B]
 4448     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4449     //    c:   74 03/04                je     11 <done>
 4450     // 000000000000000e <normal>:
 4451     //    e:   99                      cltd
 4452     //  [div >= 8 -> offset + 1]
 4453     //  [REX_B]
 4454     //    f:   f7 f9                   idiv   $div
 4455     // 0000000000000011 <done>:
 4456     Label normal;
 4457     Label done;
 4458 
 4459     // cmp    $0x80000000,%eax
 4460     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4461 
 4462     // jne    e <normal>
 4463     __ jccb(Assembler::notEqual, normal);
 4464 
 4465     // xor    %edx,%edx
 4466     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4467 
    // cmp    $0xffffffffffffffff,$div
 4469     __ cmpl($div$$Register, -1);
 4470 
 4471     // je     11 <done>
 4472     __ jccb(Assembler::equal, done);
 4473 
 4474     // <normal>
 4475     // cltd
 4476     __ bind(normal);
 4477     __ cdql();
 4478 
 4479     // idivl
 4480     // <done>
 4481     __ idivl($div$$Register);
 4482     __ bind(done);
 4483   %}
 4484 
 4485   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4486   %{
 4487     // Full implementation of Java ldiv and lrem; checks for
 4488     // special case as described in JVM spec., p.243 & p.271.
 4489     //
 4490     //         normal case                           special case
 4491     //
 4492     // input : rax: dividend                         min_long
 4493     //         reg: divisor                          -1
 4494     //
 4495     // output: rax: quotient  (= rax idiv reg)       min_long
 4496     //         rdx: remainder (= rax irem reg)       0
 4497     //
    //  Code sequence:
 4499     //
 4500     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4501     //    7:   00 00 80
 4502     //    a:   48 39 d0                cmp    %rdx,%rax
 4503     //    d:   75 08                   jne    17 <normal>
 4504     //    f:   33 d2                   xor    %edx,%edx
 4505     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4506     //   15:   74 05                   je     1c <done>
 4507     // 0000000000000017 <normal>:
 4508     //   17:   48 99                   cqto
 4509     //   19:   48 f7 f9                idiv   $div
 4510     // 000000000000001c <done>:
 4511     Label normal;
 4512     Label done;
 4513 
 4514     // mov    $0x8000000000000000,%rdx
 4515     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4516 
 4517     // cmp    %rdx,%rax
 4518     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4519 
 4520     // jne    17 <normal>
 4521     __ jccb(Assembler::notEqual, normal);
 4522 
 4523     // xor    %edx,%edx
 4524     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4525 
 4526     // cmp    $0xffffffffffffffff,$div
 4527     __ cmpq($div$$Register, -1);
 4528 
    // je     1c <done>
 4530     __ jccb(Assembler::equal, done);
 4531 
 4532     // <normal>
 4533     // cqto
 4534     __ bind(normal);
 4535     __ cdqq();
 4536 
    // idivq
 4538     // <done>
 4539     __ idivq($div$$Register);
 4540     __ bind(done);
 4541   %}
 4542 
 4543   enc_class clear_avx %{
 4544     DEBUG_ONLY(int off0 = __ offset());
 4545     if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when the current compiled code uses
      // wide vectors, to avoid the AVX <-> SSE transition penalty during the call.
 4549       __ vzeroupper();
 4550     }
 4551     DEBUG_ONLY(int off1 = __ offset());
 4552     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4553   %}
 4554 
 4555   enc_class Java_To_Runtime(method meth) %{
 4556     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4557     __ call(r10);
 4558     __ post_call_nop();
 4559   %}
 4560 
 4561   enc_class Java_Static_Call(method meth)
 4562   %{
 4563     // JAVA STATIC CALL
 4564     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4565     // determine who we intended to call.
 4566     if (!_method) {
 4567       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4568     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4569       // The NOP here is purely to ensure that eliding a call to
 4570       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4571       __ addr_nop_5();
 4572       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4573     } else {
 4574       int method_index = resolved_method_index(masm);
 4575       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4576                                                   : static_call_Relocation::spec(method_index);
 4577       address mark = __ pc();
 4578       int call_offset = __ offset();
 4579       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4580       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4581         // Calls of the same statically bound method can share
 4582         // a stub to the interpreter.
 4583         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4584       } else {
 4585         // Emit stubs for static call.
 4586         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4587         __ clear_inst_mark();
 4588         if (stub == nullptr) {
 4589           ciEnv::current()->record_failure("CodeCache is full");
 4590           return;
 4591         }
 4592       }
 4593     }
 4594     __ post_call_nop();
 4595   %}
 4596 
 4597   enc_class Java_Dynamic_Call(method meth) %{
 4598     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4599     __ post_call_nop();
 4600   %}
 4601 
 4602   enc_class call_epilog %{
 4603     if (VerifyStackAtCalls) {
 4604       // Check that stack depth is unchanged: find majik cookie on stack
 4605       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4606       Label L;
 4607       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4608       __ jccb(Assembler::equal, L);
 4609       // Die if stack mismatch
 4610       __ int3();
 4611       __ bind(L);
 4612     }
 4613   %}
 4614 
 4615 %}
 4616 
 4617 //----------FRAME--------------------------------------------------------------
 4618 // Definition of frame structure and management information.
 4619 //
 4620 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4621 //                             |   (to get allocators register number
 4622 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4623 //  r   CALLER     |        |
 4624 //  o     |        +--------+      pad to even-align allocators stack-slot
 4625 //  w     V        |  pad0  |        numbers; owned by CALLER
 4626 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4627 //  h     ^        |   in   |  5
 4628 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4629 //  |     |        |        |  3
 4630 //  |     |        +--------+
 4631 //  V     |        | old out|      Empty on Intel, window on Sparc
 4632 //        |    old |preserve|      Must be even aligned.
 4633 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4634 //        |        |   in   |  3   area for Intel ret address
 4635 //     Owned by    |preserve|      Empty on Sparc.
 4636 //       SELF      +--------+
 4637 //        |        |  pad2  |  2   pad to align old SP
 4638 //        |        +--------+  1
 4639 //        |        | locks  |  0
 4640 //        |        +--------+----> OptoReg::stack0(), even aligned
 4641 //        |        |  pad1  | 11   pad to align new SP
 4642 //        |        +--------+
 4643 //        |        |        | 10
 4644 //        |        | spills |  9   spills
 4645 //        V        |        |  8   (pad0 slot for callee)
 4646 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4647 //        ^        |  out   |  7
 4648 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4649 //     Owned by    +--------+
 4650 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4651 //        |    new |preserve|      Must be even-aligned.
 4652 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4653 //        |        |        |
 4654 //
 4655 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4656 //         known from SELF's arguments and the Java calling convention.
 4657 //         Region 6-7 is determined per call site.
 4658 // Note 2: If the calling convention leaves holes in the incoming argument
 4659 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4660 //         are owned by the CALLEE.  Holes should not be necessary in the
 4661 //         incoming area, as the Java calling convention is completely under
 4662 //         the control of the AD file.  Doubles can be sorted and packed to
 4663 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4664 //         varargs C calling conventions.
 4665 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4666 //         even aligned with pad0 as needed.
 4667 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4668 //         region 6-11 is even aligned; it may be padded out more so that
 4669 //         the region from SP to FP meets the minimum stack alignment.
 4670 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4671 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4672 //         SP meets the minimum alignment.
 4673 
 4674 frame
 4675 %{
 4676   // These three registers define part of the calling convention
 4677   // between compiled code and the interpreter.
 4678   inline_cache_reg(RAX);                // Inline Cache Register
 4679 
 4680   // Optional: name the operand used by cisc-spilling to access
 4681   // [stack_pointer + offset]
 4682   cisc_spilling_operand_name(indOffset32);
 4683 
 4684   // Number of stack slots consumed by locking an object
 4685   sync_stack_slots(2);
 4686 
 4687   // Compiled code's Frame Pointer
 4688   frame_pointer(RSP);
 4689 
 4690   // Stack alignment requirement
 4691   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4692 
 4693   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4694   // for calls to C.  Supports the var-args backing area for register parms.
 4695   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4696 
 4697   // The after-PROLOG location of the return address.  Location of
 4698   // return address specifies a type (REG or STACK) and a number
 4699   // representing the register number (i.e. - use a register name) or
 4700   // stack slot.
 4701   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and the alignment word.
 4703   return_addr(STACK - 2 +
 4704               align_up((Compile::current()->in_preserve_stack_slots() +
 4705                         Compile::current()->fixed_slots()),
 4706                        stack_alignment_in_slots()));
 4707 
 4708   // Location of compiled Java return values.  Same as C for now.
 4709   return_value
 4710   %{
 4711     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4712            "only return normal values");
 4713 
 4714     static const int lo[Op_RegL + 1] = {
 4715       0,
 4716       0,
 4717       RAX_num,  // Op_RegN
 4718       RAX_num,  // Op_RegI
 4719       RAX_num,  // Op_RegP
 4720       XMM0_num, // Op_RegF
 4721       XMM0_num, // Op_RegD
 4722       RAX_num   // Op_RegL
 4723     };
 4724     static const int hi[Op_RegL + 1] = {
 4725       0,
 4726       0,
 4727       OptoReg::Bad, // Op_RegN
 4728       OptoReg::Bad, // Op_RegI
 4729       RAX_H_num,    // Op_RegP
 4730       OptoReg::Bad, // Op_RegF
 4731       XMM0b_num,    // Op_RegD
 4732       RAX_H_num     // Op_RegL
 4733     };
    // Flags and vector registers are excluded.
 4735     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4736     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4737   %}
 4738 %}
 4739 
 4740 //----------ATTRIBUTES---------------------------------------------------------
 4741 //----------Operand Attributes-------------------------------------------------
 4742 op_attrib op_cost(0);        // Required cost attribute
 4743 
 4744 //----------Instruction Attributes---------------------------------------------
 4745 ins_attrib ins_cost(100);       // Required cost attribute
 4746 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4747 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4748                                 // a non-matching short branch variant
 4749                                 // of some long branch?
 4750 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4751                                 // be a power of 2) specifies the
 4752                                 // alignment that some part of the
 4753                                 // instruction (not necessarily the
 4754                                 // start) requires.  If > 1, a
 4755                                 // compute_padding() function must be
 4756                                 // provided for the instruction
 4757 
 4758 // Whether this node is expanded during code emission into a sequence of
 4759 // instructions and the first instruction can perform an implicit null check.
 4760 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4761 
 4762 //----------OPERANDS-----------------------------------------------------------
 4763 // Operand definitions must precede instruction definitions for correct parsing
 4764 // in the ADLC because operands constitute user defined types which are used in
 4765 // instruction definitions.
 4766 
 4767 //----------Simple Operands----------------------------------------------------
 4768 // Immediate Operands
 4769 // Integer Immediate
 4770 operand immI()
 4771 %{
 4772   match(ConI);
 4773 
 4774   op_cost(10);
 4775   format %{ %}
 4776   interface(CONST_INTER);
 4777 %}
 4778 
 4779 // Constant for test vs zero
 4780 operand immI_0()
 4781 %{
 4782   predicate(n->get_int() == 0);
 4783   match(ConI);
 4784 
 4785   op_cost(0);
 4786   format %{ %}
 4787   interface(CONST_INTER);
 4788 %}
 4789 
 4790 // Constant for increment
 4791 operand immI_1()
 4792 %{
 4793   predicate(n->get_int() == 1);
 4794   match(ConI);
 4795 
 4796   op_cost(0);
 4797   format %{ %}
 4798   interface(CONST_INTER);
 4799 %}
 4800 
 4801 // Constant for decrement
 4802 operand immI_M1()
 4803 %{
 4804   predicate(n->get_int() == -1);
 4805   match(ConI);
 4806 
 4807   op_cost(0);
 4808   format %{ %}
 4809   interface(CONST_INTER);
 4810 %}
 4811 
 4812 operand immI_2()
 4813 %{
 4814   predicate(n->get_int() == 2);
 4815   match(ConI);
 4816 
 4817   op_cost(0);
 4818   format %{ %}
 4819   interface(CONST_INTER);
 4820 %}
 4821 
 4822 operand immI_4()
 4823 %{
 4824   predicate(n->get_int() == 4);
 4825   match(ConI);
 4826 
 4827   op_cost(0);
 4828   format %{ %}
 4829   interface(CONST_INTER);
 4830 %}
 4831 
 4832 operand immI_8()
 4833 %{
 4834   predicate(n->get_int() == 8);
 4835   match(ConI);
 4836 
 4837   op_cost(0);
 4838   format %{ %}
 4839   interface(CONST_INTER);
 4840 %}
 4841 
 4842 // Valid scale values for addressing modes
 4843 operand immI2()
 4844 %{
 4845   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4846   match(ConI);
 4847 
 4848   format %{ %}
 4849   interface(CONST_INTER);
 4850 %}
 4851 
 4852 operand immU7()
 4853 %{
 4854   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4855   match(ConI);
 4856 
 4857   op_cost(5);
 4858   format %{ %}
 4859   interface(CONST_INTER);
 4860 %}
 4861 
 4862 operand immI8()
 4863 %{
 4864   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4865   match(ConI);
 4866 
 4867   op_cost(5);
 4868   format %{ %}
 4869   interface(CONST_INTER);
 4870 %}
 4871 
 4872 operand immU8()
 4873 %{
 4874   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4875   match(ConI);
 4876 
 4877   op_cost(5);
 4878   format %{ %}
 4879   interface(CONST_INTER);
 4880 %}
 4881 
 4882 operand immI16()
 4883 %{
 4884   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4885   match(ConI);
 4886 
 4887   op_cost(10);
 4888   format %{ %}
 4889   interface(CONST_INTER);
 4890 %}
 4891 
 4892 // Int Immediate non-negative
 4893 operand immU31()
 4894 %{
 4895   predicate(n->get_int() >= 0);
 4896   match(ConI);
 4897 
 4898   op_cost(0);
 4899   format %{ %}
 4900   interface(CONST_INTER);
 4901 %}
 4902 
 4903 // Pointer Immediate
 4904 operand immP()
 4905 %{
 4906   match(ConP);
 4907 
 4908   op_cost(10);
 4909   format %{ %}
 4910   interface(CONST_INTER);
 4911 %}
 4912 
 4913 // Null Pointer Immediate
 4914 operand immP0()
 4915 %{
 4916   predicate(n->get_ptr() == 0);
 4917   match(ConP);
 4918 
 4919   op_cost(5);
 4920   format %{ %}
 4921   interface(CONST_INTER);
 4922 %}
 4923 
// Narrow Pointer Immediate
 4925 operand immN() %{
 4926   match(ConN);
 4927 
 4928   op_cost(10);
 4929   format %{ %}
 4930   interface(CONST_INTER);
 4931 %}
 4932 
 4933 operand immNKlass() %{
 4934   match(ConNKlass);
 4935 
 4936   op_cost(10);
 4937   format %{ %}
 4938   interface(CONST_INTER);
 4939 %}
 4940 
// Narrow Null Pointer Immediate
 4942 operand immN0() %{
 4943   predicate(n->get_narrowcon() == 0);
 4944   match(ConN);
 4945 
 4946   op_cost(5);
 4947   format %{ %}
 4948   interface(CONST_INTER);
 4949 %}
 4950 
 4951 operand immP31()
 4952 %{
 4953   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 4954             && (n->get_ptr() >> 31) == 0);
 4955   match(ConP);
 4956 
 4957   op_cost(5);
 4958   format %{ %}
 4959   interface(CONST_INTER);
 4960 %}
 4961 
 4962 
 4963 // Long Immediate
 4964 operand immL()
 4965 %{
 4966   match(ConL);
 4967 
 4968   op_cost(20);
 4969   format %{ %}
 4970   interface(CONST_INTER);
 4971 %}
 4972 
 4973 // Long Immediate 8-bit
 4974 operand immL8()
 4975 %{
 4976   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 4977   match(ConL);
 4978 
 4979   op_cost(5);
 4980   format %{ %}
 4981   interface(CONST_INTER);
 4982 %}
 4983 
 4984 // Long Immediate 32-bit unsigned
 4985 operand immUL32()
 4986 %{
 4987   predicate(n->get_long() == (unsigned int) (n->get_long()));
 4988   match(ConL);
 4989 
 4990   op_cost(10);
 4991   format %{ %}
 4992   interface(CONST_INTER);
 4993 %}
 4994 
 4995 // Long Immediate 32-bit signed
 4996 operand immL32()
 4997 %{
 4998   predicate(n->get_long() == (int) (n->get_long()));
 4999   match(ConL);
 5000 
 5001   op_cost(15);
 5002   format %{ %}
 5003   interface(CONST_INTER);
 5004 %}
 5005 
 5006 operand immL_Pow2()
 5007 %{
 5008   predicate(is_power_of_2((julong)n->get_long()));
 5009   match(ConL);
 5010 
 5011   op_cost(15);
 5012   format %{ %}
 5013   interface(CONST_INTER);
 5014 %}
 5015 
 5016 operand immL_NotPow2()
 5017 %{
 5018   predicate(is_power_of_2((julong)~n->get_long()));
 5019   match(ConL);
 5020 
 5021   op_cost(15);
 5022   format %{ %}
 5023   interface(CONST_INTER);
 5024 %}
 5025 
 5026 // Long Immediate zero
 5027 operand immL0()
 5028 %{
 5029   predicate(n->get_long() == 0L);
 5030   match(ConL);
 5031 
 5032   op_cost(10);
 5033   format %{ %}
 5034   interface(CONST_INTER);
 5035 %}
 5036 
 5037 // Constant for increment
 5038 operand immL1()
 5039 %{
 5040   predicate(n->get_long() == 1);
 5041   match(ConL);
 5042 
 5043   format %{ %}
 5044   interface(CONST_INTER);
 5045 %}
 5046 
 5047 // Constant for decrement
 5048 operand immL_M1()
 5049 %{
 5050   predicate(n->get_long() == -1);
 5051   match(ConL);
 5052 
 5053   format %{ %}
 5054   interface(CONST_INTER);
 5055 %}
 5056 
 5057 // Long Immediate: low 32-bit mask
 5058 operand immL_32bits()
 5059 %{
 5060   predicate(n->get_long() == 0xFFFFFFFFL);
 5061   match(ConL);
 5062   op_cost(20);
 5063 
 5064   format %{ %}
 5065   interface(CONST_INTER);
 5066 %}
 5067 
 5068 // Int Immediate: 2^n-1, positive
 5069 operand immI_Pow2M1()
 5070 %{
 5071   predicate((n->get_int() > 0)
 5072             && is_power_of_2((juint)n->get_int() + 1));
 5073   match(ConI);
 5074 
 5075   op_cost(20);
 5076   format %{ %}
 5077   interface(CONST_INTER);
 5078 %}
 5079 
 5080 // Float Immediate zero
 5081 operand immF0()
 5082 %{
 5083   predicate(jint_cast(n->getf()) == 0);
 5084   match(ConF);
 5085 
 5086   op_cost(5);
 5087   format %{ %}
 5088   interface(CONST_INTER);
 5089 %}
 5090 
 5091 // Float Immediate
 5092 operand immF()
 5093 %{
 5094   match(ConF);
 5095 
 5096   op_cost(15);
 5097   format %{ %}
 5098   interface(CONST_INTER);
 5099 %}
 5100 
 5101 // Half Float Immediate
 5102 operand immH()
 5103 %{
 5104   match(ConH);
 5105 
 5106   op_cost(15);
 5107   format %{ %}
 5108   interface(CONST_INTER);
 5109 %}
 5110 
 5111 // Double Immediate zero
 5112 operand immD0()
 5113 %{
 5114   predicate(jlong_cast(n->getd()) == 0);
 5115   match(ConD);
 5116 
 5117   op_cost(5);
 5118   format %{ %}
 5119   interface(CONST_INTER);
 5120 %}
 5121 
 5122 // Double Immediate
 5123 operand immD()
 5124 %{
 5125   match(ConD);
 5126 
 5127   op_cost(15);
 5128   format %{ %}
 5129   interface(CONST_INTER);
 5130 %}
 5131 
 5132 // Immediates for special shifts (sign extend)
 5133 
// Constants for special shift counts
 5135 operand immI_16()
 5136 %{
 5137   predicate(n->get_int() == 16);
 5138   match(ConI);
 5139 
 5140   format %{ %}
 5141   interface(CONST_INTER);
 5142 %}
 5143 
 5144 operand immI_24()
 5145 %{
 5146   predicate(n->get_int() == 24);
 5147   match(ConI);
 5148 
 5149   format %{ %}
 5150   interface(CONST_INTER);
 5151 %}
 5152 
 5153 // Constant for byte-wide masking
 5154 operand immI_255()
 5155 %{
 5156   predicate(n->get_int() == 255);
 5157   match(ConI);
 5158 
 5159   format %{ %}
 5160   interface(CONST_INTER);
 5161 %}
 5162 
 5163 // Constant for short-wide masking
 5164 operand immI_65535()
 5165 %{
 5166   predicate(n->get_int() == 65535);
 5167   match(ConI);
 5168 
 5169   format %{ %}
 5170   interface(CONST_INTER);
 5171 %}
 5172 
 5173 // Constant for byte-wide masking
 5174 operand immL_255()
 5175 %{
 5176   predicate(n->get_long() == 255);
 5177   match(ConL);
 5178 
 5179   format %{ %}
 5180   interface(CONST_INTER);
 5181 %}
 5182 
 5183 // Constant for short-wide masking
 5184 operand immL_65535()
 5185 %{
 5186   predicate(n->get_long() == 65535);
 5187   match(ConL);
 5188 
 5189   format %{ %}
 5190   interface(CONST_INTER);
 5191 %}
 5192 
 5193 // AOT Runtime Constants Address
 5194 operand immAOTRuntimeConstantsAddress()
 5195 %{
 5196   // Check if the address is in the range of AOT Runtime Constants
 5197   predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
 5198   match(ConP);
 5199 
 5200   op_cost(0);
 5201   format %{ %}
 5202   interface(CONST_INTER);
 5203 %}
 5204 
 5205 operand kReg()
 5206 %{
 5207   constraint(ALLOC_IN_RC(vectmask_reg));
 5208   match(RegVectMask);
 5209   format %{%}
 5210   interface(REG_INTER);
 5211 %}
 5212 
 5213 // Register Operands
 5214 // Integer Register
 5215 operand rRegI()
 5216 %{
 5217   constraint(ALLOC_IN_RC(int_reg));
 5218   match(RegI);
 5219 
 5220   match(rax_RegI);
 5221   match(rbx_RegI);
 5222   match(rcx_RegI);
 5223   match(rdx_RegI);
 5224   match(rdi_RegI);
 5225 
 5226   format %{ %}
 5227   interface(REG_INTER);
 5228 %}
 5229 
 5230 // Special Registers
 5231 operand rax_RegI()
 5232 %{
 5233   constraint(ALLOC_IN_RC(int_rax_reg));
 5234   match(RegI);
 5235   match(rRegI);
 5236 
 5237   format %{ "RAX" %}
 5238   interface(REG_INTER);
 5239 %}
 5240 
 5241 // Special Registers
 5242 operand rbx_RegI()
 5243 %{
 5244   constraint(ALLOC_IN_RC(int_rbx_reg));
 5245   match(RegI);
 5246   match(rRegI);
 5247 
 5248   format %{ "RBX" %}
 5249   interface(REG_INTER);
 5250 %}
 5251 
 5252 operand rcx_RegI()
 5253 %{
 5254   constraint(ALLOC_IN_RC(int_rcx_reg));
 5255   match(RegI);
 5256   match(rRegI);
 5257 
 5258   format %{ "RCX" %}
 5259   interface(REG_INTER);
 5260 %}
 5261 
 5262 operand rdx_RegI()
 5263 %{
 5264   constraint(ALLOC_IN_RC(int_rdx_reg));
 5265   match(RegI);
 5266   match(rRegI);
 5267 
 5268   format %{ "RDX" %}
 5269   interface(REG_INTER);
 5270 %}
 5271 
 5272 operand rdi_RegI()
 5273 %{
 5274   constraint(ALLOC_IN_RC(int_rdi_reg));
 5275   match(RegI);
 5276   match(rRegI);
 5277 
 5278   format %{ "RDI" %}
 5279   interface(REG_INTER);
 5280 %}
 5281 
 5282 operand no_rax_rdx_RegI()
 5283 %{
 5284   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5285   match(RegI);
 5286   match(rbx_RegI);
 5287   match(rcx_RegI);
 5288   match(rdi_RegI);
 5289 
 5290   format %{ %}
 5291   interface(REG_INTER);
 5292 %}
 5293 
 5294 operand no_rbp_r13_RegI()
 5295 %{
 5296   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5297   match(RegI);
 5298   match(rRegI);
 5299   match(rax_RegI);
 5300   match(rbx_RegI);
 5301   match(rcx_RegI);
 5302   match(rdx_RegI);
 5303   match(rdi_RegI);
 5304 
 5305   format %{ %}
 5306   interface(REG_INTER);
 5307 %}
 5308 
 5309 // Pointer Register
 5310 operand any_RegP()
 5311 %{
 5312   constraint(ALLOC_IN_RC(any_reg));
 5313   match(RegP);
 5314   match(rax_RegP);
 5315   match(rbx_RegP);
 5316   match(rdi_RegP);
 5317   match(rsi_RegP);
 5318   match(rbp_RegP);
 5319   match(r15_RegP);
 5320   match(rRegP);
 5321 
 5322   format %{ %}
 5323   interface(REG_INTER);
 5324 %}
 5325 
 5326 operand rRegP()
 5327 %{
 5328   constraint(ALLOC_IN_RC(ptr_reg));
 5329   match(RegP);
 5330   match(rax_RegP);
 5331   match(rbx_RegP);
 5332   match(rdi_RegP);
 5333   match(rsi_RegP);
 5334   match(rbp_RegP);  // See Q&A below about
 5335   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5336 
 5337   format %{ %}
 5338   interface(REG_INTER);
 5339 %}
 5340 
 5341 operand rRegN() %{
 5342   constraint(ALLOC_IN_RC(int_reg));
 5343   match(RegN);
 5344 
 5345   format %{ %}
 5346   interface(REG_INTER);
 5347 %}
 5348 
// Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
// Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, the allocator will never
// assign r15 to the instruction's result.
// The same logic applies to rbp_RegP being a match for rRegP: if
// PreserveFramePointer==true, RBP is used as a proper frame pointer and is not
// included in ptr_reg. As a result, the allocator never assigns RBP to an
// instruction's output either.
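// For example (a schematic rule, not one defined in this file):
//   instruct example_addP(rRegP dst, rRegP src) %{ match(Set dst (AddP dst src)); %}
// may accept r15 as 'src', because the match rule admits it, but the allocator
// can never choose r15 for 'dst': r15 is absent from ptr_reg's register mask.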
 5359 
 5360 // This operand is not allowed to use RBP even if
 5361 // RBP is not used to hold the frame pointer.
 5362 operand no_rbp_RegP()
 5363 %{
 5364   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5365   match(RegP);
 5366   match(rbx_RegP);
 5367   match(rsi_RegP);
 5368   match(rdi_RegP);
 5369 
 5370   format %{ %}
 5371   interface(REG_INTER);
 5372 %}
 5373 
 5374 // Special Registers
 5375 // Return a pointer value
 5376 operand rax_RegP()
 5377 %{
 5378   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5379   match(RegP);
 5380   match(rRegP);
 5381 
 5382   format %{ %}
 5383   interface(REG_INTER);
 5384 %}
 5385 
 5386 // Special Registers
 5387 // Return a compressed pointer value
 5388 operand rax_RegN()
 5389 %{
 5390   constraint(ALLOC_IN_RC(int_rax_reg));
 5391   match(RegN);
 5392   match(rRegN);
 5393 
 5394   format %{ %}
 5395   interface(REG_INTER);
 5396 %}
 5397 
 5398 // Used in AtomicAdd
 5399 operand rbx_RegP()
 5400 %{
 5401   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5402   match(RegP);
 5403   match(rRegP);
 5404 
 5405   format %{ %}
 5406   interface(REG_INTER);
 5407 %}
 5408 
 5409 operand rsi_RegP()
 5410 %{
 5411   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5412   match(RegP);
 5413   match(rRegP);
 5414 
 5415   format %{ %}
 5416   interface(REG_INTER);
 5417 %}
 5418 
 5419 operand rbp_RegP()
 5420 %{
 5421   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5422   match(RegP);
 5423   match(rRegP);
 5424 
 5425   format %{ %}
 5426   interface(REG_INTER);
 5427 %}
 5428 
 5429 // Used in rep stosq
 5430 operand rdi_RegP()
 5431 %{
 5432   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5433   match(RegP);
 5434   match(rRegP);
 5435 
 5436   format %{ %}
 5437   interface(REG_INTER);
 5438 %}
 5439 
 5440 operand r15_RegP()
 5441 %{
 5442   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5443   match(RegP);
 5444   match(rRegP);
 5445 
 5446   format %{ %}
 5447   interface(REG_INTER);
 5448 %}
 5449 
 5450 operand rRegL()
 5451 %{
 5452   constraint(ALLOC_IN_RC(long_reg));
 5453   match(RegL);
 5454   match(rax_RegL);
 5455   match(rdx_RegL);
 5456 
 5457   format %{ %}
 5458   interface(REG_INTER);
 5459 %}
 5460 
 5461 // Special Registers
 5462 operand no_rax_rdx_RegL()
 5463 %{
 5464   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5465   match(RegL);
 5466   match(rRegL);
 5467 
 5468   format %{ %}
 5469   interface(REG_INTER);
 5470 %}
 5471 
 5472 operand rax_RegL()
 5473 %{
 5474   constraint(ALLOC_IN_RC(long_rax_reg));
 5475   match(RegL);
 5476   match(rRegL);
 5477 
 5478   format %{ "RAX" %}
 5479   interface(REG_INTER);
 5480 %}
 5481 
 5482 operand rcx_RegL()
 5483 %{
 5484   constraint(ALLOC_IN_RC(long_rcx_reg));
 5485   match(RegL);
 5486   match(rRegL);
 5487 
 5488   format %{ %}
 5489   interface(REG_INTER);
 5490 %}
 5491 
 5492 operand rdx_RegL()
 5493 %{
 5494   constraint(ALLOC_IN_RC(long_rdx_reg));
 5495   match(RegL);
 5496   match(rRegL);
 5497 
 5498   format %{ %}
 5499   interface(REG_INTER);
 5500 %}
 5501 
 5502 operand r11_RegL()
 5503 %{
 5504   constraint(ALLOC_IN_RC(long_r11_reg));
 5505   match(RegL);
 5506   match(rRegL);
 5507 
 5508   format %{ %}
 5509   interface(REG_INTER);
 5510 %}
 5511 
 5512 operand no_rbp_r13_RegL()
 5513 %{
 5514   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5515   match(RegL);
 5516   match(rRegL);
 5517   match(rax_RegL);
 5518   match(rcx_RegL);
 5519   match(rdx_RegL);
 5520 
 5521   format %{ %}
 5522   interface(REG_INTER);
 5523 %}
 5524 
 5525 // Flags register, used as output of compare instructions
 5526 operand rFlagsReg()
 5527 %{
 5528   constraint(ALLOC_IN_RC(int_flags));
 5529   match(RegFlags);
 5530 
 5531   format %{ "RFLAGS" %}
 5532   interface(REG_INTER);
 5533 %}
 5534 
 5535 // Flags register, used as output of FLOATING POINT compare instructions
 5536 operand rFlagsRegU()
 5537 %{
 5538   constraint(ALLOC_IN_RC(int_flags));
 5539   match(RegFlags);
 5540 
 5541   format %{ "RFLAGS_U" %}
 5542   interface(REG_INTER);
 5543 %}
 5544 
 5545 operand rFlagsRegUCF() %{
 5546   constraint(ALLOC_IN_RC(int_flags));
 5547   match(RegFlags);
 5548   predicate(!UseAPX || !VM_Version::supports_avx10_2());
 5549 
 5550   format %{ "RFLAGS_U_CF" %}
 5551   interface(REG_INTER);
 5552 %}
 5553 
 5554 operand rFlagsRegUCFE() %{
 5555   constraint(ALLOC_IN_RC(int_flags));
 5556   match(RegFlags);
 5557   predicate(UseAPX && VM_Version::supports_avx10_2());
 5558 
 5559   format %{ "RFLAGS_U_CFE" %}
 5560   interface(REG_INTER);
 5561 %}
 5562 
 5563 // Float register operands
 5564 operand regF() %{
 5565    constraint(ALLOC_IN_RC(float_reg));
 5566    match(RegF);
 5567 
 5568    format %{ %}
 5569    interface(REG_INTER);
 5570 %}
 5571 
 5572 // Float register operands
 5573 operand legRegF() %{
 5574    constraint(ALLOC_IN_RC(float_reg_legacy));
 5575    match(RegF);
 5576 
 5577    format %{ %}
 5578    interface(REG_INTER);
 5579 %}
 5580 
 5581 // Float register operands
 5582 operand vlRegF() %{
 5583    constraint(ALLOC_IN_RC(float_reg_vl));
 5584    match(RegF);
 5585 
 5586    format %{ %}
 5587    interface(REG_INTER);
 5588 %}
 5589 
 5590 // Double register operands
 5591 operand regD() %{
 5592    constraint(ALLOC_IN_RC(double_reg));
 5593    match(RegD);
 5594 
 5595    format %{ %}
 5596    interface(REG_INTER);
 5597 %}
 5598 
 5599 // Double register operands
 5600 operand legRegD() %{
 5601    constraint(ALLOC_IN_RC(double_reg_legacy));
 5602    match(RegD);
 5603 
 5604    format %{ %}
 5605    interface(REG_INTER);
 5606 %}
 5607 
 5608 // Double register operands
 5609 operand vlRegD() %{
 5610    constraint(ALLOC_IN_RC(double_reg_vl));
 5611    match(RegD);
 5612 
 5613    format %{ %}
 5614    interface(REG_INTER);
 5615 %}
 5616 
 5617 //----------Memory Operands----------------------------------------------------
 5618 // Direct Memory Operand
 5619 // operand direct(immP addr)
 5620 // %{
 5621 //   match(addr);
 5622 
 5623 //   format %{ "[$addr]" %}
 5624 //   interface(MEMORY_INTER) %{
 5625 //     base(0xFFFFFFFF);
 5626 //     index(0x4);
 5627 //     scale(0x0);
 5628 //     disp($addr);
 5629 //   %}
 5630 // %}
 5631 
 5632 // Indirect Memory Operand
 5633 operand indirect(any_RegP reg)
 5634 %{
 5635   constraint(ALLOC_IN_RC(ptr_reg));
 5636   match(reg);
 5637 
 5638   format %{ "[$reg]" %}
 5639   interface(MEMORY_INTER) %{
 5640     base($reg);
 5641     index(0x4);
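    // Encoding 0x4 (RSP) in the index slot means "no index register": the x86
    // SIB byte cannot use RSP as an index, so that encoding doubles as the
    // "none" marker here and in the memory operands below.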
 5642     scale(0x0);
 5643     disp(0x0);
 5644   %}
 5645 %}
 5646 
 5647 // Indirect Memory Plus Short Offset Operand
 5648 operand indOffset8(any_RegP reg, immL8 off)
 5649 %{
 5650   constraint(ALLOC_IN_RC(ptr_reg));
 5651   match(AddP reg off);
 5652 
 5653   format %{ "[$reg + $off (8-bit)]" %}
 5654   interface(MEMORY_INTER) %{
 5655     base($reg);
 5656     index(0x4);
 5657     scale(0x0);
 5658     disp($off);
 5659   %}
 5660 %}
 5661 
 5662 // Indirect Memory Plus Long Offset Operand
 5663 operand indOffset32(any_RegP reg, immL32 off)
 5664 %{
 5665   constraint(ALLOC_IN_RC(ptr_reg));
 5666   match(AddP reg off);
 5667 
 5668   format %{ "[$reg + $off (32-bit)]" %}
 5669   interface(MEMORY_INTER) %{
 5670     base($reg);
 5671     index(0x4);
 5672     scale(0x0);
 5673     disp($off);
 5674   %}
 5675 %}
 5676 
 5677 // Indirect Memory Plus Index Register Plus Offset Operand
 5678 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5679 %{
 5680   constraint(ALLOC_IN_RC(ptr_reg));
 5681   match(AddP (AddP reg lreg) off);
 5682 
 5683   op_cost(10);
 5684   format %{"[$reg + $off + $lreg]" %}
 5685   interface(MEMORY_INTER) %{
 5686     base($reg);
 5687     index($lreg);
 5688     scale(0x0);
 5689     disp($off);
 5690   %}
 5691 %}
 5692 
 5693 // Indirect Memory Plus Index Register Plus Offset Operand
 5694 operand indIndex(any_RegP reg, rRegL lreg)
 5695 %{
 5696   constraint(ALLOC_IN_RC(ptr_reg));
 5697   match(AddP reg lreg);
 5698 
 5699   op_cost(10);
 5700   format %{"[$reg + $lreg]" %}
 5701   interface(MEMORY_INTER) %{
 5702     base($reg);
 5703     index($lreg);
 5704     scale(0x0);
 5705     disp(0x0);
 5706   %}
 5707 %}
 5708 
 5709 // Indirect Memory Times Scale Plus Index Register
 5710 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5711 %{
 5712   constraint(ALLOC_IN_RC(ptr_reg));
 5713   match(AddP reg (LShiftL lreg scale));
 5714 
 5715   op_cost(10);
 5716   format %{"[$reg + $lreg << $scale]" %}
 5717   interface(MEMORY_INTER) %{
 5718     base($reg);
 5719     index($lreg);
 5720     scale($scale);
 5721     disp(0x0);
 5722   %}
 5723 %}
 5724 
 5725 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5726 %{
 5727   constraint(ALLOC_IN_RC(ptr_reg));
 5728   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5729   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5730 
 5731   op_cost(10);
 5732   format %{"[$reg + pos $idx << $scale]" %}
 5733   interface(MEMORY_INTER) %{
 5734     base($reg);
 5735     index($idx);
 5736     scale($scale);
 5737     disp(0x0);
 5738   %}
 5739 %}
 5740 
 5741 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5742 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5743 %{
 5744   constraint(ALLOC_IN_RC(ptr_reg));
 5745   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5746 
 5747   op_cost(10);
 5748   format %{"[$reg + $off + $lreg << $scale]" %}
 5749   interface(MEMORY_INTER) %{
 5750     base($reg);
 5751     index($lreg);
 5752     scale($scale);
 5753     disp($off);
 5754   %}
 5755 %}
 5756 
 5757 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5758 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5759 %{
 5760   constraint(ALLOC_IN_RC(ptr_reg));
 5761   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5762   match(AddP (AddP reg (ConvI2L idx)) off);
 5763 
 5764   op_cost(10);
 5765   format %{"[$reg + $off + $idx]" %}
 5766   interface(MEMORY_INTER) %{
 5767     base($reg);
 5768     index($idx);
 5769     scale(0x0);
 5770     disp($off);
 5771   %}
 5772 %}
 5773 
 5774 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5775 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5776 %{
 5777   constraint(ALLOC_IN_RC(ptr_reg));
 5778   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5779   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5780 
 5781   op_cost(10);
 5782   format %{"[$reg + $off + $idx << $scale]" %}
 5783   interface(MEMORY_INTER) %{
 5784     base($reg);
 5785     index($idx);
 5786     scale($scale);
 5787     disp($off);
 5788   %}
 5789 %}
 5790 
 5791 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" addressing
// without a base register, so we can't free R12 even when
// CompressedOops::base() == nullptr.
 5794 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5795   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5796   constraint(ALLOC_IN_RC(ptr_reg));
 5797   match(AddP (DecodeN reg) off);
 5798 
 5799   op_cost(10);
 5800   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5801   interface(MEMORY_INTER) %{
 5802     base(0xc); // R12
 5803     index($reg);
 5804     scale(0x3);
 5805     disp($off);
 5806   %}
 5807 %}
 5808 
 5809 // Indirect Memory Operand
 5810 operand indirectNarrow(rRegN reg)
 5811 %{
 5812   predicate(CompressedOops::shift() == 0);
 5813   constraint(ALLOC_IN_RC(ptr_reg));
 5814   match(DecodeN reg);
 5815 
 5816   format %{ "[$reg]" %}
 5817   interface(MEMORY_INTER) %{
 5818     base($reg);
 5819     index(0x4);
 5820     scale(0x0);
 5821     disp(0x0);
 5822   %}
 5823 %}
 5824 
 5825 // Indirect Memory Plus Short Offset Operand
 5826 operand indOffset8Narrow(rRegN reg, immL8 off)
 5827 %{
 5828   predicate(CompressedOops::shift() == 0);
 5829   constraint(ALLOC_IN_RC(ptr_reg));
 5830   match(AddP (DecodeN reg) off);
 5831 
 5832   format %{ "[$reg + $off (8-bit)]" %}
 5833   interface(MEMORY_INTER) %{
 5834     base($reg);
 5835     index(0x4);
 5836     scale(0x0);
 5837     disp($off);
 5838   %}
 5839 %}
 5840 
 5841 // Indirect Memory Plus Long Offset Operand
 5842 operand indOffset32Narrow(rRegN reg, immL32 off)
 5843 %{
 5844   predicate(CompressedOops::shift() == 0);
 5845   constraint(ALLOC_IN_RC(ptr_reg));
 5846   match(AddP (DecodeN reg) off);
 5847 
 5848   format %{ "[$reg + $off (32-bit)]" %}
 5849   interface(MEMORY_INTER) %{
 5850     base($reg);
 5851     index(0x4);
 5852     scale(0x0);
 5853     disp($off);
 5854   %}
 5855 %}
 5856 
 5857 // Indirect Memory Plus Index Register Plus Offset Operand
 5858 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5859 %{
 5860   predicate(CompressedOops::shift() == 0);
 5861   constraint(ALLOC_IN_RC(ptr_reg));
 5862   match(AddP (AddP (DecodeN reg) lreg) off);
 5863 
 5864   op_cost(10);
 5865   format %{"[$reg + $off + $lreg]" %}
 5866   interface(MEMORY_INTER) %{
 5867     base($reg);
 5868     index($lreg);
 5869     scale(0x0);
 5870     disp($off);
 5871   %}
 5872 %}
 5873 
 5874 // Indirect Memory Plus Index Register Plus Offset Operand
 5875 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5876 %{
 5877   predicate(CompressedOops::shift() == 0);
 5878   constraint(ALLOC_IN_RC(ptr_reg));
 5879   match(AddP (DecodeN reg) lreg);
 5880 
 5881   op_cost(10);
 5882   format %{"[$reg + $lreg]" %}
 5883   interface(MEMORY_INTER) %{
 5884     base($reg);
 5885     index($lreg);
 5886     scale(0x0);
 5887     disp(0x0);
 5888   %}
 5889 %}
 5890 
 5891 // Indirect Memory Times Scale Plus Index Register
 5892 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5893 %{
 5894   predicate(CompressedOops::shift() == 0);
 5895   constraint(ALLOC_IN_RC(ptr_reg));
 5896   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5897 
 5898   op_cost(10);
 5899   format %{"[$reg + $lreg << $scale]" %}
 5900   interface(MEMORY_INTER) %{
 5901     base($reg);
 5902     index($lreg);
 5903     scale($scale);
 5904     disp(0x0);
 5905   %}
 5906 %}
 5907 
 5908 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5909 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5910 %{
 5911   predicate(CompressedOops::shift() == 0);
 5912   constraint(ALLOC_IN_RC(ptr_reg));
 5913   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5914 
 5915   op_cost(10);
 5916   format %{"[$reg + $off + $lreg << $scale]" %}
 5917   interface(MEMORY_INTER) %{
 5918     base($reg);
 5919     index($lreg);
 5920     scale($scale);
 5921     disp($off);
 5922   %}
 5923 %}
 5924 
// Indirect Memory Plus Positive Index Register Plus Offset Operand
 5926 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5927 %{
 5928   constraint(ALLOC_IN_RC(ptr_reg));
 5929   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5930   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5931 
 5932   op_cost(10);
 5933   format %{"[$reg + $off + $idx]" %}
 5934   interface(MEMORY_INTER) %{
 5935     base($reg);
 5936     index($idx);
 5937     scale(0x0);
 5938     disp($off);
 5939   %}
 5940 %}
 5941 
 5942 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5943 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5944 %{
 5945   constraint(ALLOC_IN_RC(ptr_reg));
 5946   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5947   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 5948 
 5949   op_cost(10);
 5950   format %{"[$reg + $off + $idx << $scale]" %}
 5951   interface(MEMORY_INTER) %{
 5952     base($reg);
 5953     index($idx);
 5954     scale($scale);
 5955     disp($off);
 5956   %}
 5957 %}
 5958 
 5959 //----------Special Memory Operands--------------------------------------------
 5960 // Stack Slot Operand - This operand is used for loading and storing temporary
 5961 //                      values on the stack where a match requires a value to
 5962 //                      flow through memory.
 5963 operand stackSlotP(sRegP reg)
 5964 %{
 5965   constraint(ALLOC_IN_RC(stack_slots));
 5966   // No match rule because this operand is only generated in matching
 5967 
 5968   format %{ "[$reg]" %}
 5969   interface(MEMORY_INTER) %{
 5970     base(0x4);   // RSP
 5971     index(0x4);  // No Index
 5972     scale(0x0);  // No Scale
 5973     disp($reg);  // Stack Offset
 5974   %}
 5975 %}
 5976 
 5977 operand stackSlotI(sRegI reg)
 5978 %{
 5979   constraint(ALLOC_IN_RC(stack_slots));
 5980   // No match rule because this operand is only generated in matching
 5981 
 5982   format %{ "[$reg]" %}
 5983   interface(MEMORY_INTER) %{
 5984     base(0x4);   // RSP
 5985     index(0x4);  // No Index
 5986     scale(0x0);  // No Scale
 5987     disp($reg);  // Stack Offset
 5988   %}
 5989 %}
 5990 
 5991 operand stackSlotF(sRegF reg)
 5992 %{
 5993   constraint(ALLOC_IN_RC(stack_slots));
 5994   // No match rule because this operand is only generated in matching
 5995 
 5996   format %{ "[$reg]" %}
 5997   interface(MEMORY_INTER) %{
 5998     base(0x4);   // RSP
 5999     index(0x4);  // No Index
 6000     scale(0x0);  // No Scale
 6001     disp($reg);  // Stack Offset
 6002   %}
 6003 %}
 6004 
 6005 operand stackSlotD(sRegD reg)
 6006 %{
 6007   constraint(ALLOC_IN_RC(stack_slots));
 6008   // No match rule because this operand is only generated in matching
 6009 
 6010   format %{ "[$reg]" %}
 6011   interface(MEMORY_INTER) %{
 6012     base(0x4);   // RSP
 6013     index(0x4);  // No Index
 6014     scale(0x0);  // No Scale
 6015     disp($reg);  // Stack Offset
 6016   %}
 6017 %}
 6018 operand stackSlotL(sRegL reg)
 6019 %{
 6020   constraint(ALLOC_IN_RC(stack_slots));
 6021   // No match rule because this operand is only generated in matching
 6022 
 6023   format %{ "[$reg]" %}
 6024   interface(MEMORY_INTER) %{
 6025     base(0x4);   // RSP
 6026     index(0x4);  // No Index
 6027     scale(0x0);  // No Scale
 6028     disp($reg);  // Stack Offset
 6029   %}
 6030 %}
 6031 
 6032 //----------Conditional Branch Operands----------------------------------------
 6033 // Comparison Op  - This is the operation of the comparison, and is limited to
 6034 //                  the following set of codes:
 6035 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6036 //
 6037 // Other attributes of the comparison, such as unsignedness, are specified
 6038 // by the comparison instruction that sets a condition code flags register.
 6039 // That result is represented by a flags operand whose subtype is appropriate
 6040 // to the unsignedness (etc.) of the comparison.
 6041 //
 6042 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6043 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6044 // by matching a specific subtype of Bool operand below, such as cmpOpU.
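//
// Schematically (not a literal rule from this file), a signed branch is
// selected as:
//   CmpI a b  ==> sets an rFlagsReg result
//   Bool (lt) ==> matched as a cmpOp operand (condition code "l")
//   If        ==> matched by a jcc-style instruct taking (cmpOp, rFlagsReg)
// An unsigned or floating-point compare instead produces an rFlagsRegU result,
// which pairs only with the cmpOpU family of condition operands below.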
 6045 
 6046 // Comparison Code
 6047 operand cmpOp()
 6048 %{
 6049   match(Bool);
 6050 
 6051   format %{ "" %}
 6052   interface(COND_INTER) %{
 6053     equal(0x4, "e");
 6054     not_equal(0x5, "ne");
 6055     less(0xc, "l");
 6056     greater_equal(0xd, "ge");
 6057     less_equal(0xe, "le");
 6058     greater(0xf, "g");
 6059     overflow(0x0, "o");
 6060     no_overflow(0x1, "no");
 6061   %}
 6062 %}
 6063 
 6064 // Comparison Code, unsigned compare.  Used by FP also, with
 6065 // C2 (unordered) turned into GT or LT already.  The other bits
 6066 // C0 and C3 are turned into Carry & Zero flags.
 6067 operand cmpOpU()
 6068 %{
 6069   match(Bool);
 6070 
 6071   format %{ "" %}
 6072   interface(COND_INTER) %{
 6073     equal(0x4, "e");
 6074     not_equal(0x5, "ne");
 6075     less(0x2, "b");
 6076     greater_equal(0x3, "ae");
 6077     less_equal(0x6, "be");
 6078     greater(0x7, "a");
 6079     overflow(0x0, "o");
 6080     no_overflow(0x1, "no");
 6081   %}
 6082 %}
 6083 
 6084 
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
 6088 operand cmpOpUCF() %{
 6089   match(Bool);
 6090   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6091             (n->as_Bool()->_test._test == BoolTest::lt ||
 6092              n->as_Bool()->_test._test == BoolTest::ge ||
 6093              n->as_Bool()->_test._test == BoolTest::le ||
 6094              n->as_Bool()->_test._test == BoolTest::gt ||
 6095              n->in(1)->in(1) == n->in(1)->in(2)));
 6096   format %{ "" %}
 6097   interface(COND_INTER) %{
 6098     equal(0xb, "np");
 6099     not_equal(0xa, "p");
 6100     less(0x2, "b");
 6101     greater_equal(0x3, "ae");
 6102     less_equal(0x6, "be");
 6103     greater(0x7, "a");
 6104     overflow(0x0, "o");
 6105     no_overflow(0x1, "no");
 6106   %}
 6107 %}
 6108 
 6109 
 6110 // Floating comparisons that can be fixed up with extra conditional jumps
 6111 operand cmpOpUCF2() %{
 6112   match(Bool);
 6113   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6114             (n->as_Bool()->_test._test == BoolTest::ne ||
 6115              n->as_Bool()->_test._test == BoolTest::eq) &&
 6116             n->in(1)->in(1) != n->in(1)->in(2));
 6117   format %{ "" %}
 6118   interface(COND_INTER) %{
 6119     equal(0x4, "e");
 6120     not_equal(0x5, "ne");
 6121     less(0x2, "b");
 6122     greater_equal(0x3, "ae");
 6123     less_equal(0x6, "be");
 6124     greater(0x7, "a");
 6125     overflow(0x0, "o");
 6126     no_overflow(0x1, "no");
 6127   %}
 6128 %}
 6129 
 6130 
// Floating point comparisons that set condition flags which can be tested more
// directly. Unsigned tests are used for the G (>) and GE (>=) conditions while
// signed tests are used for the L (<) and LE (<=) conditions. It's important
// to convert these latter conditions to ones that use unsigned tests before
// passing them into an instruction, because the preceding comparison might be
// based on a three-way comparison (CmpF3 or CmpD3) that also assigns unordered
// outcomes to -1.
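// For example, in this operand BoolTest::lt is encoded with the unsigned "b"
// (0x2) condition rather than the signed "l" (0xc) used by the plain cmpOp.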
 6137 operand cmpOpUCFE()
 6138 %{
 6139   match(Bool);
 6140   predicate((UseAPX && VM_Version::supports_avx10_2()) &&
 6141             (n->as_Bool()->_test._test == BoolTest::ne ||
 6142              n->as_Bool()->_test._test == BoolTest::eq ||
 6143              n->as_Bool()->_test._test == BoolTest::lt ||
 6144              n->as_Bool()->_test._test == BoolTest::ge ||
 6145              n->as_Bool()->_test._test == BoolTest::le ||
 6146              n->as_Bool()->_test._test == BoolTest::gt));
 6147 
 6148   format %{ "" %}
 6149   interface(COND_INTER) %{
 6150     equal(0x4, "e");
 6151     not_equal(0x5, "ne");
 6152     less(0x2, "b");
 6153     greater_equal(0x3, "ae");
 6154     less_equal(0x6, "be");
 6155     greater(0x7, "a");
 6156     overflow(0x0, "o");
 6157     no_overflow(0x1, "no");
 6158   %}
 6159 %}
 6160 
 6161 // Operands for bound floating pointer register arguments
 6162 operand rxmm0() %{
 6163   constraint(ALLOC_IN_RC(xmm0_reg));
 6164   match(VecX);
  format %{ %}
 6166   interface(REG_INTER);
 6167 %}
 6168 
 6169 // Vectors
 6170 
 6171 // Dummy generic vector class. Should be used for all vector operands.
 6172 // Replaced with vec[SDXYZ] during post-selection pass.
 6173 operand vec() %{
 6174   constraint(ALLOC_IN_RC(dynamic));
 6175   match(VecX);
 6176   match(VecY);
 6177   match(VecZ);
 6178   match(VecS);
 6179   match(VecD);
 6180 
 6181   format %{ %}
 6182   interface(REG_INTER);
 6183 %}
 6184 
 6185 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6186 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6187 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6188 // runtime code generation via reg_class_dynamic.
 6189 operand legVec() %{
 6190   constraint(ALLOC_IN_RC(dynamic));
 6191   match(VecX);
 6192   match(VecY);
 6193   match(VecZ);
 6194   match(VecS);
 6195   match(VecD);
 6196 
 6197   format %{ %}
 6198   interface(REG_INTER);
 6199 %}
 6200 
 6201 // Replaces vec during post-selection cleanup. See above.
 6202 operand vecS() %{
 6203   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6204   match(VecS);
 6205 
 6206   format %{ %}
 6207   interface(REG_INTER);
 6208 %}
 6209 
 6210 // Replaces legVec during post-selection cleanup. See above.
 6211 operand legVecS() %{
 6212   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6213   match(VecS);
 6214 
 6215   format %{ %}
 6216   interface(REG_INTER);
 6217 %}
 6218 
 6219 // Replaces vec during post-selection cleanup. See above.
 6220 operand vecD() %{
 6221   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6222   match(VecD);
 6223 
 6224   format %{ %}
 6225   interface(REG_INTER);
 6226 %}
 6227 
 6228 // Replaces legVec during post-selection cleanup. See above.
 6229 operand legVecD() %{
 6230   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6231   match(VecD);
 6232 
 6233   format %{ %}
 6234   interface(REG_INTER);
 6235 %}
 6236 
 6237 // Replaces vec during post-selection cleanup. See above.
 6238 operand vecX() %{
 6239   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6240   match(VecX);
 6241 
 6242   format %{ %}
 6243   interface(REG_INTER);
 6244 %}
 6245 
 6246 // Replaces legVec during post-selection cleanup. See above.
 6247 operand legVecX() %{
 6248   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6249   match(VecX);
 6250 
 6251   format %{ %}
 6252   interface(REG_INTER);
 6253 %}
 6254 
 6255 // Replaces vec during post-selection cleanup. See above.
 6256 operand vecY() %{
 6257   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6258   match(VecY);
 6259 
 6260   format %{ %}
 6261   interface(REG_INTER);
 6262 %}
 6263 
 6264 // Replaces legVec during post-selection cleanup. See above.
 6265 operand legVecY() %{
 6266   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6267   match(VecY);
 6268 
 6269   format %{ %}
 6270   interface(REG_INTER);
 6271 %}
 6272 
 6273 // Replaces vec during post-selection cleanup. See above.
 6274 operand vecZ() %{
 6275   constraint(ALLOC_IN_RC(vectorz_reg));
 6276   match(VecZ);
 6277 
 6278   format %{ %}
 6279   interface(REG_INTER);
 6280 %}
 6281 
 6282 // Replaces legVec during post-selection cleanup. See above.
 6283 operand legVecZ() %{
 6284   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6285   match(VecZ);
 6286 
 6287   format %{ %}
 6288   interface(REG_INTER);
 6289 %}
 6290 
 6291 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
 6293 // instruction definitions by not requiring the AD writer to specify separate
 6294 // instructions for every form of operand when the instruction accepts
 6295 // multiple operand types with the same basic encoding and format.  The classic
 6296 // case of this is memory operands.
 6297 
 6298 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6299                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6300                indCompressedOopOffset,
 6301                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6302                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6303                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
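
// With this opclass, a single rule such as loadI below -- match(Set dst (LoadI mem))
// -- covers every addressing mode listed above ([$reg], [$reg + $off],
// [$reg + $off + $lreg << $scale], the narrow-oop forms, and so on) without a
// separate instruct per mode.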
 6304 
 6305 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 6307 pipeline %{
 6308 
 6309 //----------ATTRIBUTES---------------------------------------------------------
 6310 attributes %{
  variable_size_instructions;        // Variable-sized instructions
 6312   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 6314   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6315   instruction_fetch_units = 1;       // of 16 bytes
 6316 %}
 6317 
 6318 //----------RESOURCES----------------------------------------------------------
 6319 // Resources are the functional units available to the machine
 6320 
 6321 // Generic P2/P3 pipeline
 6322 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6323 // 3 instructions decoded per cycle.
 6324 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops; only ALU0 handles mul instructions.
 6326 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6327            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6328            BR, FPU,
 6329            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
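
// A pipe class that names a specific unit (e.g. ALU0 in ialu_reg_reg_alu0
// below) can only issue on that unit, while one naming the combined resource
// (ALU) may use any of the three ALUs.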
 6330 
 6331 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6332 // Pipeline Description specifies the stages in the machine's pipeline
 6333 
 6334 // Generic P2/P3 pipeline
 6335 pipe_desc(S0, S1, S2, S3, S4, S5);
 6336 
 6337 //----------PIPELINE CLASSES---------------------------------------------------
 6338 // Pipeline Classes describe the stages in which input and output are
 6339 // referenced by the hardware pipeline.
 6340 
 6341 // Naming convention: ialu or fpu
 6342 // Then: _reg
 6343 // Then: _reg if there is a 2nd register
 6344 // Then: _long if it's a pair of instructions implementing a long
 6345 // Then: _fat if it requires the big decoder
 6346 //   Or: _mem if it requires the big decoder and a memory unit.
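//
// For example, ialu_reg_mem below is an integer ALU operation with a register
// destination and a memory source: it needs the big decoder (D0) plus a memory
// unit, hence the _mem form of the naming scheme above.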
 6347 
 6348 // Integer ALU reg operation
 6349 pipe_class ialu_reg(rRegI dst)
 6350 %{
 6351     single_instruction;
 6352     dst    : S4(write);
 6353     dst    : S3(read);
 6354     DECODE : S0;        // any decoder
 6355     ALU    : S3;        // any alu
 6356 %}
 6357 
 6358 // Long ALU reg operation
 6359 pipe_class ialu_reg_long(rRegL dst)
 6360 %{
 6361     instruction_count(2);
 6362     dst    : S4(write);
 6363     dst    : S3(read);
 6364     DECODE : S0(2);     // any 2 decoders
 6365     ALU    : S3(2);     // both alus
 6366 %}
 6367 
 6368 // Integer ALU reg operation using big decoder
 6369 pipe_class ialu_reg_fat(rRegI dst)
 6370 %{
 6371     single_instruction;
 6372     dst    : S4(write);
 6373     dst    : S3(read);
 6374     D0     : S0;        // big decoder only
 6375     ALU    : S3;        // any alu
 6376 %}
 6377 
 6378 // Integer ALU reg-reg operation
 6379 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6380 %{
 6381     single_instruction;
 6382     dst    : S4(write);
 6383     src    : S3(read);
 6384     DECODE : S0;        // any decoder
 6385     ALU    : S3;        // any alu
 6386 %}
 6387 
 6388 // Integer ALU reg-reg operation
 6389 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6390 %{
 6391     single_instruction;
 6392     dst    : S4(write);
 6393     src    : S3(read);
 6394     D0     : S0;        // big decoder only
 6395     ALU    : S3;        // any alu
 6396 %}
 6397 
 6398 // Integer ALU reg-mem operation
 6399 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6400 %{
 6401     single_instruction;
 6402     dst    : S5(write);
 6403     mem    : S3(read);
 6404     D0     : S0;        // big decoder only
 6405     ALU    : S4;        // any alu
 6406     MEM    : S3;        // any mem
 6407 %}
 6408 
 6409 // Integer mem operation (prefetch)
 6410 pipe_class ialu_mem(memory mem)
 6411 %{
 6412     single_instruction;
 6413     mem    : S3(read);
 6414     D0     : S0;        // big decoder only
 6415     MEM    : S3;        // any mem
 6416 %}
 6417 
 6418 // Integer Store to Memory
 6419 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6420 %{
 6421     single_instruction;
 6422     mem    : S3(read);
 6423     src    : S5(read);
 6424     D0     : S0;        // big decoder only
 6425     ALU    : S4;        // any alu
 6426     MEM    : S3;
 6427 %}
 6428 
 6429 // // Long Store to Memory
 6430 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6431 // %{
 6432 //     instruction_count(2);
 6433 //     mem    : S3(read);
 6434 //     src    : S5(read);
 6435 //     D0     : S0(2);          // big decoder only; twice
 6436 //     ALU    : S4(2);     // any 2 alus
 6437 //     MEM    : S3(2);  // Both mems
 6438 // %}
 6439 
 6440 // Integer Store to Memory
 6441 pipe_class ialu_mem_imm(memory mem)
 6442 %{
 6443     single_instruction;
 6444     mem    : S3(read);
 6445     D0     : S0;        // big decoder only
 6446     ALU    : S4;        // any alu
 6447     MEM    : S3;
 6448 %}
 6449 
 6450 // Integer ALU0 reg-reg operation
 6451 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6452 %{
 6453     single_instruction;
 6454     dst    : S4(write);
 6455     src    : S3(read);
 6456     D0     : S0;        // Big decoder only
 6457     ALU0   : S3;        // only alu0
 6458 %}
 6459 
 6460 // Integer ALU0 reg-mem operation
 6461 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6462 %{
 6463     single_instruction;
 6464     dst    : S5(write);
 6465     mem    : S3(read);
 6466     D0     : S0;        // big decoder only
 6467     ALU0   : S4;        // ALU0 only
 6468     MEM    : S3;        // any mem
 6469 %}
 6470 
 6471 // Integer ALU reg-reg operation
 6472 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6473 %{
 6474     single_instruction;
 6475     cr     : S4(write);
 6476     src1   : S3(read);
 6477     src2   : S3(read);
 6478     DECODE : S0;        // any decoder
 6479     ALU    : S3;        // any alu
 6480 %}
 6481 
 6482 // Integer ALU reg-imm operation
 6483 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6484 %{
 6485     single_instruction;
 6486     cr     : S4(write);
 6487     src1   : S3(read);
 6488     DECODE : S0;        // any decoder
 6489     ALU    : S3;        // any alu
 6490 %}
 6491 
 6492 // Integer ALU reg-mem operation
 6493 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6494 %{
 6495     single_instruction;
 6496     cr     : S4(write);
 6497     src1   : S3(read);
 6498     src2   : S3(read);
 6499     D0     : S0;        // big decoder only
 6500     ALU    : S4;        // any alu
 6501     MEM    : S3;
 6502 %}
 6503 
 6504 // Conditional move reg-reg
 6505 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6506 %{
 6507     instruction_count(4);
 6508     y      : S4(read);
 6509     q      : S3(read);
 6510     p      : S3(read);
 6511     DECODE : S0(4);     // any decoder
 6512 %}
 6513 
 6514 // Conditional move reg-reg
 6515 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6516 %{
 6517     single_instruction;
 6518     dst    : S4(write);
 6519     src    : S3(read);
 6520     cr     : S3(read);
 6521     DECODE : S0;        // any decoder
 6522 %}
 6523 
 6524 // Conditional move reg-mem
 6525 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6526 %{
 6527     single_instruction;
 6528     dst    : S4(write);
 6529     src    : S3(read);
 6530     cr     : S3(read);
 6531     DECODE : S0;        // any decoder
 6532     MEM    : S3;
 6533 %}
 6534 
 6535 // Conditional move reg-reg long
 6536 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6537 %{
 6538     single_instruction;
 6539     dst    : S4(write);
 6540     src    : S3(read);
 6541     cr     : S3(read);
 6542     DECODE : S0(2);     // any 2 decoders
 6543 %}
 6544 
 6545 // Float reg-reg operation
 6546 pipe_class fpu_reg(regD dst)
 6547 %{
 6548     instruction_count(2);
 6549     dst    : S3(read);
 6550     DECODE : S0(2);     // any 2 decoders
 6551     FPU    : S3;
 6552 %}
 6553 
 6554 // Float reg-reg operation
 6555 pipe_class fpu_reg_reg(regD dst, regD src)
 6556 %{
 6557     instruction_count(2);
 6558     dst    : S4(write);
 6559     src    : S3(read);
 6560     DECODE : S0(2);     // any 2 decoders
 6561     FPU    : S3;
 6562 %}
 6563 
 6564 // Float reg-reg operation
 6565 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6566 %{
 6567     instruction_count(3);
 6568     dst    : S4(write);
 6569     src1   : S3(read);
 6570     src2   : S3(read);
 6571     DECODE : S0(3);     // any 3 decoders
 6572     FPU    : S3(2);
 6573 %}
 6574 
 6575 // Float reg-reg operation
 6576 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6577 %{
 6578     instruction_count(4);
 6579     dst    : S4(write);
 6580     src1   : S3(read);
 6581     src2   : S3(read);
 6582     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
 6584     FPU    : S3(2);
 6585 %}
 6586 
 6587 // Float reg-reg operation
 6588 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6589 %{
 6590     instruction_count(4);
 6591     dst    : S4(write);
 6592     src1   : S3(read);
 6593     src2   : S3(read);
 6594     src3   : S3(read);
 6595     DECODE : S1(3);     // any 3 decoders
 6596     D0     : S0;        // Big decoder only
 6597     FPU    : S3(2);
 6598     MEM    : S3;
 6599 %}
 6600 
 6601 // Float reg-mem operation
 6602 pipe_class fpu_reg_mem(regD dst, memory mem)
 6603 %{
 6604     instruction_count(2);
 6605     dst    : S5(write);
 6606     mem    : S3(read);
 6607     D0     : S0;        // big decoder only
 6608     DECODE : S1;        // any decoder for FPU POP
 6609     FPU    : S4;
 6610     MEM    : S3;        // any mem
 6611 %}
 6612 
 6613 // Float reg-mem operation
 6614 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6615 %{
 6616     instruction_count(3);
 6617     dst    : S5(write);
 6618     src1   : S3(read);
 6619     mem    : S3(read);
 6620     D0     : S0;        // big decoder only
 6621     DECODE : S1(2);     // any decoder for FPU POP
 6622     FPU    : S4;
 6623     MEM    : S3;        // any mem
 6624 %}
 6625 
 6626 // Float mem-reg operation
 6627 pipe_class fpu_mem_reg(memory mem, regD src)
 6628 %{
 6629     instruction_count(2);
 6630     src    : S5(read);
 6631     mem    : S3(read);
 6632     DECODE : S0;        // any decoder for FPU PUSH
 6633     D0     : S1;        // big decoder only
 6634     FPU    : S4;
 6635     MEM    : S3;        // any mem
 6636 %}
 6637 
 6638 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6639 %{
 6640     instruction_count(3);
 6641     src1   : S3(read);
 6642     src2   : S3(read);
 6643     mem    : S3(read);
 6644     DECODE : S0(2);     // any decoder for FPU PUSH
 6645     D0     : S1;        // big decoder only
 6646     FPU    : S4;
 6647     MEM    : S3;        // any mem
 6648 %}
 6649 
 6650 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6651 %{
 6652     instruction_count(3);
 6653     src1   : S3(read);
 6654     src2   : S3(read);
 6655     mem    : S4(read);
 6656     DECODE : S0;        // any decoder for FPU PUSH
 6657     D0     : S0(2);     // big decoder only
 6658     FPU    : S4;
 6659     MEM    : S3(2);     // any mem
 6660 %}
 6661 
 6662 pipe_class fpu_mem_mem(memory dst, memory src1)
 6663 %{
 6664     instruction_count(2);
 6665     src1   : S3(read);
 6666     dst    : S4(read);
 6667     D0     : S0(2);     // big decoder only
 6668     MEM    : S3(2);     // any mem
 6669 %}
 6670 
 6671 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6672 %{
 6673     instruction_count(3);
 6674     src1   : S3(read);
 6675     src2   : S3(read);
 6676     dst    : S4(read);
 6677     D0     : S0(3);     // big decoder only
 6678     FPU    : S4;
 6679     MEM    : S3(3);     // any mem
 6680 %}
 6681 
 6682 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6683 %{
 6684     instruction_count(3);
 6685     src1   : S4(read);
 6686     mem    : S4(read);
 6687     DECODE : S0;        // any decoder for FPU PUSH
 6688     D0     : S0(2);     // big decoder only
 6689     FPU    : S4;
 6690     MEM    : S3(2);     // any mem
 6691 %}
 6692 
 6693 // Float load constant
 6694 pipe_class fpu_reg_con(regD dst)
 6695 %{
 6696     instruction_count(2);
 6697     dst    : S5(write);
 6698     D0     : S0;        // big decoder only for the load
 6699     DECODE : S1;        // any decoder for FPU POP
 6700     FPU    : S4;
 6701     MEM    : S3;        // any mem
 6702 %}
 6703 
 6704 // Float load constant
 6705 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6706 %{
 6707     instruction_count(3);
 6708     dst    : S5(write);
 6709     src    : S3(read);
 6710     D0     : S0;        // big decoder only for the load
 6711     DECODE : S1(2);     // any decoder for FPU POP
 6712     FPU    : S4;
 6713     MEM    : S3;        // any mem
 6714 %}
 6715 
// Unconditional branch
 6717 pipe_class pipe_jmp(label labl)
 6718 %{
 6719     single_instruction;
 6720     BR   : S3;
 6721 %}
 6722 
 6723 // Conditional branch
 6724 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6725 %{
 6726     single_instruction;
 6727     cr    : S1(read);
 6728     BR    : S3;
 6729 %}
 6730 
 6731 // Allocation idiom
 6732 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6733 %{
 6734     instruction_count(1); force_serialization;
 6735     fixed_latency(6);
 6736     heap_ptr : S3(read);
 6737     DECODE   : S0(3);
 6738     D0       : S2;
 6739     MEM      : S3;
 6740     ALU      : S3(2);
 6741     dst      : S5(write);
 6742     BR       : S5;
 6743 %}
 6744 
 6745 // Generic big/slow expanded idiom
 6746 pipe_class pipe_slow()
 6747 %{
 6748     instruction_count(10); multiple_bundles; force_serialization;
 6749     fixed_latency(100);
 6750     D0  : S0(2);
 6751     MEM : S3(2);
 6752 %}
 6753 
 6754 // The real do-nothing guy
 6755 pipe_class empty()
 6756 %{
 6757     instruction_count(0);
 6758 %}
 6759 
 6760 // Define the class for the Nop node
 6761 define
 6762 %{
 6763    MachNop = empty;
 6764 %}
 6765 
 6766 %}
 6767 
 6768 //----------INSTRUCTIONS-------------------------------------------------------
 6769 //
 6770 // match      -- States which machine-independent subtree may be replaced
 6771 //               by this instruction.
 6772 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6773 //               selection to identify a minimum cost tree of machine
 6774 //               instructions that matches a tree of machine-independent
 6775 //               instructions.
 6776 // format     -- A string providing the disassembly for this instruction.
 6777 //               The value of an instruction's operand may be inserted
 6778 //               by referring to it with a '$' prefix.
 6779 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6780 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
 6782 //               indicate the type of machine instruction, while secondary
 6783 //               and tertiary are often used for prefix options or addressing
 6784 //               modes.
 6785 // ins_encode -- A list of encode classes with parameters. The encode class
 6786 //               name must have been defined in an 'enc_class' specification
 6787 //               in the encode section of the architecture description.
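//
// A minimal skeleton in the style used throughout this file (schematic only,
// not a rule defined here):
//
//   instruct addI_example(rRegI dst, rRegI src, rFlagsReg cr) %{
//     match(Set dst (AddI dst src));  // ideal subtree this rule replaces
//     effect(KILL cr);                // addl clobbers RFLAGS
//     format %{ "addl    $dst, $src" %}
//     ins_encode %{ __ addl($dst$$Register, $src$$Register); %}
//     ins_pipe(ialu_reg_reg);
//   %}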
 6788 
 6789 // ============================================================================
 6790 
 6791 instruct ShouldNotReachHere() %{
 6792   match(Halt);
 6793   format %{ "stop\t# ShouldNotReachHere" %}
 6794   ins_encode %{
 6795     if (is_reachable()) {
 6796       const char* str = __ code_string(_halt_reason);
 6797       __ stop(str);
 6798     }
 6799   %}
 6800   ins_pipe(pipe_slow);
 6801 %}
 6802 
 6803 // ============================================================================
 6804 
 6805 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6806 // Load Float
 6807 instruct MoveF2VL(vlRegF dst, regF src) %{
 6808   match(Set dst src);
 6809   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6810   ins_encode %{
 6811     ShouldNotReachHere();
 6812   %}
 6813   ins_pipe( fpu_reg_reg );
 6814 %}
 6815 
 6816 // Load Float
 6817 instruct MoveF2LEG(legRegF dst, regF src) %{
 6818   match(Set dst src);
 6819   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6820   ins_encode %{
 6821     ShouldNotReachHere();
 6822   %}
 6823   ins_pipe( fpu_reg_reg );
 6824 %}
 6825 
 6826 // Load Float
 6827 instruct MoveVL2F(regF dst, vlRegF src) %{
 6828   match(Set dst src);
 6829   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6830   ins_encode %{
 6831     ShouldNotReachHere();
 6832   %}
 6833   ins_pipe( fpu_reg_reg );
 6834 %}
 6835 
 6836 // Load Float
 6837 instruct MoveLEG2F(regF dst, legRegF src) %{
 6838   match(Set dst src);
 6839   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6840   ins_encode %{
 6841     ShouldNotReachHere();
 6842   %}
 6843   ins_pipe( fpu_reg_reg );
 6844 %}
 6845 
 6846 // Load Double
 6847 instruct MoveD2VL(vlRegD dst, regD src) %{
 6848   match(Set dst src);
 6849   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6850   ins_encode %{
 6851     ShouldNotReachHere();
 6852   %}
 6853   ins_pipe( fpu_reg_reg );
 6854 %}
 6855 
 6856 // Load Double
 6857 instruct MoveD2LEG(legRegD dst, regD src) %{
 6858   match(Set dst src);
 6859   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6860   ins_encode %{
 6861     ShouldNotReachHere();
 6862   %}
 6863   ins_pipe( fpu_reg_reg );
 6864 %}
 6865 
 6866 // Load Double
 6867 instruct MoveVL2D(regD dst, vlRegD src) %{
 6868   match(Set dst src);
 6869   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6870   ins_encode %{
 6871     ShouldNotReachHere();
 6872   %}
 6873   ins_pipe( fpu_reg_reg );
 6874 %}
 6875 
 6876 // Load Double
 6877 instruct MoveLEG2D(regD dst, legRegD src) %{
 6878   match(Set dst src);
 6879   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6880   ins_encode %{
 6881     ShouldNotReachHere();
 6882   %}
 6883   ins_pipe( fpu_reg_reg );
 6884 %}
 6885 
 6886 //----------Load/Store/Move Instructions---------------------------------------
 6887 //----------Load Instructions--------------------------------------------------
 6888 
 6889 // Load Byte (8 bit signed)
 6890 instruct loadB(rRegI dst, memory mem)
 6891 %{
 6892   match(Set dst (LoadB mem));
 6893 
 6894   ins_cost(125);
 6895   format %{ "movsbl  $dst, $mem\t# byte" %}
 6896 
 6897   ins_encode %{
 6898     __ movsbl($dst$$Register, $mem$$Address);
 6899   %}
 6900 
 6901   ins_pipe(ialu_reg_mem);
 6902 %}
 6903 
 6904 // Load Byte (8 bit signed) into Long Register
 6905 instruct loadB2L(rRegL dst, memory mem)
 6906 %{
 6907   match(Set dst (ConvI2L (LoadB mem)));
 6908 
 6909   ins_cost(125);
 6910   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6911 
 6912   ins_encode %{
 6913     __ movsbq($dst$$Register, $mem$$Address);
 6914   %}
 6915 
 6916   ins_pipe(ialu_reg_mem);
 6917 %}
 6918 
 6919 // Load Unsigned Byte (8 bit UNsigned)
 6920 instruct loadUB(rRegI dst, memory mem)
 6921 %{
 6922   match(Set dst (LoadUB mem));
 6923 
 6924   ins_cost(125);
 6925   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6926 
 6927   ins_encode %{
 6928     __ movzbl($dst$$Register, $mem$$Address);
 6929   %}
 6930 
 6931   ins_pipe(ialu_reg_mem);
 6932 %}
 6933 
 6934 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6935 instruct loadUB2L(rRegL dst, memory mem)
 6936 %{
 6937   match(Set dst (ConvI2L (LoadUB mem)));
 6938 
 6939   ins_cost(125);
 6940   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6941 
 6942   ins_encode %{
 6943     __ movzbq($dst$$Register, $mem$$Address);
 6944   %}
 6945 
 6946   ins_pipe(ialu_reg_mem);
 6947 %}
 6948 
 6949 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 6950 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6951   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6952   effect(KILL cr);
 6953 
 6954   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6955             "andl    $dst, right_n_bits($mask, 8)" %}
 6956   ins_encode %{
 6957     Register Rdst = $dst$$Register;
 6958     __ movzbq(Rdst, $mem$$Address);
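    // movzbq already cleared bits 8..63, so only the low 8 bits of the mask
    // are significant here.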
 6959     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 6960   %}
 6961   ins_pipe(ialu_reg_mem);
 6962 %}
 6963 
 6964 // Load Short (16 bit signed)
 6965 instruct loadS(rRegI dst, memory mem)
 6966 %{
 6967   match(Set dst (LoadS mem));
 6968 
 6969   ins_cost(125);
 6970   format %{ "movswl $dst, $mem\t# short" %}
 6971 
 6972   ins_encode %{
 6973     __ movswl($dst$$Register, $mem$$Address);
 6974   %}
 6975 
 6976   ins_pipe(ialu_reg_mem);
 6977 %}
 6978 
 6979 // Load Short (16 bit signed) to Byte (8 bit signed)
 6980 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6981   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 6982 
 6983   ins_cost(125);
 6984   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6985   ins_encode %{
 6986     __ movsbl($dst$$Register, $mem$$Address);
 6987   %}
 6988   ins_pipe(ialu_reg_mem);
 6989 %}
 6990 
 6991 // Load Short (16 bit signed) into Long Register
 6992 instruct loadS2L(rRegL dst, memory mem)
 6993 %{
 6994   match(Set dst (ConvI2L (LoadS mem)));
 6995 
 6996   ins_cost(125);
 6997   format %{ "movswq $dst, $mem\t# short -> long" %}
 6998 
 6999   ins_encode %{
 7000     __ movswq($dst$$Register, $mem$$Address);
 7001   %}
 7002 
 7003   ins_pipe(ialu_reg_mem);
 7004 %}
 7005 
 7006 // Load Unsigned Short/Char (16 bit UNsigned)
 7007 instruct loadUS(rRegI dst, memory mem)
 7008 %{
 7009   match(Set dst (LoadUS mem));
 7010 
 7011   ins_cost(125);
 7012   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 7013 
 7014   ins_encode %{
 7015     __ movzwl($dst$$Register, $mem$$Address);
 7016   %}
 7017 
 7018   ins_pipe(ialu_reg_mem);
 7019 %}
 7020 
 7021 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 7022 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7023   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 7024 
 7025   ins_cost(125);
 7026   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 7027   ins_encode %{
 7028     __ movsbl($dst$$Register, $mem$$Address);
 7029   %}
 7030   ins_pipe(ialu_reg_mem);
 7031 %}
 7032 
 7033 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 7034 instruct loadUS2L(rRegL dst, memory mem)
 7035 %{
 7036   match(Set dst (ConvI2L (LoadUS mem)));
 7037 
 7038   ins_cost(125);
 7039   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 7040 
 7041   ins_encode %{
 7042     __ movzwq($dst$$Register, $mem$$Address);
 7043   %}
 7044 
 7045   ins_pipe(ialu_reg_mem);
 7046 %}
 7047 
 7048 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 7049 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7050   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7051 
 7052   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7053   ins_encode %{
 7054     __ movzbq($dst$$Register, $mem$$Address);
 7055   %}
 7056   ins_pipe(ialu_reg_mem);
 7057 %}
 7058 
 7059 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 7060 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7061   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7062   effect(KILL cr);
 7063 
 7064   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7065             "andl    $dst, right_n_bits($mask, 16)" %}
 7066   ins_encode %{
 7067     Register Rdst = $dst$$Register;
 7068     __ movzwq(Rdst, $mem$$Address);
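    // movzwq already cleared bits 16..63, so only the low 16 bits of the mask
    // are significant here.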
 7069     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7070   %}
 7071   ins_pipe(ialu_reg_mem);
 7072 %}
 7073 
 7074 // Load Integer
 7075 instruct loadI(rRegI dst, memory mem)
 7076 %{
 7077   match(Set dst (LoadI mem));
 7078 
 7079   ins_cost(125);
 7080   format %{ "movl    $dst, $mem\t# int" %}
 7081 
 7082   ins_encode %{
 7083     __ movl($dst$$Register, $mem$$Address);
 7084   %}
 7085 
 7086   ins_pipe(ialu_reg_mem);
 7087 %}
 7088 
 7089 // Load Integer (32 bit signed) to Byte (8 bit signed)
 7090 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7091   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7092 
 7093   ins_cost(125);
 7094   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7095   ins_encode %{
 7096     __ movsbl($dst$$Register, $mem$$Address);
 7097   %}
 7098   ins_pipe(ialu_reg_mem);
 7099 %}
 7100 
 7101 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 7102 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7103   match(Set dst (AndI (LoadI mem) mask));
 7104 
 7105   ins_cost(125);
 7106   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7107   ins_encode %{
 7108     __ movzbl($dst$$Register, $mem$$Address);
 7109   %}
 7110   ins_pipe(ialu_reg_mem);
 7111 %}
 7112 
 7113 // Load Integer (32 bit signed) to Short (16 bit signed)
 7114 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7115   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7116 
 7117   ins_cost(125);
 7118   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7119   ins_encode %{
 7120     __ movswl($dst$$Register, $mem$$Address);
 7121   %}
 7122   ins_pipe(ialu_reg_mem);
 7123 %}
 7124 
 7125 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 7126 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7127   match(Set dst (AndI (LoadI mem) mask));
 7128 
 7129   ins_cost(125);
 7130   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7131   ins_encode %{
 7132     __ movzwl($dst$$Register, $mem$$Address);
 7133   %}
 7134   ins_pipe(ialu_reg_mem);
 7135 %}
 7136 
 7137 // Load Integer into Long Register
 7138 instruct loadI2L(rRegL dst, memory mem)
 7139 %{
 7140   match(Set dst (ConvI2L (LoadI mem)));
 7141 
 7142   ins_cost(125);
 7143   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7144 
 7145   ins_encode %{
 7146     __ movslq($dst$$Register, $mem$$Address);
 7147   %}
 7148 
 7149   ins_pipe(ialu_reg_mem);
 7150 %}
 7151 
 7152 // Load Integer with mask 0xFF into Long Register
 7153 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7154   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7155 
 7156   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7157   ins_encode %{
 7158     __ movzbq($dst$$Register, $mem$$Address);
 7159   %}
 7160   ins_pipe(ialu_reg_mem);
 7161 %}
 7162 
 7163 // Load Integer with mask 0xFFFF into Long Register
 7164 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7165   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7166 
 7167   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7168   ins_encode %{
 7169     __ movzwq($dst$$Register, $mem$$Address);
 7170   %}
 7171   ins_pipe(ialu_reg_mem);
 7172 %}
 7173 
 7174 // Load Integer with a 31-bit mask into Long Register
 7175 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7176   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7177   effect(KILL cr);
 7178 
 7179   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7180             "andl    $dst, $mask" %}
 7181   ins_encode %{
 7182     Register Rdst = $dst$$Register;
 7183     __ movl(Rdst, $mem$$Address);
 7184     __ andl(Rdst, $mask$$constant);
 7185   %}
 7186   ins_pipe(ialu_reg_mem);
 7187 %}
 7188 
 7189 // Load Unsigned Integer into Long Register
 7190 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7191 %{
 7192   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7193 
 7194   ins_cost(125);
 7195   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7196 
 7197   ins_encode %{
 7198     __ movl($dst$$Register, $mem$$Address);
 7199   %}
 7200 
 7201   ins_pipe(ialu_reg_mem);
 7202 %}
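
// No mask instruction is needed above: on x86-64 a 32-bit mov zero-extends
// into the upper half of the destination register, so movl by itself
// implements the AndL with 0xFFFFFFFF (and, unlike an explicit and, leaves
// the flags untouched, so no KILL cr is required).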
 7203 
 7204 // Load Long
 7205 instruct loadL(rRegL dst, memory mem)
 7206 %{
 7207   match(Set dst (LoadL mem));
 7208 
 7209   ins_cost(125);
 7210   format %{ "movq    $dst, $mem\t# long" %}
 7211 
 7212   ins_encode %{
 7213     __ movq($dst$$Register, $mem$$Address);
 7214   %}
 7215 
 7216   ins_pipe(ialu_reg_mem); // XXX
 7217 %}
 7218 
 7219 // Load Range
 7220 instruct loadRange(rRegI dst, memory mem)
 7221 %{
 7222   match(Set dst (LoadRange mem));
 7223 
 7224   ins_cost(125); // XXX
 7225   format %{ "movl    $dst, $mem\t# range" %}
 7226   ins_encode %{
 7227     __ movl($dst$$Register, $mem$$Address);
 7228   %}
 7229   ins_pipe(ialu_reg_mem);
 7230 %}
 7231 
 7232 // Load Pointer
 7233 instruct loadP(rRegP dst, memory mem)
 7234 %{
 7235   match(Set dst (LoadP mem));
 7236   predicate(n->as_Load()->barrier_data() == 0);
 7237 
 7238   ins_cost(125); // XXX
 7239   format %{ "movq    $dst, $mem\t# ptr" %}
 7240   ins_encode %{
 7241     __ movq($dst$$Register, $mem$$Address);
 7242   %}
 7243   ins_pipe(ialu_reg_mem); // XXX
 7244 %}
 7245 
 7246 // Load Compressed Pointer
 7247 instruct loadN(rRegN dst, memory mem)
 7248 %{
 7249    predicate(n->as_Load()->barrier_data() == 0);
 7250    match(Set dst (LoadN mem));
 7251 
 7252    ins_cost(125); // XXX
 7253    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7254    ins_encode %{
 7255      __ movl($dst$$Register, $mem$$Address);
 7256    %}
 7257    ins_pipe(ialu_reg_mem); // XXX
 7258 %}
 7259 
 7260 
 7261 // Load Klass Pointer
 7262 instruct loadKlass(rRegP dst, memory mem)
 7263 %{
 7264   match(Set dst (LoadKlass mem));
 7265 
 7266   ins_cost(125); // XXX
 7267   format %{ "movq    $dst, $mem\t# class" %}
 7268   ins_encode %{
 7269     __ movq($dst$$Register, $mem$$Address);
 7270   %}
 7271   ins_pipe(ialu_reg_mem); // XXX
 7272 %}
 7273 
 7274 // Load narrow Klass Pointer
 7275 instruct loadNKlass(rRegN dst, memory mem)
 7276 %{
 7277   predicate(!UseCompactObjectHeaders);
 7278   match(Set dst (LoadNKlass mem));
 7279 
 7280   ins_cost(125); // XXX
 7281   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7282   ins_encode %{
 7283     __ movl($dst$$Register, $mem$$Address);
 7284   %}
 7285   ins_pipe(ialu_reg_mem); // XXX
 7286 %}
 7287 
 7288 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7289 %{
 7290   predicate(UseCompactObjectHeaders);
 7291   match(Set dst (LoadNKlass mem));
 7292   effect(KILL cr);
 7293   ins_cost(125);
 7294   format %{
 7295     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7296     "shrl    $dst, markWord::klass_shift_at_offset"
 7297   %}
 7298   ins_encode %{
 7299     if (UseAPX) {
 7300       __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
    } else {
 7303       __ movl($dst$$Register, $mem$$Address);
 7304       __ shrl($dst$$Register, markWord::klass_shift_at_offset);
 7305     }
 7306   %}
 7307   ins_pipe(ialu_reg_mem);
 7308 %}
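
// With APX the load and shift above collapse into one instruction: eshrl is
// the EVEX "new data destination" form of shrl, shifting a memory operand
// directly into $dst (the trailing false selects the default flag-updating
// encoding, consistent with the declared KILL cr).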
 7309 
 7310 // Load Float
 7311 instruct loadF(regF dst, memory mem)
 7312 %{
 7313   match(Set dst (LoadF mem));
 7314 
 7315   ins_cost(145); // XXX
 7316   format %{ "movss   $dst, $mem\t# float" %}
 7317   ins_encode %{
 7318     __ movflt($dst$$XMMRegister, $mem$$Address);
 7319   %}
 7320   ins_pipe(pipe_slow); // XXX
 7321 %}
 7322 
 7323 // Load Double
 7324 instruct loadD_partial(regD dst, memory mem)
 7325 %{
 7326   predicate(!UseXmmLoadAndClearUpper);
 7327   match(Set dst (LoadD mem));
 7328 
 7329   ins_cost(145); // XXX
 7330   format %{ "movlpd  $dst, $mem\t# double" %}
 7331   ins_encode %{
 7332     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7333   %}
 7334   ins_pipe(pipe_slow); // XXX
 7335 %}
 7336 
 7337 instruct loadD(regD dst, memory mem)
 7338 %{
 7339   predicate(UseXmmLoadAndClearUpper);
 7340   match(Set dst (LoadD mem));
 7341 
 7342   ins_cost(145); // XXX
 7343   format %{ "movsd   $dst, $mem\t# double" %}
 7344   ins_encode %{
 7345     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7346   %}
 7347   ins_pipe(pipe_slow); // XXX
 7348 %}
 7349 
 7350 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
 7351 %{
 7352   match(Set dst con);
 7353 
 7354   format %{ "leaq  $dst, $con\t# AOT Runtime Constants Address" %}
 7355 
 7356   ins_encode %{
 7357     __ load_aotrc_address($dst$$Register, (address)$con$$constant);
 7358   %}
 7359 
 7360   ins_pipe(ialu_reg_fat);
 7361 %}
 7362 
 7363 // min = java.lang.Math.min(float a, float b)
 7364 // max = java.lang.Math.max(float a, float b)
 7365 instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
 7366 %{
 7367   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7368   match(Set dst (MaxF a b));
 7369   match(Set dst (MinF a b));
 7370 
 7371   format %{ "minmaxF $dst, $a, $b" %}
 7372   ins_encode %{
 7373     int opcode = this->ideal_Opcode();
 7374     __ sminmax_fp_avx10_2(opcode, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
 7375   %}
 7376   ins_pipe( pipe_slow );
 7377 %}
 7378 
 7379 instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, regF xtmp, rRegI rtmp, rFlagsReg cr)
 7380 %{
 7381   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7382   match(Set dst (MaxF a b));
 7383   match(Set dst (MinF a b));
 7384   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7385 
 7386   format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7387   ins_encode %{
 7388     int opcode = this->ideal_Opcode();
    bool min = (opcode == Op_MinF);
 7390     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7391                     min, fp_prec_flt /*pt*/);
 7392   %}
 7393   ins_pipe( pipe_slow );
 7394 %}
 7395 
 7396 // min = java.lang.Math.min(float a, float b)
 7397 // max = java.lang.Math.max(float a, float b)
 7398 instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
 7399 %{
 7400   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7401   match(Set dst (MaxF a b));
 7402   match(Set dst (MinF a b));
 7403   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7404 
 7405   format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7406   ins_encode %{
 7407     int opcode = this->ideal_Opcode();
 7408     int param_opcode = (opcode == Op_MinF) ? Op_MinV : Op_MaxV;
 7409     __ vminmax_fp(param_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
 7410                   $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7411   %}
 7412   ins_pipe( pipe_slow );
 7413 %}
 7414 
 7415 instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr)
 7416 %{
 7417   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7418   match(Set dst (MaxF a b));
 7419   match(Set dst (MinF a b));
 7420   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7421 
 7422   format %{ "minmaxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
 7423   ins_encode %{
 7424     int opcode = this->ideal_Opcode();
    bool min = (opcode == Op_MinF);
 7426     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7427                     min, fp_prec_flt /*pt*/);
 7428   %}
 7429   ins_pipe( pipe_slow );
 7430 %}
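
// Why Math.min/max on floats cannot simply use minss/maxss: the Java spec
// requires min(-0.0f, +0.0f) == -0.0f and NaN propagation for either input,
// while the SSE min/max instructions return the second source operand on
// NaN or signed-zero ties. The blend sequence behind vminmax_fp and the
// emit_fp_min_max fallback fix up exactly those two cases, which is what
// the extra TEMP registers (and KILL cr in the reduction flavors) pay for.
// The AVX10.2 rules can use a single instruction because its new minmax
// operation encodes the IEEE 754-2019 semantics directly.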
 7431 
 7432 // min = java.lang.Math.min(double a, double b)
 7433 // max = java.lang.Math.max(double a, double b)
 7434 instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
 7435 %{
 7436   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7437   match(Set dst (MaxD a b));
 7438   match(Set dst (MinD a b));
 7439 
 7440   format %{ "minmaxD $dst, $a, $b" %}
 7441   ins_encode %{
 7442     int opcode = this->ideal_Opcode();
 7443     __ sminmax_fp_avx10_2(opcode, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
 7444   %}
 7445   ins_pipe( pipe_slow );
 7446 %}
 7447 
 7448 instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, regD xtmp, rRegI rtmp, rFlagsReg cr)
 7449 %{
 7450   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7451   match(Set dst (MaxD a b));
 7452   match(Set dst (MinD a b));
 7453   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7454 
 7455   format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7456   ins_encode %{
 7457     int opcode = this->ideal_Opcode();
    bool min = (opcode == Op_MinD);
 7459     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7460                     min, fp_prec_dbl /*pt*/);
 7461   %}
 7462   ins_pipe( pipe_slow );
 7463 %}
 7464 
 7465 // min = java.lang.Math.min(double a, double b)
 7466 // max = java.lang.Math.max(double a, double b)
 7467 instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
 7468 %{
 7469   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7470   match(Set dst (MaxD a b));
 7471   match(Set dst (MinD a b));
 7472   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7473 
 7474   format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7475   ins_encode %{
 7476     int opcode = this->ideal_Opcode();
 7477     int param_opcode = (opcode == Op_MinD) ? Op_MinV : Op_MaxV;
 7478     __ vminmax_fp(param_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
 7479                   $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7480   %}
 7481   ins_pipe( pipe_slow );
 7482 %}
 7483 
 7484 instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr)
 7485 %{
 7486   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7487   match(Set dst (MaxD a b));
 7488   match(Set dst (MinD a b));
 7489   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7490 
 7491   format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7492   ins_encode %{
 7493     int opcode = this->ideal_Opcode();
    bool min = (opcode == Op_MinD);
 7495     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7496                     min, fp_prec_dbl /*pt*/);
 7497   %}
 7498   ins_pipe( pipe_slow );
 7499 %}
 7500 
 7501 // Load Effective Address
 7502 instruct leaP8(rRegP dst, indOffset8 mem)
 7503 %{
 7504   match(Set dst mem);
 7505 
 7506   ins_cost(110); // XXX
 7507   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7508   ins_encode %{
 7509     __ leaq($dst$$Register, $mem$$Address);
 7510   %}
 7511   ins_pipe(ialu_reg_reg_fat);
 7512 %}
 7513 
 7514 instruct leaP32(rRegP dst, indOffset32 mem)
 7515 %{
 7516   match(Set dst mem);
 7517 
 7518   ins_cost(110);
 7519   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7520   ins_encode %{
 7521     __ leaq($dst$$Register, $mem$$Address);
 7522   %}
 7523   ins_pipe(ialu_reg_reg_fat);
 7524 %}
 7525 
 7526 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7527 %{
 7528   match(Set dst mem);
 7529 
 7530   ins_cost(110);
 7531   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7532   ins_encode %{
 7533     __ leaq($dst$$Register, $mem$$Address);
 7534   %}
 7535   ins_pipe(ialu_reg_reg_fat);
 7536 %}
 7537 
 7538 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7539 %{
 7540   match(Set dst mem);
 7541 
 7542   ins_cost(110);
 7543   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7544   ins_encode %{
 7545     __ leaq($dst$$Register, $mem$$Address);
 7546   %}
 7547   ins_pipe(ialu_reg_reg_fat);
 7548 %}
 7549 
 7550 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7551 %{
 7552   match(Set dst mem);
 7553 
 7554   ins_cost(110);
 7555   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7556   ins_encode %{
 7557     __ leaq($dst$$Register, $mem$$Address);
 7558   %}
 7559   ins_pipe(ialu_reg_reg_fat);
 7560 %}
 7561 
 7562 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7563 %{
 7564   match(Set dst mem);
 7565 
 7566   ins_cost(110);
 7567   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7568   ins_encode %{
 7569     __ leaq($dst$$Register, $mem$$Address);
 7570   %}
 7571   ins_pipe(ialu_reg_reg_fat);
 7572 %}
 7573 
 7574 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7575 %{
 7576   match(Set dst mem);
 7577 
 7578   ins_cost(110);
 7579   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7580   ins_encode %{
 7581     __ leaq($dst$$Register, $mem$$Address);
 7582   %}
 7583   ins_pipe(ialu_reg_reg_fat);
 7584 %}
 7585 
 7586 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7587 %{
 7588   match(Set dst mem);
 7589 
 7590   ins_cost(110);
 7591   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7592   ins_encode %{
 7593     __ leaq($dst$$Register, $mem$$Address);
 7594   %}
 7595   ins_pipe(ialu_reg_reg_fat);
 7596 %}
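
// The leaP* rules (including the narrow variants below) match an
// addressing-mode subtree directly as a value computation: leaq folds
// base + index*scale + disp into a single flag-neutral instruction, so
// pointer arithmetic that happens to fit an addressing mode costs one ALU
// op and needs no temporary register.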
 7597 
// Load Effective Address which uses a narrow (32-bit) oop
 7599 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7600 %{
 7601   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7602   match(Set dst mem);
 7603 
 7604   ins_cost(110);
 7605   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7606   ins_encode %{
 7607     __ leaq($dst$$Register, $mem$$Address);
 7608   %}
 7609   ins_pipe(ialu_reg_reg_fat);
 7610 %}
 7611 
 7612 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7613 %{
 7614   predicate(CompressedOops::shift() == 0);
 7615   match(Set dst mem);
 7616 
 7617   ins_cost(110); // XXX
 7618   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7619   ins_encode %{
 7620     __ leaq($dst$$Register, $mem$$Address);
 7621   %}
 7622   ins_pipe(ialu_reg_reg_fat);
 7623 %}
 7624 
 7625 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7626 %{
 7627   predicate(CompressedOops::shift() == 0);
 7628   match(Set dst mem);
 7629 
 7630   ins_cost(110);
 7631   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7632   ins_encode %{
 7633     __ leaq($dst$$Register, $mem$$Address);
 7634   %}
 7635   ins_pipe(ialu_reg_reg_fat);
 7636 %}
 7637 
 7638 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7639 %{
 7640   predicate(CompressedOops::shift() == 0);
 7641   match(Set dst mem);
 7642 
 7643   ins_cost(110);
 7644   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7645   ins_encode %{
 7646     __ leaq($dst$$Register, $mem$$Address);
 7647   %}
 7648   ins_pipe(ialu_reg_reg_fat);
 7649 %}
 7650 
 7651 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7652 %{
 7653   predicate(CompressedOops::shift() == 0);
 7654   match(Set dst mem);
 7655 
 7656   ins_cost(110);
 7657   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7658   ins_encode %{
 7659     __ leaq($dst$$Register, $mem$$Address);
 7660   %}
 7661   ins_pipe(ialu_reg_reg_fat);
 7662 %}
 7663 
 7664 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7665 %{
 7666   predicate(CompressedOops::shift() == 0);
 7667   match(Set dst mem);
 7668 
 7669   ins_cost(110);
 7670   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7671   ins_encode %{
 7672     __ leaq($dst$$Register, $mem$$Address);
 7673   %}
 7674   ins_pipe(ialu_reg_reg_fat);
 7675 %}
 7676 
 7677 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7678 %{
 7679   predicate(CompressedOops::shift() == 0);
 7680   match(Set dst mem);
 7681 
 7682   ins_cost(110);
 7683   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7684   ins_encode %{
 7685     __ leaq($dst$$Register, $mem$$Address);
 7686   %}
 7687   ins_pipe(ialu_reg_reg_fat);
 7688 %}
 7689 
 7690 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7691 %{
 7692   predicate(CompressedOops::shift() == 0);
 7693   match(Set dst mem);
 7694 
 7695   ins_cost(110);
 7696   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7697   ins_encode %{
 7698     __ leaq($dst$$Register, $mem$$Address);
 7699   %}
 7700   ins_pipe(ialu_reg_reg_fat);
 7701 %}
 7702 
 7703 instruct loadConI(rRegI dst, immI src)
 7704 %{
 7705   match(Set dst src);
 7706 
 7707   format %{ "movl    $dst, $src\t# int" %}
 7708   ins_encode %{
 7709     __ movl($dst$$Register, $src$$constant);
 7710   %}
 7711   ins_pipe(ialu_reg_fat); // XXX
 7712 %}
 7713 
 7714 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7715 %{
 7716   match(Set dst src);
 7717   effect(KILL cr);
 7718 
 7719   ins_cost(50);
 7720   format %{ "xorl    $dst, $dst\t# int" %}
 7721   ins_encode %{
 7722     __ xorl($dst$$Register, $dst$$Register);
 7723   %}
 7724   ins_pipe(ialu_reg);
 7725 %}
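
// xorl is preferred over "movl $dst, 0" for zeroing: the encoding is
// shorter and the register renamer recognizes it as dependency-breaking.
// The price is that xor sets the flags, hence the KILL cr effect and the
// separate rule (the plain loadConI above stays flag-neutral).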
 7726 
 7727 instruct loadConL(rRegL dst, immL src)
 7728 %{
 7729   match(Set dst src);
 7730 
 7731   ins_cost(150);
 7732   format %{ "movq    $dst, $src\t# long" %}
 7733   ins_encode %{
 7734     __ mov64($dst$$Register, $src$$constant);
 7735   %}
 7736   ins_pipe(ialu_reg);
 7737 %}
 7738 
 7739 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7740 %{
 7741   match(Set dst src);
 7742   effect(KILL cr);
 7743 
 7744   ins_cost(50);
 7745   format %{ "xorl    $dst, $dst\t# long" %}
 7746   ins_encode %{
 7747     __ xorl($dst$$Register, $dst$$Register);
 7748   %}
 7749   ins_pipe(ialu_reg); // XXX
 7750 %}
 7751 
 7752 instruct loadConUL32(rRegL dst, immUL32 src)
 7753 %{
 7754   match(Set dst src);
 7755 
 7756   ins_cost(60);
 7757   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7758   ins_encode %{
 7759     __ movl($dst$$Register, $src$$constant);
 7760   %}
 7761   ins_pipe(ialu_reg);
 7762 %}
 7763 
 7764 instruct loadConL32(rRegL dst, immL32 src)
 7765 %{
 7766   match(Set dst src);
 7767 
 7768   ins_cost(70);
 7769   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7770   ins_encode %{
 7771     __ movq($dst$$Register, $src$$constant);
 7772   %}
 7773   ins_pipe(ialu_reg);
 7774 %}
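
// Three encodings, three costs: a long constant that fits in an unsigned
// 32-bit immediate uses movl (zero-extended, cost 60); one that fits in a
// signed 32-bit immediate uses movq with a sign-extended imm32 (cost 70);
// everything else needs the full 10-byte move emitted by mov64 (cost 150).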
 7775 
 7776 instruct loadConP(rRegP dst, immP con) %{
 7777   match(Set dst con);
 7778 
 7779   format %{ "movq    $dst, $con\t# ptr" %}
 7780   ins_encode %{
 7781     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7782   %}
 7783   ins_pipe(ialu_reg_fat); // XXX
 7784 %}
 7785 
 7786 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7787 %{
 7788   match(Set dst src);
 7789   effect(KILL cr);
 7790 
 7791   ins_cost(50);
 7792   format %{ "xorl    $dst, $dst\t# ptr" %}
 7793   ins_encode %{
 7794     __ xorl($dst$$Register, $dst$$Register);
 7795   %}
 7796   ins_pipe(ialu_reg);
 7797 %}
 7798 
 7799 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7800 %{
 7801   match(Set dst src);
 7802   effect(KILL cr);
 7803 
 7804   ins_cost(60);
 7805   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7806   ins_encode %{
 7807     __ movl($dst$$Register, $src$$constant);
 7808   %}
 7809   ins_pipe(ialu_reg);
 7810 %}
 7811 
 7812 instruct loadConF(regF dst, immF con) %{
 7813   match(Set dst con);
 7814   ins_cost(125);
 7815   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7816   ins_encode %{
 7817     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7818   %}
 7819   ins_pipe(pipe_slow);
 7820 %}
 7821 
 7822 instruct loadConH(regF dst, immH con) %{
 7823   match(Set dst con);
 7824   ins_cost(125);
 7825   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7826   ins_encode %{
 7827     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7828   %}
 7829   ins_pipe(pipe_slow);
 7830 %}
 7831 
 7832 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7833   match(Set dst src);
 7834   effect(KILL cr);
  format %{ "xorq    $dst, $dst\t# compressed null pointer" %}
 7836   ins_encode %{
 7837     __ xorq($dst$$Register, $dst$$Register);
 7838   %}
 7839   ins_pipe(ialu_reg);
 7840 %}
 7841 
 7842 instruct loadConN(rRegN dst, immN src) %{
 7843   match(Set dst src);
 7844 
 7845   ins_cost(125);
 7846   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7847   ins_encode %{
 7848     address con = (address)$src$$constant;
 7849     if (con == nullptr) {
 7850       ShouldNotReachHere();
 7851     } else {
 7852       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7853     }
 7854   %}
 7855   ins_pipe(ialu_reg_fat); // XXX
 7856 %}
 7857 
 7858 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7859   match(Set dst src);
 7860 
 7861   ins_cost(125);
 7862   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7863   ins_encode %{
 7864     address con = (address)$src$$constant;
 7865     if (con == nullptr) {
 7866       ShouldNotReachHere();
 7867     } else {
 7868       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7869     }
 7870   %}
 7871   ins_pipe(ialu_reg_fat); // XXX
 7872 %}
 7873 
 7874 instruct loadConF0(regF dst, immF0 src)
 7875 %{
 7876   match(Set dst src);
 7877   ins_cost(100);
 7878 
 7879   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7880   ins_encode %{
 7881     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7882   %}
 7883   ins_pipe(pipe_slow);
 7884 %}
 7885 
// Use the same format since predicate() cannot be used here.
 7887 instruct loadConD(regD dst, immD con) %{
 7888   match(Set dst con);
 7889   ins_cost(125);
 7890   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7891   ins_encode %{
 7892     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7893   %}
 7894   ins_pipe(pipe_slow);
 7895 %}
 7896 
 7897 instruct loadConD0(regD dst, immD0 src)
 7898 %{
 7899   match(Set dst src);
 7900   ins_cost(100);
 7901 
 7902   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7903   ins_encode %{
 7904     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7905   %}
 7906   ins_pipe(pipe_slow);
 7907 %}
 7908 
 7909 instruct loadSSI(rRegI dst, stackSlotI src)
 7910 %{
 7911   match(Set dst src);
 7912 
 7913   ins_cost(125);
 7914   format %{ "movl    $dst, $src\t# int stk" %}
 7915   ins_encode %{
 7916     __ movl($dst$$Register, $src$$Address);
 7917   %}
 7918   ins_pipe(ialu_reg_mem);
 7919 %}
 7920 
 7921 instruct loadSSL(rRegL dst, stackSlotL src)
 7922 %{
 7923   match(Set dst src);
 7924 
 7925   ins_cost(125);
 7926   format %{ "movq    $dst, $src\t# long stk" %}
 7927   ins_encode %{
 7928     __ movq($dst$$Register, $src$$Address);
 7929   %}
 7930   ins_pipe(ialu_reg_mem);
 7931 %}
 7932 
 7933 instruct loadSSP(rRegP dst, stackSlotP src)
 7934 %{
 7935   match(Set dst src);
 7936 
 7937   ins_cost(125);
 7938   format %{ "movq    $dst, $src\t# ptr stk" %}
 7939   ins_encode %{
 7940     __ movq($dst$$Register, $src$$Address);
 7941   %}
 7942   ins_pipe(ialu_reg_mem);
 7943 %}
 7944 
 7945 instruct loadSSF(regF dst, stackSlotF src)
 7946 %{
 7947   match(Set dst src);
 7948 
 7949   ins_cost(125);
 7950   format %{ "movss   $dst, $src\t# float stk" %}
 7951   ins_encode %{
 7952     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 7953   %}
 7954   ins_pipe(pipe_slow); // XXX
 7955 %}
 7956 
// Use the same format since predicate() cannot be used here.
 7958 instruct loadSSD(regD dst, stackSlotD src)
 7959 %{
 7960   match(Set dst src);
 7961 
 7962   ins_cost(125);
 7963   format %{ "movsd   $dst, $src\t# double stk" %}
 7964   ins_encode  %{
 7965     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 7966   %}
 7967   ins_pipe(pipe_slow); // XXX
 7968 %}
 7969 
 7970 // Prefetch instructions for allocation.
 7971 // Must be safe to execute with invalid address (cannot fault).
 7972 
 7973 instruct prefetchAlloc( memory mem ) %{
 7974   predicate(AllocatePrefetchInstr==3);
 7975   match(PrefetchAllocation mem);
 7976   ins_cost(125);
 7977 
 7978   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7979   ins_encode %{
 7980     __ prefetchw($mem$$Address);
 7981   %}
 7982   ins_pipe(ialu_mem);
 7983 %}
 7984 
 7985 instruct prefetchAllocNTA( memory mem ) %{
 7986   predicate(AllocatePrefetchInstr==0);
 7987   match(PrefetchAllocation mem);
 7988   ins_cost(125);
 7989 
 7990   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7991   ins_encode %{
 7992     __ prefetchnta($mem$$Address);
 7993   %}
 7994   ins_pipe(ialu_mem);
 7995 %}
 7996 
 7997 instruct prefetchAllocT0( memory mem ) %{
 7998   predicate(AllocatePrefetchInstr==1);
 7999   match(PrefetchAllocation mem);
 8000   ins_cost(125);
 8001 
 8002   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 8003   ins_encode %{
 8004     __ prefetcht0($mem$$Address);
 8005   %}
 8006   ins_pipe(ialu_mem);
 8007 %}
 8008 
 8009 instruct prefetchAllocT2( memory mem ) %{
 8010   predicate(AllocatePrefetchInstr==2);
 8011   match(PrefetchAllocation mem);
 8012   ins_cost(125);
 8013 
 8014   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 8015   ins_encode %{
 8016     __ prefetcht2($mem$$Address);
 8017   %}
 8018   ins_pipe(ialu_mem);
 8019 %}
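
// The AllocatePrefetchInstr flag (-XX:AllocatePrefetchInstr=n) selects the
// variant: 0 -> prefetchnta, 1 -> prefetcht0, 2 -> prefetcht2, 3 -> prefetchw.
// All of these are hint instructions, so they satisfy the "cannot fault on
// an invalid address" requirement stated above.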
 8020 
 8021 //----------Store Instructions-------------------------------------------------
 8022 
 8023 // Store Byte
 8024 instruct storeB(memory mem, rRegI src)
 8025 %{
 8026   match(Set mem (StoreB mem src));
 8027 
 8028   ins_cost(125); // XXX
 8029   format %{ "movb    $mem, $src\t# byte" %}
 8030   ins_encode %{
 8031     __ movb($mem$$Address, $src$$Register);
 8032   %}
 8033   ins_pipe(ialu_mem_reg);
 8034 %}
 8035 
 8036 // Store Char/Short
 8037 instruct storeC(memory mem, rRegI src)
 8038 %{
 8039   match(Set mem (StoreC mem src));
 8040 
 8041   ins_cost(125); // XXX
 8042   format %{ "movw    $mem, $src\t# char/short" %}
 8043   ins_encode %{
 8044     __ movw($mem$$Address, $src$$Register);
 8045   %}
 8046   ins_pipe(ialu_mem_reg);
 8047 %}
 8048 
 8049 // Store Integer
 8050 instruct storeI(memory mem, rRegI src)
 8051 %{
 8052   match(Set mem (StoreI mem src));
 8053 
 8054   ins_cost(125); // XXX
 8055   format %{ "movl    $mem, $src\t# int" %}
 8056   ins_encode %{
 8057     __ movl($mem$$Address, $src$$Register);
 8058   %}
 8059   ins_pipe(ialu_mem_reg);
 8060 %}
 8061 
 8062 // Store Long
 8063 instruct storeL(memory mem, rRegL src)
 8064 %{
 8065   match(Set mem (StoreL mem src));
 8066 
 8067   ins_cost(125); // XXX
 8068   format %{ "movq    $mem, $src\t# long" %}
 8069   ins_encode %{
 8070     __ movq($mem$$Address, $src$$Register);
 8071   %}
 8072   ins_pipe(ialu_mem_reg); // XXX
 8073 %}
 8074 
 8075 // Store Pointer
 8076 instruct storeP(memory mem, any_RegP src)
 8077 %{
 8078   predicate(n->as_Store()->barrier_data() == 0);
 8079   match(Set mem (StoreP mem src));
 8080 
 8081   ins_cost(125); // XXX
 8082   format %{ "movq    $mem, $src\t# ptr" %}
 8083   ins_encode %{
 8084     __ movq($mem$$Address, $src$$Register);
 8085   %}
 8086   ins_pipe(ialu_mem_reg);
 8087 %}
 8088 
 8089 instruct storeImmP0(memory mem, immP0 zero)
 8090 %{
 8091   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8092   match(Set mem (StoreP mem zero));
 8093 
 8094   ins_cost(125); // XXX
 8095   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8096   ins_encode %{
 8097     __ movq($mem$$Address, r12);
 8098   %}
 8099   ins_pipe(ialu_mem_reg);
 8100 %}
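
// The r12 trick used above (and in the other *Imm*0 rules below): when
// compressed oops run with a null heap base, r12 is pinned to zero as the
// heap-base register, so "mov [mem], r12" stores a zero without consuming
// an immediate or a temporary register.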
 8101 
 8102 // Store Null Pointer, mark word, or other simple pointer constant.
 8103 instruct storeImmP(memory mem, immP31 src)
 8104 %{
 8105   predicate(n->as_Store()->barrier_data() == 0);
 8106   match(Set mem (StoreP mem src));
 8107 
 8108   ins_cost(150); // XXX
 8109   format %{ "movq    $mem, $src\t# ptr" %}
 8110   ins_encode %{
 8111     __ movq($mem$$Address, $src$$constant);
 8112   %}
 8113   ins_pipe(ialu_mem_imm);
 8114 %}
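
// Note the operand is immP31, not immP: movq-to-memory only takes a
// sign-extended 32-bit immediate, so only pointer constants whose value is
// a non-negative 32-bit number (null, mark words and similar) can be
// stored this way.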
 8115 
 8116 // Store Compressed Pointer
 8117 instruct storeN(memory mem, rRegN src)
 8118 %{
 8119   predicate(n->as_Store()->barrier_data() == 0);
 8120   match(Set mem (StoreN mem src));
 8121 
 8122   ins_cost(125); // XXX
 8123   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8124   ins_encode %{
 8125     __ movl($mem$$Address, $src$$Register);
 8126   %}
 8127   ins_pipe(ialu_mem_reg);
 8128 %}
 8129 
 8130 instruct storeNKlass(memory mem, rRegN src)
 8131 %{
 8132   match(Set mem (StoreNKlass mem src));
 8133 
 8134   ins_cost(125); // XXX
 8135   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8136   ins_encode %{
 8137     __ movl($mem$$Address, $src$$Register);
 8138   %}
 8139   ins_pipe(ialu_mem_reg);
 8140 %}
 8141 
 8142 instruct storeImmN0(memory mem, immN0 zero)
 8143 %{
 8144   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8145   match(Set mem (StoreN mem zero));
 8146 
 8147   ins_cost(125); // XXX
 8148   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8149   ins_encode %{
 8150     __ movl($mem$$Address, r12);
 8151   %}
 8152   ins_pipe(ialu_mem_reg);
 8153 %}
 8154 
 8155 instruct storeImmN(memory mem, immN src)
 8156 %{
 8157   predicate(n->as_Store()->barrier_data() == 0);
 8158   match(Set mem (StoreN mem src));
 8159 
 8160   ins_cost(150); // XXX
 8161   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8162   ins_encode %{
 8163     address con = (address)$src$$constant;
 8164     if (con == nullptr) {
 8165       __ movl($mem$$Address, 0);
 8166     } else {
 8167       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8168     }
 8169   %}
 8170   ins_pipe(ialu_mem_imm);
 8171 %}
 8172 
 8173 instruct storeImmNKlass(memory mem, immNKlass src)
 8174 %{
 8175   match(Set mem (StoreNKlass mem src));
 8176 
 8177   ins_cost(150); // XXX
 8178   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8179   ins_encode %{
 8180     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8181   %}
 8182   ins_pipe(ialu_mem_imm);
 8183 %}
 8184 
 8185 // Store Integer Immediate
 8186 instruct storeImmI0(memory mem, immI_0 zero)
 8187 %{
 8188   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8189   match(Set mem (StoreI mem zero));
 8190 
 8191   ins_cost(125); // XXX
 8192   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8193   ins_encode %{
 8194     __ movl($mem$$Address, r12);
 8195   %}
 8196   ins_pipe(ialu_mem_reg);
 8197 %}
 8198 
 8199 instruct storeImmI(memory mem, immI src)
 8200 %{
 8201   match(Set mem (StoreI mem src));
 8202 
 8203   ins_cost(150);
 8204   format %{ "movl    $mem, $src\t# int" %}
 8205   ins_encode %{
 8206     __ movl($mem$$Address, $src$$constant);
 8207   %}
 8208   ins_pipe(ialu_mem_imm);
 8209 %}
 8210 
 8211 // Store Long Immediate
 8212 instruct storeImmL0(memory mem, immL0 zero)
 8213 %{
 8214   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8215   match(Set mem (StoreL mem zero));
 8216 
 8217   ins_cost(125); // XXX
 8218   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8219   ins_encode %{
 8220     __ movq($mem$$Address, r12);
 8221   %}
 8222   ins_pipe(ialu_mem_reg);
 8223 %}
 8224 
 8225 instruct storeImmL(memory mem, immL32 src)
 8226 %{
 8227   match(Set mem (StoreL mem src));
 8228 
 8229   ins_cost(150);
 8230   format %{ "movq    $mem, $src\t# long" %}
 8231   ins_encode %{
 8232     __ movq($mem$$Address, $src$$constant);
 8233   %}
 8234   ins_pipe(ialu_mem_imm);
 8235 %}
 8236 
 8237 // Store Short/Char Immediate
 8238 instruct storeImmC0(memory mem, immI_0 zero)
 8239 %{
 8240   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8241   match(Set mem (StoreC mem zero));
 8242 
 8243   ins_cost(125); // XXX
 8244   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8245   ins_encode %{
 8246     __ movw($mem$$Address, r12);
 8247   %}
 8248   ins_pipe(ialu_mem_reg);
 8249 %}
 8250 
 8251 instruct storeImmI16(memory mem, immI16 src)
 8252 %{
 8253   predicate(UseStoreImmI16);
 8254   match(Set mem (StoreC mem src));
 8255 
 8256   ins_cost(150);
 8257   format %{ "movw    $mem, $src\t# short/char" %}
 8258   ins_encode %{
 8259     __ movw($mem$$Address, $src$$constant);
 8260   %}
 8261   ins_pipe(ialu_mem_imm);
 8262 %}
 8263 
 8264 // Store Byte Immediate
 8265 instruct storeImmB0(memory mem, immI_0 zero)
 8266 %{
 8267   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8268   match(Set mem (StoreB mem zero));
 8269 
 8270   ins_cost(125); // XXX
  format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 8272   ins_encode %{
 8273     __ movb($mem$$Address, r12);
 8274   %}
 8275   ins_pipe(ialu_mem_reg);
 8276 %}
 8277 
 8278 instruct storeImmB(memory mem, immI8 src)
 8279 %{
 8280   match(Set mem (StoreB mem src));
 8281 
 8282   ins_cost(150); // XXX
 8283   format %{ "movb    $mem, $src\t# byte" %}
 8284   ins_encode %{
 8285     __ movb($mem$$Address, $src$$constant);
 8286   %}
 8287   ins_pipe(ialu_mem_imm);
 8288 %}
 8289 
 8290 // Store Float
 8291 instruct storeF(memory mem, regF src)
 8292 %{
 8293   match(Set mem (StoreF mem src));
 8294 
 8295   ins_cost(95); // XXX
 8296   format %{ "movss   $mem, $src\t# float" %}
 8297   ins_encode %{
 8298     __ movflt($mem$$Address, $src$$XMMRegister);
 8299   %}
 8300   ins_pipe(pipe_slow); // XXX
 8301 %}
 8302 
 8303 // Store immediate Float value (it is faster than store from XMM register)
 8304 instruct storeF0(memory mem, immF0 zero)
 8305 %{
 8306   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8307   match(Set mem (StoreF mem zero));
 8308 
 8309   ins_cost(25); // XXX
  format %{ "movl    $mem, R12\t# float 0.0 (R12_heapbase==0)" %}
 8311   ins_encode %{
 8312     __ movl($mem$$Address, r12);
 8313   %}
 8314   ins_pipe(ialu_mem_reg);
 8315 %}
 8316 
 8317 instruct storeF_imm(memory mem, immF src)
 8318 %{
 8319   match(Set mem (StoreF mem src));
 8320 
 8321   ins_cost(50);
 8322   format %{ "movl    $mem, $src\t# float" %}
 8323   ins_encode %{
 8324     __ movl($mem$$Address, jint_cast($src$$constant));
 8325   %}
 8326   ins_pipe(ialu_mem_imm);
 8327 %}
 8328 
 8329 // Store Double
 8330 instruct storeD(memory mem, regD src)
 8331 %{
 8332   match(Set mem (StoreD mem src));
 8333 
 8334   ins_cost(95); // XXX
 8335   format %{ "movsd   $mem, $src\t# double" %}
 8336   ins_encode %{
 8337     __ movdbl($mem$$Address, $src$$XMMRegister);
 8338   %}
 8339   ins_pipe(pipe_slow); // XXX
 8340 %}
 8341 
 8342 // Store immediate double 0.0 (it is faster than store from XMM register)
 8343 instruct storeD0_imm(memory mem, immD0 src)
 8344 %{
 8345   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8346   match(Set mem (StoreD mem src));
 8347 
 8348   ins_cost(50);
  format %{ "movq    $mem, $src\t# double 0.0" %}
 8350   ins_encode %{
 8351     __ movq($mem$$Address, $src$$constant);
 8352   %}
 8353   ins_pipe(ialu_mem_imm);
 8354 %}
 8355 
 8356 instruct storeD0(memory mem, immD0 zero)
 8357 %{
 8358   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8359   match(Set mem (StoreD mem zero));
 8360 
 8361   ins_cost(25); // XXX
  format %{ "movq    $mem, R12\t# double 0.0 (R12_heapbase==0)" %}
 8363   ins_encode %{
 8364     __ movq($mem$$Address, r12);
 8365   %}
 8366   ins_pipe(ialu_mem_reg);
 8367 %}
 8368 
 8369 instruct storeSSI(stackSlotI dst, rRegI src)
 8370 %{
 8371   match(Set dst src);
 8372 
 8373   ins_cost(100);
 8374   format %{ "movl    $dst, $src\t# int stk" %}
 8375   ins_encode %{
 8376     __ movl($dst$$Address, $src$$Register);
 8377   %}
 8378   ins_pipe( ialu_mem_reg );
 8379 %}
 8380 
 8381 instruct storeSSL(stackSlotL dst, rRegL src)
 8382 %{
 8383   match(Set dst src);
 8384 
 8385   ins_cost(100);
 8386   format %{ "movq    $dst, $src\t# long stk" %}
 8387   ins_encode %{
 8388     __ movq($dst$$Address, $src$$Register);
 8389   %}
 8390   ins_pipe(ialu_mem_reg);
 8391 %}
 8392 
 8393 instruct storeSSP(stackSlotP dst, rRegP src)
 8394 %{
 8395   match(Set dst src);
 8396 
 8397   ins_cost(100);
 8398   format %{ "movq    $dst, $src\t# ptr stk" %}
 8399   ins_encode %{
 8400     __ movq($dst$$Address, $src$$Register);
 8401   %}
 8402   ins_pipe(ialu_mem_reg);
 8403 %}
 8404 
 8405 instruct storeSSF(stackSlotF dst, regF src)
 8406 %{
 8407   match(Set dst src);
 8408 
 8409   ins_cost(95); // XXX
 8410   format %{ "movss   $dst, $src\t# float stk" %}
 8411   ins_encode %{
 8412     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8413   %}
 8414   ins_pipe(pipe_slow); // XXX
 8415 %}
 8416 
 8417 instruct storeSSD(stackSlotD dst, regD src)
 8418 %{
 8419   match(Set dst src);
 8420 
 8421   ins_cost(95); // XXX
 8422   format %{ "movsd   $dst, $src\t# double stk" %}
 8423   ins_encode %{
 8424     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8425   %}
 8426   ins_pipe(pipe_slow); // XXX
 8427 %}
 8428 
 8429 instruct cacheWB(indirect addr)
 8430 %{
 8431   predicate(VM_Version::supports_data_cache_line_flush());
 8432   match(CacheWB addr);
 8433 
 8434   ins_cost(100);
  format %{ "cache wb $addr" %}
 8436   ins_encode %{
 8437     assert($addr->index_position() < 0, "should be");
 8438     assert($addr$$disp == 0, "should be");
 8439     __ cache_wb(Address($addr$$base$$Register, 0));
 8440   %}
 8441   ins_pipe(pipe_slow); // XXX
 8442 %}
 8443 
 8444 instruct cacheWBPreSync()
 8445 %{
 8446   predicate(VM_Version::supports_data_cache_line_flush());
 8447   match(CacheWBPreSync);
 8448 
 8449   ins_cost(100);
  format %{ "cache wb presync" %}
 8451   ins_encode %{
 8452     __ cache_wbsync(true);
 8453   %}
 8454   ins_pipe(pipe_slow); // XXX
 8455 %}
 8456 
 8457 instruct cacheWBPostSync()
 8458 %{
 8459   predicate(VM_Version::supports_data_cache_line_flush());
 8460   match(CacheWBPostSync);
 8461 
 8462   ins_cost(100);
  format %{ "cache wb postsync" %}
 8464   ins_encode %{
 8465     __ cache_wbsync(false);
 8466   %}
 8467   ins_pipe(pipe_slow); // XXX
 8468 %}
 8469 
 8470 //----------BSWAP Instructions-------------------------------------------------
 8471 instruct bytes_reverse_int(rRegI dst) %{
 8472   match(Set dst (ReverseBytesI dst));
 8473 
 8474   format %{ "bswapl  $dst" %}
 8475   ins_encode %{
 8476     __ bswapl($dst$$Register);
 8477   %}
 8478   ins_pipe( ialu_reg );
 8479 %}
 8480 
 8481 instruct bytes_reverse_long(rRegL dst) %{
 8482   match(Set dst (ReverseBytesL dst));
 8483 
 8484   format %{ "bswapq  $dst" %}
 8485   ins_encode %{
 8486     __ bswapq($dst$$Register);
 8487   %}
 8488   ins_pipe( ialu_reg);
 8489 %}
 8490 
 8491 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8492   match(Set dst (ReverseBytesUS dst));
 8493   effect(KILL cr);
 8494 
 8495   format %{ "bswapl  $dst\n\t"
            "shrl    $dst, 16" %}
 8497   ins_encode %{
 8498     __ bswapl($dst$$Register);
 8499     __ shrl($dst$$Register, 16);
 8500   %}
 8501   ins_pipe( ialu_reg );
 8502 %}
 8503 
 8504 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8505   match(Set dst (ReverseBytesS dst));
 8506   effect(KILL cr);
 8507 
 8508   format %{ "bswapl  $dst\n\t"
            "sarl    $dst, 16" %}
 8510   ins_encode %{
 8511     __ bswapl($dst$$Register);
 8512     __ sarl($dst$$Register, 16);
 8513   %}
 8514   ins_pipe( ialu_reg );
 8515 %}
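
// Both 16-bit reversals above reuse the 32-bit bswap: after bswapl the two
// interesting bytes sit in bits 31..16, so a right shift by 16 brings them
// back down, with shrl zero-filling for the unsigned case and sarl
// sign-extending for the signed one.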
 8516 
 8517 //---------- Zeros Count Instructions ------------------------------------------
 8518 
 8519 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8520   predicate(UseCountLeadingZerosInstruction);
 8521   match(Set dst (CountLeadingZerosI src));
 8522   effect(KILL cr);
 8523 
 8524   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8525   ins_encode %{
 8526     __ lzcntl($dst$$Register, $src$$Register);
 8527   %}
 8528   ins_pipe(ialu_reg);
 8529 %}
 8530 
 8531 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8532   predicate(UseCountLeadingZerosInstruction);
 8533   match(Set dst (CountLeadingZerosI (LoadI src)));
 8534   effect(KILL cr);
 8535   ins_cost(175);
 8536   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8537   ins_encode %{
 8538     __ lzcntl($dst$$Register, $src$$Address);
 8539   %}
 8540   ins_pipe(ialu_reg_mem);
 8541 %}
 8542 
 8543 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8544   predicate(!UseCountLeadingZerosInstruction);
 8545   match(Set dst (CountLeadingZerosI src));
 8546   effect(KILL cr);
 8547 
 8548   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8549             "jnz     skip\n\t"
 8550             "movl    $dst, -1\n"
 8551       "skip:\n\t"
 8552             "negl    $dst\n\t"
 8553             "addl    $dst, 31" %}
 8554   ins_encode %{
 8555     Register Rdst = $dst$$Register;
 8556     Register Rsrc = $src$$Register;
 8557     Label skip;
 8558     __ bsrl(Rdst, Rsrc);
 8559     __ jccb(Assembler::notZero, skip);
 8560     __ movl(Rdst, -1);
 8561     __ bind(skip);
 8562     __ negl(Rdst);
 8563     __ addl(Rdst, BitsPerInt - 1);
 8564   %}
 8565   ins_pipe(ialu_reg);
 8566 %}
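
// Arithmetic behind the bsr fallback: bsrl yields the index i of the
// highest set bit, and clz(x) = 31 - i, computed here as negl + addl 31.
// A zero input sets ZF and leaves the destination undefined, so the
// jnz-guarded movl substitutes -1, giving -(-1) + 31 = 32, the value
// Integer.numberOfLeadingZeros(0) must return. The long variant below uses
// the same identity with 63 and 64.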
 8567 
 8568 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8569   predicate(UseCountLeadingZerosInstruction);
 8570   match(Set dst (CountLeadingZerosL src));
 8571   effect(KILL cr);
 8572 
 8573   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8574   ins_encode %{
 8575     __ lzcntq($dst$$Register, $src$$Register);
 8576   %}
 8577   ins_pipe(ialu_reg);
 8578 %}
 8579 
 8580 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8581   predicate(UseCountLeadingZerosInstruction);
 8582   match(Set dst (CountLeadingZerosL (LoadL src)));
 8583   effect(KILL cr);
 8584   ins_cost(175);
 8585   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8586   ins_encode %{
 8587     __ lzcntq($dst$$Register, $src$$Address);
 8588   %}
 8589   ins_pipe(ialu_reg_mem);
 8590 %}
 8591 
 8592 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8593   predicate(!UseCountLeadingZerosInstruction);
 8594   match(Set dst (CountLeadingZerosL src));
 8595   effect(KILL cr);
 8596 
 8597   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8598             "jnz     skip\n\t"
 8599             "movl    $dst, -1\n"
 8600       "skip:\n\t"
 8601             "negl    $dst\n\t"
 8602             "addl    $dst, 63" %}
 8603   ins_encode %{
 8604     Register Rdst = $dst$$Register;
 8605     Register Rsrc = $src$$Register;
 8606     Label skip;
 8607     __ bsrq(Rdst, Rsrc);
 8608     __ jccb(Assembler::notZero, skip);
 8609     __ movl(Rdst, -1);
 8610     __ bind(skip);
 8611     __ negl(Rdst);
 8612     __ addl(Rdst, BitsPerLong - 1);
 8613   %}
 8614   ins_pipe(ialu_reg);
 8615 %}
 8616 
 8617 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8618   predicate(UseCountTrailingZerosInstruction);
 8619   match(Set dst (CountTrailingZerosI src));
 8620   effect(KILL cr);
 8621 
 8622   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8623   ins_encode %{
 8624     __ tzcntl($dst$$Register, $src$$Register);
 8625   %}
 8626   ins_pipe(ialu_reg);
 8627 %}
 8628 
 8629 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8630   predicate(UseCountTrailingZerosInstruction);
 8631   match(Set dst (CountTrailingZerosI (LoadI src)));
 8632   effect(KILL cr);
 8633   ins_cost(175);
 8634   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8635   ins_encode %{
 8636     __ tzcntl($dst$$Register, $src$$Address);
 8637   %}
 8638   ins_pipe(ialu_reg_mem);
 8639 %}
 8640 
 8641 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8642   predicate(!UseCountTrailingZerosInstruction);
 8643   match(Set dst (CountTrailingZerosI src));
 8644   effect(KILL cr);
 8645 
 8646   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8647             "jnz     done\n\t"
 8648             "movl    $dst, 32\n"
 8649       "done:" %}
 8650   ins_encode %{
 8651     Register Rdst = $dst$$Register;
 8652     Label done;
 8653     __ bsfl(Rdst, $src$$Register);
 8654     __ jccb(Assembler::notZero, done);
 8655     __ movl(Rdst, BitsPerInt);
 8656     __ bind(done);
 8657   %}
 8658   ins_pipe(ialu_reg);
 8659 %}
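
// bsf directly computes the trailing-zero count for non-zero inputs (the
// index of the lowest set bit) but leaves the destination undefined and
// sets ZF for zero, so the fallback only needs the jnz-guarded
// "movl $dst, 32" (64 in the long variant), with no post-processing.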
 8660 
 8661 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8662   predicate(UseCountTrailingZerosInstruction);
 8663   match(Set dst (CountTrailingZerosL src));
 8664   effect(KILL cr);
 8665 
 8666   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8667   ins_encode %{
 8668     __ tzcntq($dst$$Register, $src$$Register);
 8669   %}
 8670   ins_pipe(ialu_reg);
 8671 %}
 8672 
 8673 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8674   predicate(UseCountTrailingZerosInstruction);
 8675   match(Set dst (CountTrailingZerosL (LoadL src)));
 8676   effect(KILL cr);
 8677   ins_cost(175);
 8678   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8679   ins_encode %{
 8680     __ tzcntq($dst$$Register, $src$$Address);
 8681   %}
 8682   ins_pipe(ialu_reg_mem);
 8683 %}
 8684 
 8685 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8686   predicate(!UseCountTrailingZerosInstruction);
 8687   match(Set dst (CountTrailingZerosL src));
 8688   effect(KILL cr);
 8689 
 8690   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8691             "jnz     done\n\t"
 8692             "movl    $dst, 64\n"
 8693       "done:" %}
 8694   ins_encode %{
 8695     Register Rdst = $dst$$Register;
 8696     Label done;
 8697     __ bsfq(Rdst, $src$$Register);
 8698     __ jccb(Assembler::notZero, done);
 8699     __ movl(Rdst, BitsPerLong);
 8700     __ bind(done);
 8701   %}
 8702   ins_pipe(ialu_reg);
 8703 %}
 8704 
 8705 //--------------- Reverse Operation Instructions ----------------
 8706 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8707   predicate(!VM_Version::supports_gfni());
 8708   match(Set dst (ReverseI src));
 8709   effect(TEMP dst, TEMP rtmp, KILL cr);
 8710   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8711   ins_encode %{
 8712     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8713   %}
 8714   ins_pipe( ialu_reg );
 8715 %}
 8716 
 8717 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8718   predicate(VM_Version::supports_gfni());
 8719   match(Set dst (ReverseI src));
 8720   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8721   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8722   ins_encode %{
 8723     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8724   %}
 8725   ins_pipe( ialu_reg );
 8726 %}
 8727 
 8728 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8729   predicate(!VM_Version::supports_gfni());
 8730   match(Set dst (ReverseL src));
 8731   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8732   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8733   ins_encode %{
 8734     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8735   %}
 8736   ins_pipe( ialu_reg );
 8737 %}
 8738 
 8739 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8740   predicate(VM_Version::supports_gfni());
 8741   match(Set dst (ReverseL src));
 8742   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8743   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8744   ins_encode %{
 8745     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8746   %}
 8747   ins_pipe( ialu_reg );
 8748 %}
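
// The GFNI flavors lean on gf2p8affineqb: with a suitable 8x8 bit matrix
// it reverses the bits within every byte of an XMM register in one
// instruction, after which a byte swap reorders the bytes themselves. The
// non-GFNI reverseI/reverseL fall back to a GPR shift-and-mask bit-swap
// sequence, which is why they need only integer temporaries.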
 8749 
 8750 //---------- Population Count Instructions -------------------------------------
 8751 
 8752 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8753   predicate(UsePopCountInstruction);
 8754   match(Set dst (PopCountI src));
 8755   effect(KILL cr);
 8756 
 8757   format %{ "popcnt  $dst, $src" %}
 8758   ins_encode %{
 8759     __ popcntl($dst$$Register, $src$$Register);
 8760   %}
 8761   ins_pipe(ialu_reg);
 8762 %}
 8763 
 8764 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8765   predicate(UsePopCountInstruction);
 8766   match(Set dst (PopCountI (LoadI mem)));
 8767   effect(KILL cr);
 8768 
 8769   format %{ "popcnt  $dst, $mem" %}
 8770   ins_encode %{
 8771     __ popcntl($dst$$Register, $mem$$Address);
 8772   %}
 8773   ins_pipe(ialu_reg);
 8774 %}
 8775 
 8776 // Note: Long.bitCount(long) returns an int.
 8777 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8778   predicate(UsePopCountInstruction);
 8779   match(Set dst (PopCountL src));
 8780   effect(KILL cr);
 8781 
 8782   format %{ "popcnt  $dst, $src" %}
 8783   ins_encode %{
 8784     __ popcntq($dst$$Register, $src$$Register);
 8785   %}
 8786   ins_pipe(ialu_reg);
 8787 %}
 8788 
 8789 // Note: Long.bitCount(long) returns an int.
 8790 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8791   predicate(UsePopCountInstruction);
 8792   match(Set dst (PopCountL (LoadL mem)));
 8793   effect(KILL cr);
 8794 
 8795   format %{ "popcnt  $dst, $mem" %}
 8796   ins_encode %{
 8797     __ popcntq($dst$$Register, $mem$$Address);
 8798   %}
 8799   ins_pipe(ialu_reg);
 8800 %}
 8801 
 8802 
 8803 //----------MemBar Instructions-----------------------------------------------
 8804 // Memory barrier flavors
 8805 
 8806 instruct membar_acquire()
 8807 %{
 8808   match(MemBarAcquire);
 8809   match(LoadFence);
 8810   ins_cost(0);
 8811 
 8812   size(0);
 8813   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8814   ins_encode();
 8815   ins_pipe(empty);
 8816 %}
 8817 
 8818 instruct membar_acquire_lock()
 8819 %{
 8820   match(MemBarAcquireLock);
 8821   ins_cost(0);
 8822 
 8823   size(0);
 8824   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8825   ins_encode();
 8826   ins_pipe(empty);
 8827 %}
 8828 
 8829 instruct membar_release()
 8830 %{
 8831   match(MemBarRelease);
 8832   match(StoreFence);
 8833   ins_cost(0);
 8834 
 8835   size(0);
 8836   format %{ "MEMBAR-release ! (empty encoding)" %}
 8837   ins_encode();
 8838   ins_pipe(empty);
 8839 %}
 8840 
 8841 instruct membar_release_lock()
 8842 %{
 8843   match(MemBarReleaseLock);
 8844   ins_cost(0);
 8845 
 8846   size(0);
 8847   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8848   ins_encode();
 8849   ins_pipe(empty);
 8850 %}
 8851 
 8852 instruct membar_storeload(rFlagsReg cr) %{
 8853   match(MemBarStoreLoad);
 8854   effect(KILL cr);
 8855   ins_cost(400);
 8856 
 8857   format %{
 8858     $$template
 8859     $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
 8860   %}
 8861   ins_encode %{
 8862     __ membar(Assembler::StoreLoad);
 8863   %}
 8864   ins_pipe(pipe_slow);
 8865 %}
 8866 
 8867 instruct membar_volatile(rFlagsReg cr) %{
 8868   match(MemBarVolatile);
 8869   effect(KILL cr);
 8870   ins_cost(400);
 8871 
 8872   format %{
 8873     $$template
 8874     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8875   %}
 8876   ins_encode %{
 8877     __ membar(Assembler::StoreLoad);
 8878   %}
 8879   ins_pipe(pipe_slow);
 8880 %}
 8881 
 8882 instruct unnecessary_membar_volatile()
 8883 %{
 8884   match(MemBarVolatile);
 8885   predicate(Matcher::post_store_load_barrier(n));
 8886   ins_cost(0);
 8887 
 8888   size(0);
 8889   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8890   ins_encode();
 8891   ins_pipe(empty);
 8892 %}
 8893 
 8894 instruct membar_full(rFlagsReg cr) %{
 8895   match(MemBarFull);
 8896   effect(KILL cr);
 8897   ins_cost(400);
 8898 
 8899   format %{
 8900     $$template
 8901     $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
 8902   %}
 8903   ins_encode %{
 8904     __ membar(Assembler::StoreLoad);
 8905   %}
 8906   ins_pipe(pipe_slow);
 8907 %}
 8908 
 8909 instruct membar_storestore() %{
 8910   match(MemBarStoreStore);
 8911   match(StoreStoreFence);
 8912   ins_cost(0);
 8913 
 8914   size(0);
 8915   format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
 8917   ins_pipe(empty);
 8918 %}
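
// Why most of the barriers above are empty: x86-64's TSO memory model
// already forbids load-load, load-store and store-store reordering, so the
// acquire, release and storestore barriers need no code and exist only to
// constrain the compiler's own scheduling. The one ordering x86 does not
// give for free is store-load; membar_storeload (and the volatile/full
// flavors) implement it with "lock addl [rsp], 0", a serializing locked
// read-modify-write that is typically cheaper than mfence.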
 8919 
 8920 //----------Move Instructions--------------------------------------------------
 8921 
 8922 instruct castX2P(rRegP dst, rRegL src)
 8923 %{
 8924   match(Set dst (CastX2P src));
 8925 
 8926   format %{ "movq    $dst, $src\t# long->ptr" %}
 8927   ins_encode %{
 8928     if ($dst$$reg != $src$$reg) {
 8929       __ movptr($dst$$Register, $src$$Register);
 8930     }
 8931   %}
 8932   ins_pipe(ialu_reg_reg); // XXX
 8933 %}
 8934 
 8935 instruct castP2X(rRegL dst, rRegP src)
 8936 %{
 8937   match(Set dst (CastP2X src));
 8938 
 8939   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8940   ins_encode %{
 8941     if ($dst$$reg != $src$$reg) {
 8942       __ movptr($dst$$Register, $src$$Register);
 8943     }
 8944   %}
 8945   ins_pipe(ialu_reg_reg); // XXX
 8946 %}
 8947 
// Convert an oop into an int for vector alignment masking
 8949 instruct convP2I(rRegI dst, rRegP src)
 8950 %{
 8951   match(Set dst (ConvL2I (CastP2X src)));
 8952 
 8953   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8954   ins_encode %{
 8955     __ movl($dst$$Register, $src$$Register);
 8956   %}
 8957   ins_pipe(ialu_reg_reg); // XXX
 8958 %}
 8959 
// Convert a compressed oop into an int for vector alignment masking
// in the case of 32-bit oops (heap < 4GB).
 8962 instruct convN2I(rRegI dst, rRegN src)
 8963 %{
 8964   predicate(CompressedOops::shift() == 0);
 8965   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8966 
 8967   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8968   ins_encode %{
 8969     __ movl($dst$$Register, $src$$Register);
 8970   %}
 8971   ins_pipe(ialu_reg_reg); // XXX
 8972 %}
 8973 
 8974 // Convert oop pointer into compressed form
 8975 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 8976   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8977   match(Set dst (EncodeP src));
 8978   effect(KILL cr);
 8979   format %{ "encode_heap_oop $dst,$src" %}
 8980   ins_encode %{
 8981     Register s = $src$$Register;
 8982     Register d = $dst$$Register;
 8983     if (s != d) {
 8984       __ movq(d, s);
 8985     }
 8986     __ encode_heap_oop(d);
 8987   %}
 8988   ins_pipe(ialu_reg_long);
 8989 %}
 8990 
 8991 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8992   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8993   match(Set dst (EncodeP src));
 8994   effect(KILL cr);
 8995   format %{ "encode_heap_oop_not_null $dst,$src" %}
 8996   ins_encode %{
 8997     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 8998   %}
 8999   ins_pipe(ialu_reg_long);
 9000 %}
 9001 
 9002 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 9003   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 9004             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 9005   match(Set dst (DecodeN src));
 9006   effect(KILL cr);
 9007   format %{ "decode_heap_oop $dst,$src" %}
 9008   ins_encode %{
 9009     Register s = $src$$Register;
 9010     Register d = $dst$$Register;
 9011     if (s != d) {
 9012       __ movq(d, s);
 9013     }
 9014     __ decode_heap_oop(d);
 9015   %}
 9016   ins_pipe(ialu_reg_long);
 9017 %}
 9018 
 9019 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9020   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 9021             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 9022   match(Set dst (DecodeN src));
 9023   effect(KILL cr);
 9024   format %{ "decode_heap_oop_not_null $dst,$src" %}
 9025   ins_encode %{
 9026     Register s = $src$$Register;
 9027     Register d = $dst$$Register;
 9028     if (s != d) {
 9029       __ decode_heap_oop_not_null(d, s);
 9030     } else {
 9031       __ decode_heap_oop_not_null(d);
 9032     }
 9033   %}
 9034   ins_pipe(ialu_reg_long);
 9035 %}
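
// Klass pointers are compressed with their own base and shift, independent
// of the oop encoding. The encode_and_move/decode_and_move forms write the
// result into a fresh register (hence TEMP dst) rather than updating the
// source in place.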
 9036 
 9037 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 9038   match(Set dst (EncodePKlass src));
 9039   effect(TEMP dst, KILL cr);
 9040   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 9041   ins_encode %{
 9042     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9043   %}
 9044   ins_pipe(ialu_reg_long);
 9045 %}
 9046 
 9047 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9048   match(Set dst (DecodeNKlass src));
 9049   effect(TEMP dst, KILL cr);
 9050   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 9051   ins_encode %{
 9052     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9053   %}
 9054   ins_pipe(ialu_reg_long);
 9055 %}
 9056 
 9057 //----------Conditional Move---------------------------------------------------
 9058 // Jump
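// The jumpXtnd variants implement a computed jump through a jump table kept
// in the constant section: lea loads the table base from $constantaddress
// and the (optionally scaled and displaced) switch value indexes the slot.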
 9059 // dummy instruction for generating temp registers
 9060 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 9061   match(Jump (LShiftL switch_val shift));
 9062   ins_cost(350);
 9063   predicate(false);
 9064   effect(TEMP dest);
 9065 
 9066   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9067             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9068   ins_encode %{
 9069     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9070     // to do that and the compiler is using that register as one it can allocate.
 9071     // So we build it all by hand.
 9072     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9073     // ArrayAddress dispatch(table, index);
 9074     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9075     __ lea($dest$$Register, $constantaddress);
 9076     __ jmp(dispatch);
 9077   %}
 9078   ins_pipe(pipe_jmp);
 9079 %}
 9080 
 9081 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 9082   match(Jump (AddL (LShiftL switch_val shift) offset));
 9083   ins_cost(350);
 9084   effect(TEMP dest);
 9085 
 9086   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9087             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9088   ins_encode %{
 9089     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9090     // to do that and the compiler is using that register as one it can allocate.
 9091     // So we build it all by hand.
 9092     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9093     // ArrayAddress dispatch(table, index);
 9094     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9095     __ lea($dest$$Register, $constantaddress);
 9096     __ jmp(dispatch);
 9097   %}
 9098   ins_pipe(pipe_jmp);
 9099 %}
 9100 
 9101 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 9102   match(Jump switch_val);
 9103   ins_cost(350);
 9104   effect(TEMP dest);
 9105 
 9106   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9107             "jmp     [$dest + $switch_val]\n\t" %}
 9108   ins_encode %{
 9109     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9110     // to do that and the compiler is using that register as one it can allocate.
 9111     // So we build it all by hand.
 9112     // Address index(noreg, switch_reg, Address::times_1);
 9113     // ArrayAddress dispatch(table, index);
 9114     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9115     __ lea($dest$$Register, $constantaddress);
 9116     __ jmp(dispatch);
 9117   %}
 9118   ins_pipe(pipe_jmp);
 9119 %}
 9120 
 9121 // Conditional move
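// A CMove between the constants 1 and 0 needs no cmov at all: the result is
// simply the negated condition, so a single setb materializes it directly.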
 9122 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9123 %{
 9124   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9125   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9126 
 9127   ins_cost(100); // XXX
 9128   format %{ "setbn$cop $dst\t# signed, int" %}
 9129   ins_encode %{
 9130     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9131     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9132   %}
 9133   ins_pipe(ialu_reg);
 9134 %}
 9135 
 9136 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9137 %{
 9138   predicate(!UseAPX);
 9139   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9140 
 9141   ins_cost(200); // XXX
 9142   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9143   ins_encode %{
 9144     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9145   %}
 9146   ins_pipe(pipe_cmov_reg);
 9147 %}
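
// The _ndd variants use the APX (Advanced Performance Extensions)
// new-data-destination encodings: the destination is distinct from both
// sources, so the register allocator need not coalesce dst with an input.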
 9148 
 9149 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9150 %{
 9151   predicate(UseAPX);
 9152   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9153 
 9154   ins_cost(200);
 9155   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9156   ins_encode %{
 9157     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9158   %}
 9159   ins_pipe(pipe_cmov_reg);
 9160 %}
 9161 
 9162 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9163 %{
 9164   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9165   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9166 
 9167   ins_cost(100); // XXX
 9168   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9169   ins_encode %{
 9170     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9171     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9172   %}
 9173   ins_pipe(ialu_reg);
 9174 %}
 9175 
 9176 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9177   predicate(!UseAPX);
 9178   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9179 
 9180   ins_cost(200); // XXX
 9181   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9182   ins_encode %{
 9183     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9184   %}
 9185   ins_pipe(pipe_cmov_reg);
 9186 %}
 9187 
 9188 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9189   predicate(UseAPX);
 9190   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9191 
 9192   ins_cost(200);
 9193   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9194   ins_encode %{
 9195     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9196   %}
 9197   ins_pipe(pipe_cmov_reg);
 9198 %}
 9199 
 9200 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9201 %{
 9202   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9203   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9204 
 9205   ins_cost(100); // XXX
 9206   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9207   ins_encode %{
 9208     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9209     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9210   %}
 9211   ins_pipe(ialu_reg);
 9212 %}
 9213 
 9214 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9215 %{
 9216   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9217   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9218 
 9219   ins_cost(100); // XXX
 9220   format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
 9221   ins_encode %{
 9222     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9223     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9224   %}
 9225   ins_pipe(ialu_reg);
 9226 %}
 9227 
 9228 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9229   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9230 
 9231   ins_cost(200);
 9232   expand %{
 9233     cmovI_regU(cop, cr, dst, src);
 9234   %}
 9235 %}
 9236 
 9237 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
 9238   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9239 
 9240   ins_cost(200);
 9241   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9242   ins_encode %{
 9243     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9244   %}
 9245   ins_pipe(pipe_cmov_reg);
 9246 %}
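
// After a float/double compare, ucomiss/ucomisd leave ZF, PF and CF all set
// for an unordered (NaN) operand. A plain cmovne would miss that case, so
// 'ne' must also move when the parity flag is set.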
 9247 
 9248 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9249   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9250   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9251 
 9252   ins_cost(200); // XXX
 9253   format %{ "cmovpl  $dst, $src\n\t"
 9254             "cmovnel $dst, $src" %}
 9255   ins_encode %{
 9256     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9257     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9258   %}
 9259   ins_pipe(pipe_cmov_reg);
 9260 %}
 9261 
 9262 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9263 // inputs of the CMove
 9264 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9265   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9266   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9267   effect(TEMP dst);
 9268 
 9269   ins_cost(200); // XXX
 9270   format %{ "cmovpl  $dst, $src\n\t"
 9271             "cmovnel $dst, $src" %}
 9272   ins_encode %{
 9273     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9274     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9275   %}
 9276   ins_pipe(pipe_cmov_reg);
 9277 %}
 9278 
 9279 // Conditional move
 9280 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9281   predicate(!UseAPX);
 9282   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9283 
 9284   ins_cost(250); // XXX
 9285   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9286   ins_encode %{
 9287     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9288   %}
 9289   ins_pipe(pipe_cmov_mem);
 9290 %}
 9291 
 9292 // Conditional move
 9293 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9294 %{
 9295   predicate(UseAPX);
 9296   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9297 
 9298   ins_cost(250);
 9299   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9300   ins_encode %{
 9301     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9302   %}
 9303   ins_pipe(pipe_cmov_mem);
 9304 %}
 9305 
 9306 // Conditional move
 9307 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9308 %{
 9309   predicate(!UseAPX);
 9310   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9311 
 9312   ins_cost(250); // XXX
 9313   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9314   ins_encode %{
 9315     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9316   %}
 9317   ins_pipe(pipe_cmov_mem);
 9318 %}
 9319 
 9320 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9321   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9322 
 9323   ins_cost(250);
 9324   expand %{
 9325     cmovI_memU(cop, cr, dst, src);
 9326   %}
 9327 %}
 9328 
 9329 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9330 %{
 9331   predicate(UseAPX);
 9332   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9333 
 9334   ins_cost(250);
 9335   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9336   ins_encode %{
 9337     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9338   %}
 9339   ins_pipe(pipe_cmov_mem);
 9340 %}
 9341 
 9342 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
 9343 %{
 9344   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9345 
 9346   ins_cost(250);
 9347   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9348   ins_encode %{
 9349     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9350   %}
 9351   ins_pipe(pipe_cmov_mem);
 9352 %}
 9353 
 9354 // Conditional move
 9355 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9356 %{
 9357   predicate(!UseAPX);
 9358   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9359 
 9360   ins_cost(200); // XXX
 9361   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9362   ins_encode %{
 9363     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9364   %}
 9365   ins_pipe(pipe_cmov_reg);
 9366 %}
 9367 
 9368 // Conditional move ndd
 9369 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9370 %{
 9371   predicate(UseAPX);
 9372   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9373 
 9374   ins_cost(200);
 9375   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9376   ins_encode %{
 9377     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9378   %}
 9379   ins_pipe(pipe_cmov_reg);
 9380 %}
 9381 
 9382 // Conditional move
 9383 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9384 %{
 9385   predicate(!UseAPX);
 9386   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9387 
 9388   ins_cost(200); // XXX
 9389   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9390   ins_encode %{
 9391     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9392   %}
 9393   ins_pipe(pipe_cmov_reg);
 9394 %}
 9395 
 9396 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9397   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9398 
 9399   ins_cost(200);
 9400   expand %{
 9401     cmovN_regU(cop, cr, dst, src);
 9402   %}
 9403 %}
 9404 
 9405 // Conditional move ndd
 9406 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9407 %{
 9408   predicate(UseAPX);
 9409   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9410 
 9411   ins_cost(200);
 9412   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9413   ins_encode %{
 9414     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9415   %}
 9416   ins_pipe(pipe_cmov_reg);
 9417 %}
 9418 
 9419 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
 9420   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9421 
 9422   ins_cost(200);
 9423   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
 9424   ins_encode %{
 9425     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9426   %}
 9427   ins_pipe(pipe_cmov_reg);
 9428 %}
 9429 
 9430 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9431   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9432   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9433 
 9434   ins_cost(200); // XXX
 9435   format %{ "cmovpl  $dst, $src\n\t"
 9436             "cmovnel $dst, $src" %}
 9437   ins_encode %{
 9438     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9439     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9440   %}
 9441   ins_pipe(pipe_cmov_reg);
 9442 %}
 9443 
 9444 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9445 // inputs of the CMove
 9446 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9447   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9448   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9449 
 9450   ins_cost(200); // XXX
 9451   format %{ "cmovpl  $dst, $src\n\t"
 9452             "cmovnel $dst, $src" %}
 9453   ins_encode %{
 9454     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9455     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9456   %}
 9457   ins_pipe(pipe_cmov_reg);
 9458 %}
 9459 
 9460 // Conditional move
 9461 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9462 %{
 9463   predicate(!UseAPX);
 9464   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9465 
 9466   ins_cost(200); // XXX
 9467   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9468   ins_encode %{
 9469     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9470   %}
 9471   ins_pipe(pipe_cmov_reg);  // XXX
 9472 %}
 9473 
 9474 // Conditional move ndd
 9475 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9476 %{
 9477   predicate(UseAPX);
 9478   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9479 
 9480   ins_cost(200);
 9481   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9482   ins_encode %{
 9483     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9484   %}
 9485   ins_pipe(pipe_cmov_reg);
 9486 %}
 9487 
 9488 // Conditional move
 9489 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9490 %{
 9491   predicate(!UseAPX);
 9492   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9493 
 9494   ins_cost(200); // XXX
 9495   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9496   ins_encode %{
 9497     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9498   %}
 9499   ins_pipe(pipe_cmov_reg); // XXX
 9500 %}
 9501 
 9502 // Conditional move ndd
 9503 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9504 %{
 9505   predicate(UseAPX);
 9506   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9507 
 9508   ins_cost(200);
 9509   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9510   ins_encode %{
 9511     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9512   %}
 9513   ins_pipe(pipe_cmov_reg);
 9514 %}
 9515 
 9516 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9517   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9518 
 9519   ins_cost(200);
 9520   expand %{
 9521     cmovP_regU(cop, cr, dst, src);
 9522   %}
 9523 %}
 9524 
 9525 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
 9526   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9527 
 9528   ins_cost(200);
 9529   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
 9530   ins_encode %{
 9531     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9532   %}
 9533   ins_pipe(pipe_cmov_reg);
 9534 %}
 9535 
 9536 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9537   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9538   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9539 
 9540   ins_cost(200); // XXX
 9541   format %{ "cmovpq  $dst, $src\n\t"
 9542             "cmovneq $dst, $src" %}
 9543   ins_encode %{
 9544     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9545     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9546   %}
 9547   ins_pipe(pipe_cmov_reg);
 9548 %}
 9549 
 9550 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9551 // inputs of the CMove
 9552 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9553   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9554   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9555 
 9556   ins_cost(200); // XXX
 9557   format %{ "cmovpq  $dst, $src\n\t"
 9558             "cmovneq $dst, $src" %}
 9559   ins_encode %{
 9560     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9561     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9562   %}
 9563   ins_pipe(pipe_cmov_reg);
 9564 %}
 9565 
 9566 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9567 %{
 9568   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9569   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9570 
 9571   ins_cost(100); // XXX
 9572   format %{ "setbn$cop $dst\t# signed, long" %}
 9573   ins_encode %{
 9574     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9575     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9576   %}
 9577   ins_pipe(ialu_reg);
 9578 %}
 9579 
 9580 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9581 %{
 9582   predicate(!UseAPX);
 9583   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9584 
 9585   ins_cost(200); // XXX
 9586   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9587   ins_encode %{
 9588     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9589   %}
 9590   ins_pipe(pipe_cmov_reg);  // XXX
 9591 %}
 9592 
 9593 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9594 %{
 9595   predicate(UseAPX);
 9596   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9597 
 9598   ins_cost(200);
 9599   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9600   ins_encode %{
 9601     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9602   %}
 9603   ins_pipe(pipe_cmov_reg);
 9604 %}
 9605 
 9606 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9607 %{
 9608   predicate(!UseAPX);
 9609   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9610 
 9611   ins_cost(200); // XXX
 9612   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9613   ins_encode %{
 9614     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9615   %}
 9616   ins_pipe(pipe_cmov_mem);  // XXX
 9617 %}
 9618 
 9619 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9620 %{
 9621   predicate(UseAPX);
 9622   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9623 
 9624   ins_cost(200);
 9625   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9626   ins_encode %{
 9627     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9628   %}
 9629   ins_pipe(pipe_cmov_mem);
 9630 %}
 9631 
 9632 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9633 %{
 9634   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9635   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9636 
 9637   ins_cost(100); // XXX
 9638   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9639   ins_encode %{
 9640     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9641     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9642   %}
 9643   ins_pipe(ialu_reg);
 9644 %}
 9645 
 9646 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9647 %{
 9648   predicate(!UseAPX);
 9649   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9650 
 9651   ins_cost(200); // XXX
 9652   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9653   ins_encode %{
 9654     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9655   %}
 9656   ins_pipe(pipe_cmov_reg); // XXX
 9657 %}
 9658 
 9659 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9660 %{
 9661   predicate(UseAPX);
 9662   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9663 
 9664   ins_cost(200);
 9665   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9666   ins_encode %{
 9667     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9668   %}
 9669   ins_pipe(pipe_cmov_reg);
 9670 %}
 9671 
 9672 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9673 %{
 9674   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9675   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9676 
 9677   ins_cost(100); // XXX
 9678   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9679   ins_encode %{
 9680     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9681     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9682   %}
 9683   ins_pipe(ialu_reg);
 9684 %}
 9685 
 9686 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9687 %{
 9688   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9689   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9690 
 9691   ins_cost(100); // XXX
 9692   format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
 9693   ins_encode %{
 9694     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9695     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9696   %}
 9697   ins_pipe(ialu_reg);
 9698 %}
 9699 
 9700 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9701   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9702 
 9703   ins_cost(200);
 9704   expand %{
 9705     cmovL_regU(cop, cr, dst, src);
 9706   %}
 9707 %}
 9708 
 9709 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
 9710 %{
 9711   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9712 
 9713   ins_cost(200);
 9714   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9715   ins_encode %{
 9716     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9717   %}
 9718   ins_pipe(pipe_cmov_reg);
 9719 %}
 9720 
 9721 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9722   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9723   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9724 
 9725   ins_cost(200); // XXX
 9726   format %{ "cmovpq  $dst, $src\n\t"
 9727             "cmovneq $dst, $src" %}
 9728   ins_encode %{
 9729     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9730     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9731   %}
 9732   ins_pipe(pipe_cmov_reg);
 9733 %}
 9734 
 9735 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9736 // inputs of the CMove
 9737 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9738   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9739   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9740 
 9741   ins_cost(200); // XXX
 9742   format %{ "cmovpq  $dst, $src\n\t"
 9743             "cmovneq $dst, $src" %}
 9744   ins_encode %{
 9745     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9746     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9747   %}
 9748   ins_pipe(pipe_cmov_reg);
 9749 %}
 9750 
 9751 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9752 %{
 9753   predicate(!UseAPX);
 9754   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9755 
 9756   ins_cost(200); // XXX
 9757   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9758   ins_encode %{
 9759     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9760   %}
 9761   ins_pipe(pipe_cmov_mem); // XXX
 9762 %}
 9763 
 9764 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9765   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9766 
 9767   ins_cost(200);
 9768   expand %{
 9769     cmovL_memU(cop, cr, dst, src);
 9770   %}
 9771 %}
 9772 
 9773 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9774 %{
 9775   predicate(UseAPX);
 9776   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9777 
 9778   ins_cost(200);
 9779   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9780   ins_encode %{
 9781     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9782   %}
 9783   ins_pipe(pipe_cmov_mem);
 9784 %}
 9785 
 9786 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
 9787 %{
 9788   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9789 
 9790   ins_cost(200);
 9791   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9792   ins_encode %{
 9793     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9794   %}
 9795   ins_pipe(pipe_cmov_mem);
 9796 %}
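
// There is no conditional move for XMM registers, so CMoveF/CMoveD are
// implemented as a short branch around the move. XORing the low bit of an
// x86 condition code ($cop$$cmpcode ^ 1) yields the negated condition.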
 9797 
 9798 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9799 %{
 9800   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9801 
 9802   ins_cost(200); // XXX
 9803   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9804             "movss     $dst, $src\n"
 9805     "skip:" %}
 9806   ins_encode %{
 9807     Label Lskip;
 9808     // Invert sense of branch from sense of CMOV
 9809     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9810     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9811     __ bind(Lskip);
 9812   %}
 9813   ins_pipe(pipe_slow);
 9814 %}
 9815 
 9816 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9817 %{
 9818   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9819 
 9820   ins_cost(200); // XXX
 9821   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9822             "movss     $dst, $src\n"
 9823     "skip:" %}
 9824   ins_encode %{
 9825     Label Lskip;
 9826     // Invert sense of branch from sense of CMOV
 9827     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9828     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9829     __ bind(Lskip);
 9830   %}
 9831   ins_pipe(pipe_slow);
 9832 %}
 9833 
 9834 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9835   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9836 
 9837   ins_cost(200);
 9838   expand %{
 9839     cmovF_regU(cop, cr, dst, src);
 9840   %}
 9841 %}
 9842 
 9843 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
 9844 %{
 9845   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9846 
 9847   ins_cost(200); // XXX
 9848   format %{ "jn$cop    skip\t# signed, unsigned cmove float\n\t"
 9849             "movss     $dst, $src\n"
 9850     "skip:" %}
 9851   ins_encode %{
 9852     Label Lskip;
 9853     // Invert sense of branch from sense of CMOV
 9854     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9855     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9856     __ bind(Lskip);
 9857   %}
 9858   ins_pipe(pipe_slow);
 9859 %}
 9860 
 9861 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9862 %{
 9863   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9864 
 9865   ins_cost(200); // XXX
 9866   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9867             "movsd     $dst, $src\n"
 9868     "skip:" %}
 9869   ins_encode %{
 9870     Label Lskip;
 9871     // Invert sense of branch from sense of CMOV
 9872     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9873     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9874     __ bind(Lskip);
 9875   %}
 9876   ins_pipe(pipe_slow);
 9877 %}
 9878 
 9879 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9880 %{
 9881   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9882 
 9883   ins_cost(200); // XXX
 9884   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9885             "movsd     $dst, $src\n"
 9886     "skip:" %}
 9887   ins_encode %{
 9888     Label Lskip;
 9889     // Invert sense of branch from sense of CMOV
 9890     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9891     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9892     __ bind(Lskip);
 9893   %}
 9894   ins_pipe(pipe_slow);
 9895 %}
 9896 
 9897 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9898   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9899 
 9900   ins_cost(200);
 9901   expand %{
 9902     cmovD_regU(cop, cr, dst, src);
 9903   %}
 9904 %}
 9905 
 9906 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
 9907 %{
 9908   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9909 
 9910   ins_cost(200); // XXX
 9911   format %{ "jn$cop    skip\t# signed, unsigned cmove double\n\t"
 9912             "movsd     $dst, $src\n"
 9913     "skip:" %}
 9914   ins_encode %{
 9915     Label Lskip;
 9916     // Invert sense of branch from sense of CMOV
 9917     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9918     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9919     __ bind(Lskip);
 9920   %}
 9921   ins_pipe(pipe_slow);
 9922 %}
 9923 
 9924 //----------Arithmetic Instructions--------------------------------------------
 9925 //----------Addition Instructions----------------------------------------------
 9926 
 9927 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9928 %{
 9929   predicate(!UseAPX);
 9930   match(Set dst (AddI dst src));
 9931   effect(KILL cr);
 9932   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9933   format %{ "addl    $dst, $src\t# int" %}
 9934   ins_encode %{
 9935     __ addl($dst$$Register, $src$$Register);
 9936   %}
 9937   ins_pipe(ialu_reg_reg);
 9938 %}
 9939 
 9940 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9941 %{
 9942   predicate(UseAPX);
 9943   match(Set dst (AddI src1 src2));
 9944   effect(KILL cr);
 9945   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
 9946 
 9947   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9948   ins_encode %{
 9949     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9950   %}
 9951   ins_pipe(ialu_reg_reg);
 9952 %}
 9953 
 9954 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9955 %{
 9956   predicate(!UseAPX);
 9957   match(Set dst (AddI dst src));
 9958   effect(KILL cr);
 9959   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9960 
 9961   format %{ "addl    $dst, $src\t# int" %}
 9962   ins_encode %{
 9963     __ addl($dst$$Register, $src$$constant);
 9964   %}
 9965   ins_pipe( ialu_reg );
 9966 %}
 9967 
 9968 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9969 %{
 9970   predicate(UseAPX);
 9971   match(Set dst (AddI src1 src2));
 9972   effect(KILL cr);
 9973   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
 9974 
 9975   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9976   ins_encode %{
 9977     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9978   %}
 9979   ins_pipe( ialu_reg );
 9980 %}
 9981 
 9982 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
 9983 %{
 9984   predicate(UseAPX);
 9985   match(Set dst (AddI (LoadI src1) src2));
 9986   effect(KILL cr);
 9987   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9988 
 9989   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9990   ins_encode %{
 9991     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
 9992   %}
 9993   ins_pipe( ialu_reg );
 9994 %}
 9995 
 9996 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9997 %{
 9998   predicate(!UseAPX);
 9999   match(Set dst (AddI dst (LoadI src)));
10000   effect(KILL cr);
10001   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10002 
10003   ins_cost(150); // XXX
10004   format %{ "addl    $dst, $src\t# int" %}
10005   ins_encode %{
10006     __ addl($dst$$Register, $src$$Address);
10007   %}
10008   ins_pipe(ialu_reg_mem);
10009 %}
10010 
10011 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10012 %{
10013   predicate(UseAPX);
10014   match(Set dst (AddI src1 (LoadI src2)));
10015   effect(KILL cr);
10016   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10017 
10018   ins_cost(150);
10019   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10020   ins_encode %{
10021     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10022   %}
10023   ins_pipe(ialu_reg_mem);
10024 %}
10025 
10026 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10027 %{
10028   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10029   effect(KILL cr);
10030   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10031 
10032   ins_cost(150); // XXX
10033   format %{ "addl    $dst, $src\t# int" %}
10034   ins_encode %{
10035     __ addl($dst$$Address, $src$$Register);
10036   %}
10037   ins_pipe(ialu_mem_reg);
10038 %}
10039 
10040 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10041 %{
10042   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10043   effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

10047   ins_cost(125); // XXX
10048   format %{ "addl    $dst, $src\t# int" %}
10049   ins_encode %{
10050     __ addl($dst$$Address, $src$$constant);
10051   %}
10052   ins_pipe(ialu_mem_imm);
10053 %}
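
// inc/dec are shorter than add with an immediate but, unlike add, leave the
// carry flag untouched; they are selected only when UseIncDec is enabled.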
10054 
10055 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10056 %{
10057   predicate(!UseAPX && UseIncDec);
10058   match(Set dst (AddI dst src));
10059   effect(KILL cr);
10060 
10061   format %{ "incl    $dst\t# int" %}
10062   ins_encode %{
10063     __ incrementl($dst$$Register);
10064   %}
10065   ins_pipe(ialu_reg);
10066 %}
10067 
10068 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10069 %{
10070   predicate(UseAPX && UseIncDec);
10071   match(Set dst (AddI src val));
10072   effect(KILL cr);
10073   flag(PD::Flag_ndd_demotable_opr1);
10074 
10075   format %{ "eincl    $dst, $src\t# int ndd" %}
10076   ins_encode %{
10077     __ eincl($dst$$Register, $src$$Register, false);
10078   %}
10079   ins_pipe(ialu_reg);
10080 %}
10081 
10082 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10083 %{
10084   predicate(UseAPX && UseIncDec);
10085   match(Set dst (AddI (LoadI src) val));
10086   effect(KILL cr);
10087 
10088   format %{ "eincl    $dst, $src\t# int ndd" %}
10089   ins_encode %{
10090     __ eincl($dst$$Register, $src$$Address, false);
10091   %}
10092   ins_pipe(ialu_reg);
10093 %}
10094 
10095 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10096 %{
10097   predicate(UseIncDec);
10098   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10099   effect(KILL cr);
10100 
10101   ins_cost(125); // XXX
10102   format %{ "incl    $dst\t# int" %}
10103   ins_encode %{
10104     __ incrementl($dst$$Address);
10105   %}
10106   ins_pipe(ialu_mem_imm);
10107 %}
10108 
10109 // XXX why does that use AddI
10110 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10111 %{
10112   predicate(!UseAPX && UseIncDec);
10113   match(Set dst (AddI dst src));
10114   effect(KILL cr);
10115 
10116   format %{ "decl    $dst\t# int" %}
10117   ins_encode %{
10118     __ decrementl($dst$$Register);
10119   %}
10120   ins_pipe(ialu_reg);
10121 %}
10122 
10123 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10124 %{
10125   predicate(UseAPX && UseIncDec);
10126   match(Set dst (AddI src val));
10127   effect(KILL cr);
10128   flag(PD::Flag_ndd_demotable_opr1);
10129 
10130   format %{ "edecl    $dst, $src\t# int ndd" %}
10131   ins_encode %{
10132     __ edecl($dst$$Register, $src$$Register, false);
10133   %}
10134   ins_pipe(ialu_reg);
10135 %}
10136 
10137 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10138 %{
10139   predicate(UseAPX && UseIncDec);
10140   match(Set dst (AddI (LoadI src) val));
10141   effect(KILL cr);
10142 
10143   format %{ "edecl    $dst, $src\t# int ndd" %}
10144   ins_encode %{
10145     __ edecl($dst$$Register, $src$$Address, false);
10146   %}
10147   ins_pipe(ialu_reg);
10148 %}
10149 
10150 // XXX why does that use AddI
10151 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10152 %{
10153   predicate(UseIncDec);
10154   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10155   effect(KILL cr);
10156 
10157   ins_cost(125); // XXX
10158   format %{ "decl    $dst\t# int" %}
10159   ins_encode %{
10160     __ decrementl($dst$$Address);
10161   %}
10162   ins_pipe(ialu_mem_imm);
10163 %}
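
// lea folds shift-and-add arithmetic into one instruction and does not
// modify flags. The two- and three-component forms are gated on
// VM_Version::supports_fast_2op_lea()/supports_fast_3op_lea() because a
// three-component lea is slow on some microarchitectures.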
10164 
10165 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10166 %{
10167   predicate(VM_Version::supports_fast_2op_lea());
10168   match(Set dst (AddI (LShiftI index scale) disp));
10169 
10170   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10171   ins_encode %{
10172     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10173     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10174   %}
10175   ins_pipe(ialu_reg_reg);
10176 %}
10177 
10178 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10179 %{
10180   predicate(VM_Version::supports_fast_3op_lea());
10181   match(Set dst (AddI (AddI base index) disp));
10182 
10183   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10184   ins_encode %{
10185     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10186   %}
10187   ins_pipe(ialu_reg_reg);
10188 %}
10189 
10190 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10191 %{
10192   predicate(VM_Version::supports_fast_2op_lea());
10193   match(Set dst (AddI base (LShiftI index scale)));
10194 
10195   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10196   ins_encode %{
10197     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10198     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10199   %}
10200   ins_pipe(ialu_reg_reg);
10201 %}
10202 
10203 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10204 %{
10205   predicate(VM_Version::supports_fast_3op_lea());
10206   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10207 
10208   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10209   ins_encode %{
10210     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10211     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10212   %}
10213   ins_pipe(ialu_reg_reg);
10214 %}
10215 
10216 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10217 %{
10218   predicate(!UseAPX);
10219   match(Set dst (AddL dst src));
10220   effect(KILL cr);
10221   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10222 
10223   format %{ "addq    $dst, $src\t# long" %}
10224   ins_encode %{
10225     __ addq($dst$$Register, $src$$Register);
10226   %}
10227   ins_pipe(ialu_reg_reg);
10228 %}
10229 
10230 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10231 %{
10232   predicate(UseAPX);
10233   match(Set dst (AddL src1 src2));
10234   effect(KILL cr);
10235   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10236 
10237   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10238   ins_encode %{
10239     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10240   %}
10241   ins_pipe(ialu_reg_reg);
10242 %}
10243 
10244 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10245 %{
10246   predicate(!UseAPX);
10247   match(Set dst (AddL dst src));
10248   effect(KILL cr);
10249   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10250 
10251   format %{ "addq    $dst, $src\t# long" %}
10252   ins_encode %{
10253     __ addq($dst$$Register, $src$$constant);
10254   %}
10255   ins_pipe( ialu_reg );
10256 %}
10257 
10258 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10259 %{
10260   predicate(UseAPX);
10261   match(Set dst (AddL src1 src2));
10262   effect(KILL cr);
10263   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10264 
10265   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10266   ins_encode %{
10267     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10268   %}
10269   ins_pipe( ialu_reg );
10270 %}
10271 
10272 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10273 %{
10274   predicate(UseAPX);
10275   match(Set dst (AddL (LoadL src1) src2));
10276   effect(KILL cr);
10277   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10278 
10279   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10280   ins_encode %{
10281     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10282   %}
10283   ins_pipe( ialu_reg );
10284 %}
10285 
10286 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10287 %{
10288   predicate(!UseAPX);
10289   match(Set dst (AddL dst (LoadL src)));
10290   effect(KILL cr);
10291   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10292 
10293   ins_cost(150); // XXX
10294   format %{ "addq    $dst, $src\t# long" %}
10295   ins_encode %{
10296     __ addq($dst$$Register, $src$$Address);
10297   %}
10298   ins_pipe(ialu_reg_mem);
10299 %}
10300 
10301 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10302 %{
10303   predicate(UseAPX);
10304   match(Set dst (AddL src1 (LoadL src2)));
10305   effect(KILL cr);
10306   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10307 
10308   ins_cost(150);
10309   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10310   ins_encode %{
10311     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10312   %}
10313   ins_pipe(ialu_reg_mem);
10314 %}
10315 
10316 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10317 %{
10318   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10319   effect(KILL cr);
10320   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10321 
10322   ins_cost(150); // XXX
10323   format %{ "addq    $dst, $src\t# long" %}
10324   ins_encode %{
10325     __ addq($dst$$Address, $src$$Register);
10326   %}
10327   ins_pipe(ialu_mem_reg);
10328 %}
10329 
10330 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10331 %{
10332   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10333   effect(KILL cr);
10334   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10335 
10336   ins_cost(125); // XXX
10337   format %{ "addq    $dst, $src\t# long" %}
10338   ins_encode %{
10339     __ addq($dst$$Address, $src$$constant);
10340   %}
10341   ins_pipe(ialu_mem_imm);
10342 %}
10343 
10344 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10345 %{
10346   predicate(!UseAPX && UseIncDec);
10347   match(Set dst (AddL dst src));
10348   effect(KILL cr);
10349 
10350   format %{ "incq    $dst\t# long" %}
10351   ins_encode %{
10352     __ incrementq($dst$$Register);
10353   %}
10354   ins_pipe(ialu_reg);
10355 %}
10356 
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10358 %{
10359   predicate(UseAPX && UseIncDec);
10360   match(Set dst (AddL src val));
10361   effect(KILL cr);
10362   flag(PD::Flag_ndd_demotable_opr1);
10363 
10364   format %{ "eincq    $dst, $src\t# long ndd" %}
10365   ins_encode %{
10366     __ eincq($dst$$Register, $src$$Register, false);
10367   %}
10368   ins_pipe(ialu_reg);
10369 %}
10370 
10371 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10372 %{
10373   predicate(UseAPX && UseIncDec);
10374   match(Set dst (AddL (LoadL src) val));
10375   effect(KILL cr);
10376 
10377   format %{ "eincq    $dst, $src\t# long ndd" %}
10378   ins_encode %{
10379     __ eincq($dst$$Register, $src$$Address, false);
10380   %}
10381   ins_pipe(ialu_reg);
10382 %}
10383 
10384 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10385 %{
10386   predicate(UseIncDec);
10387   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10388   effect(KILL cr);
10389 
10390   ins_cost(125); // XXX
10391   format %{ "incq    $dst\t# long" %}
10392   ins_encode %{
10393     __ incrementq($dst$$Address);
10394   %}
10395   ins_pipe(ialu_mem_imm);
10396 %}
10397 
10398 // XXX why does that use AddL
10399 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10400 %{
10401   predicate(!UseAPX && UseIncDec);
10402   match(Set dst (AddL dst src));
10403   effect(KILL cr);
10404 
10405   format %{ "decq    $dst\t# long" %}
10406   ins_encode %{
10407     __ decrementq($dst$$Register);
10408   %}
10409   ins_pipe(ialu_reg);
10410 %}
10411 
10412 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10413 %{
10414   predicate(UseAPX && UseIncDec);
10415   match(Set dst (AddL src val));
10416   effect(KILL cr);
10417   flag(PD::Flag_ndd_demotable_opr1);
10418 
10419   format %{ "edecq    $dst, $src\t# long ndd" %}
10420   ins_encode %{
10421     __ edecq($dst$$Register, $src$$Register, false);
10422   %}
10423   ins_pipe(ialu_reg);
10424 %}
10425 
10426 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10427 %{
10428   predicate(UseAPX && UseIncDec);
10429   match(Set dst (AddL (LoadL src) val));
10430   effect(KILL cr);
10431 
10432   format %{ "edecq    $dst, $src\t# long ndd" %}
10433   ins_encode %{
10434     __ edecq($dst$$Register, $src$$Address, false);
10435   %}
10436   ins_pipe(ialu_reg);
10437 %}
10438 
10439 // XXX why does that use AddL
10440 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10441 %{
10442   predicate(UseIncDec);
10443   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10444   effect(KILL cr);
10445 
10446   ins_cost(125); // XXX
10447   format %{ "decq    $dst\t# long" %}
10448   ins_encode %{
10449     __ decrementq($dst$$Address);
10450   %}
10451   ins_pipe(ialu_mem_imm);
10452 %}
10453 
10454 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10455 %{
10456   predicate(VM_Version::supports_fast_2op_lea());
10457   match(Set dst (AddL (LShiftL index scale) disp));
10458 
10459   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10460   ins_encode %{
10461     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10462     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10463   %}
10464   ins_pipe(ialu_reg_reg);
10465 %}
10466 
10467 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10468 %{
10469   predicate(VM_Version::supports_fast_3op_lea());
10470   match(Set dst (AddL (AddL base index) disp));
10471 
10472   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10473   ins_encode %{
10474     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10475   %}
10476   ins_pipe(ialu_reg_reg);
10477 %}
10478 
10479 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10480 %{
10481   predicate(VM_Version::supports_fast_2op_lea());
10482   match(Set dst (AddL base (LShiftL index scale)));
10483 
10484   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10485   ins_encode %{
10486     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10487     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10488   %}
10489   ins_pipe(ialu_reg_reg);
10490 %}
10491 
10492 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10493 %{
10494   predicate(VM_Version::supports_fast_3op_lea());
10495   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10496 
10497   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10498   ins_encode %{
10499     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10500     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10501   %}
10502   ins_pipe(ialu_reg_reg);
10503 %}
10504 
10505 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10506 %{
10507   match(Set dst (AddP dst src));
10508   effect(KILL cr);
10509   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10510 
10511   format %{ "addq    $dst, $src\t# ptr" %}
10512   ins_encode %{
10513     __ addq($dst$$Register, $src$$Register);
10514   %}
10515   ins_pipe(ialu_reg_reg);
10516 %}
10517 
10518 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10519 %{
10520   match(Set dst (AddP dst src));
10521   effect(KILL cr);
10522   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10523 
10524   format %{ "addq    $dst, $src\t# ptr" %}
10525   ins_encode %{
10526     __ addq($dst$$Register, $src$$constant);
10527   %}
10528   ins_pipe( ialu_reg );
10529 %}
10530 
10531 // XXX addP mem ops ????
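
// The Cast* instructions below emit no code; they exist only to carry type
// information in the ideal graph. With VerifyConstraintCasts > 0 the checked
// variants verify at runtime that the value lies within the cast's type.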
10532 
10533 instruct checkCastPP(rRegP dst)
10534 %{
10535   match(Set dst (CheckCastPP dst));
10536 
10537   size(0);
10538   format %{ "# checkcastPP of $dst" %}
10539   ins_encode(/* empty encoding */);
10540   ins_pipe(empty);
10541 %}
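
// checkCastPP above and the unchecked cast rules below are pure bookkeeping:
// size(0) plus an empty encoding means no code is emitted, the node only
// pins the compiler's type/range information.  When VerifyConstraintCasts is
// nonzero, the *_checked variants further down emit a runtime range check in
// place of the empty encoding.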
10542 
10543 instruct castPP(rRegP dst)
10544 %{
10545   match(Set dst (CastPP dst));
10546 
10547   size(0);
10548   format %{ "# castPP of $dst" %}
10549   ins_encode(/* empty encoding */);
10550   ins_pipe(empty);
10551 %}
10552 
10553 instruct castII(rRegI dst)
10554 %{
10555   predicate(VerifyConstraintCasts == 0);
10556   match(Set dst (CastII dst));
10557 
10558   size(0);
10559   format %{ "# castII of $dst" %}
10560   ins_encode(/* empty encoding */);
10561   ins_cost(0);
10562   ins_pipe(empty);
10563 %}
10564 
10565 instruct castII_checked(rRegI dst, rFlagsReg cr)
10566 %{
10567   predicate(VerifyConstraintCasts > 0);
10568   match(Set dst (CastII dst));
10569 
10570   effect(KILL cr);
10571   format %{ "# cast_checked_II $dst" %}
10572   ins_encode %{
10573     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10574   %}
10575   ins_pipe(pipe_slow);
10576 %}
10577 
10578 instruct castLL(rRegL dst)
10579 %{
10580   predicate(VerifyConstraintCasts == 0);
10581   match(Set dst (CastLL dst));
10582 
10583   size(0);
10584   format %{ "# castLL of $dst" %}
10585   ins_encode(/* empty encoding */);
10586   ins_cost(0);
10587   ins_pipe(empty);
10588 %}
10589 
10590 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10591 %{
10592   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10593   match(Set dst (CastLL dst));
10594 
10595   effect(KILL cr);
10596   format %{ "# cast_checked_LL $dst" %}
10597   ins_encode %{
10598     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10599   %}
10600   ins_pipe(pipe_slow);
10601 %}
10602 
10603 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10604 %{
10605   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10606   match(Set dst (CastLL dst));
10607 
10608   effect(KILL cr, TEMP tmp);
10609   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10610   ins_encode %{
10611     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10612   %}
10613   ins_pipe(pipe_slow);
10614 %}
10615 
10616 instruct castFF(regF dst)
10617 %{
10618   match(Set dst (CastFF dst));
10619 
10620   size(0);
10621   format %{ "# castFF of $dst" %}
10622   ins_encode(/* empty encoding */);
10623   ins_cost(0);
10624   ins_pipe(empty);
10625 %}
10626 
10627 instruct castHH(regF dst)
10628 %{
10629   match(Set dst (CastHH dst));
10630 
10631   size(0);
10632   format %{ "# castHH of $dst" %}
10633   ins_encode(/* empty encoding */);
10634   ins_cost(0);
10635   ins_pipe(empty);
10636 %}
10637 
10638 instruct castDD(regD dst)
10639 %{
10640   match(Set dst (CastDD dst));
10641 
10642   size(0);
10643   format %{ "# castDD of $dst" %}
10644   ins_encode(/* empty encoding */);
10645   ins_cost(0);
10646   ins_pipe(empty);
10647 %}
10648 
10649 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
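// All CompareAndSwap rules below share one shape; a sketch of the emitted
// sequence and its semantics (cmpxchg implicitly compares against rax):
//
//   lock cmpxchg [mem_ptr], newval
//     // if (rax == [mem_ptr]) { [mem_ptr] = newval; ZF = 1 }
//     // else                  { rax = [mem_ptr];    ZF = 0 }
//   setcc(equal, res)          // res = ZF (sete+movzbl, or setzue on APX)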
10650 instruct compareAndSwapP(rRegI res,
10651                          memory mem_ptr,
10652                          rax_RegP oldval, rRegP newval,
10653                          rFlagsReg cr)
10654 %{
10655   predicate(n->as_LoadStore()->barrier_data() == 0);
10656   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10657   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10658   effect(KILL cr, KILL oldval);
10659 
10660   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10661             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10662             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10663   ins_encode %{
10664     __ lock();
10665     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10666     __ setcc(Assembler::equal, $res$$Register);
10667   %}
10668   ins_pipe( pipe_cmpxchg );
10669 %}
10670 
10671 instruct compareAndSwapL(rRegI res,
10672                          memory mem_ptr,
10673                          rax_RegL oldval, rRegL newval,
10674                          rFlagsReg cr)
10675 %{
10676   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10677   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10678   effect(KILL cr, KILL oldval);
10679 
10680   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10681             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10682             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10683   ins_encode %{
10684     __ lock();
10685     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10686     __ setcc(Assembler::equal, $res$$Register);
10687   %}
10688   ins_pipe( pipe_cmpxchg );
10689 %}
10690 
10691 instruct compareAndSwapI(rRegI res,
10692                          memory mem_ptr,
10693                          rax_RegI oldval, rRegI newval,
10694                          rFlagsReg cr)
10695 %{
10696   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10697   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10698   effect(KILL cr, KILL oldval);
10699 
10700   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10701             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10702             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10703   ins_encode %{
10704     __ lock();
10705     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10706     __ setcc(Assembler::equal, $res$$Register);
10707   %}
10708   ins_pipe( pipe_cmpxchg );
10709 %}
10710 
10711 instruct compareAndSwapB(rRegI res,
10712                          memory mem_ptr,
10713                          rax_RegI oldval, rRegI newval,
10714                          rFlagsReg cr)
10715 %{
10716   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10717   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10718   effect(KILL cr, KILL oldval);
10719 
10720   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10721             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10722             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10723   ins_encode %{
10724     __ lock();
10725     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10726     __ setcc(Assembler::equal, $res$$Register);
10727   %}
10728   ins_pipe( pipe_cmpxchg );
10729 %}
10730 
10731 instruct compareAndSwapS(rRegI res,
10732                          memory mem_ptr,
10733                          rax_RegI oldval, rRegI newval,
10734                          rFlagsReg cr)
10735 %{
10736   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10737   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10738   effect(KILL cr, KILL oldval);
10739 
10740   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10741             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10742             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10743   ins_encode %{
10744     __ lock();
10745     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10746     __ setcc(Assembler::equal, $res$$Register);
10747   %}
10748   ins_pipe( pipe_cmpxchg );
10749 %}
10750 
10751 instruct compareAndSwapN(rRegI res,
10752                           memory mem_ptr,
10753                           rax_RegN oldval, rRegN newval,
10754                           rFlagsReg cr) %{
10755   predicate(n->as_LoadStore()->barrier_data() == 0);
10756   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10757   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10758   effect(KILL cr, KILL oldval);
10759 
10760   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10761             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10762             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10763   ins_encode %{
10764     __ lock();
10765     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10766     __ setcc(Assembler::equal, $res$$Register);
10767   %}
10768   ins_pipe( pipe_cmpxchg );
10769 %}
10770 
10771 instruct compareAndExchangeB(
10772                          memory mem_ptr,
10773                          rax_RegI oldval, rRegI newval,
10774                          rFlagsReg cr)
10775 %{
10776   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10777   effect(KILL cr);
10778 
10779   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10780             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10781   ins_encode %{
10782     __ lock();
10783     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10784   %}
10785   ins_pipe( pipe_cmpxchg );
10786 %}
10787 
10788 instruct compareAndExchangeS(
10789                          memory mem_ptr,
10790                          rax_RegI oldval, rRegI newval,
10791                          rFlagsReg cr)
10792 %{
10793   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10794   effect(KILL cr);
10795 
10796   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10797             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10798   ins_encode %{
10799     __ lock();
10800     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10801   %}
10802   ins_pipe( pipe_cmpxchg );
10803 %}
10804 
10805 instruct compareAndExchangeI(
10806                          memory mem_ptr,
10807                          rax_RegI oldval, rRegI newval,
10808                          rFlagsReg cr)
10809 %{
10810   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10811   effect(KILL cr);
10812 
10813   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10814             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10815   ins_encode %{
10816     __ lock();
10817     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10818   %}
10819   ins_pipe( pipe_cmpxchg );
10820 %}
10821 
10822 instruct compareAndExchangeL(
10823                          memory mem_ptr,
10824                          rax_RegL oldval, rRegL newval,
10825                          rFlagsReg cr)
10826 %{
10827   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10828   effect(KILL cr);
10829 
10830   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10831             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10832   ins_encode %{
10833     __ lock();
10834     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10835   %}
10836   ins_pipe( pipe_cmpxchg );
10837 %}
10838 
10839 instruct compareAndExchangeN(
10840                           memory mem_ptr,
10841                           rax_RegN oldval, rRegN newval,
10842                           rFlagsReg cr) %{
10843   predicate(n->as_LoadStore()->barrier_data() == 0);
10844   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10845   effect(KILL cr);
10846 
10847   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10848             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10849   ins_encode %{
10850     __ lock();
10851     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10852   %}
10853   ins_pipe( pipe_cmpxchg );
10854 %}
10855 
10856 instruct compareAndExchangeP(
10857                          memory mem_ptr,
10858                          rax_RegP oldval, rRegP newval,
10859                          rFlagsReg cr)
10860 %{
10861   predicate(n->as_LoadStore()->barrier_data() == 0);
10862   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10863   effect(KILL cr);
10864 
10865   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10866             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10867   ins_encode %{
10868     __ lock();
10869     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10870   %}
10871   ins_pipe( pipe_cmpxchg );
10872 %}
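
// Unlike the CompareAndSwap rules above, the CompareAndExchange rules return
// the value witnessed in memory rather than a success flag: on failure
// cmpxchg loads that value into rax, and on success rax already holds it, so
// the result is matched straight into the rax-bound oldval operand and no
// setcc is needed.  Roughly, VarHandle.compareAndExchange* maps here while
// compareAndSet maps to the rules above.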
10873 
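// GetAndAdd lowering: when the old value is consumed, the xadd* rules below
// emit `lock xadd`, which returns it; when result_not_used() holds, a plain
// `lock add` (register or immediate form) suffices and is cheaper.  Roughly,
// in Java terms:
//
//   counter.getAndAdd(1);            // result dropped  -> addl_lock form
//   int old = counter.getAndAdd(1);  // result consumed -> xaddl_lock form
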
10874 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10875   predicate(n->as_LoadStore()->result_not_used());
10876   match(Set dummy (GetAndAddB mem add));
10877   effect(KILL cr);
10878   format %{ "addb_lock   $mem, $add" %}
10879   ins_encode %{
10880     __ lock();
10881     __ addb($mem$$Address, $add$$Register);
10882   %}
10883   ins_pipe(pipe_cmpxchg);
10884 %}
10885 
10886 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10887   predicate(n->as_LoadStore()->result_not_used());
10888   match(Set dummy (GetAndAddB mem add));
10889   effect(KILL cr);
10890   format %{ "addb_lock   $mem, $add" %}
10891   ins_encode %{
10892     __ lock();
10893     __ addb($mem$$Address, $add$$constant);
10894   %}
10895   ins_pipe(pipe_cmpxchg);
10896 %}
10897 
10898 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10899   predicate(!n->as_LoadStore()->result_not_used());
10900   match(Set newval (GetAndAddB mem newval));
10901   effect(KILL cr);
10902   format %{ "xaddb_lock  $mem, $newval" %}
10903   ins_encode %{
10904     __ lock();
10905     __ xaddb($mem$$Address, $newval$$Register);
10906   %}
10907   ins_pipe(pipe_cmpxchg);
10908 %}
10909 
10910 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10911   predicate(n->as_LoadStore()->result_not_used());
10912   match(Set dummy (GetAndAddS mem add));
10913   effect(KILL cr);
10914   format %{ "addw_lock   $mem, $add" %}
10915   ins_encode %{
10916     __ lock();
10917     __ addw($mem$$Address, $add$$Register);
10918   %}
10919   ins_pipe(pipe_cmpxchg);
10920 %}
10921 
10922 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10923   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10924   match(Set dummy (GetAndAddS mem add));
10925   effect(KILL cr);
10926   format %{ "addw_lock   $mem, $add" %}
10927   ins_encode %{
10928     __ lock();
10929     __ addw($mem$$Address, $add$$constant);
10930   %}
10931   ins_pipe(pipe_cmpxchg);
10932 %}
10933 
10934 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10935   predicate(!n->as_LoadStore()->result_not_used());
10936   match(Set newval (GetAndAddS mem newval));
10937   effect(KILL cr);
10938   format %{ "xaddw_lock  $mem, $newval" %}
10939   ins_encode %{
10940     __ lock();
10941     __ xaddw($mem$$Address, $newval$$Register);
10942   %}
10943   ins_pipe(pipe_cmpxchg);
10944 %}
10945 
10946 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10947   predicate(n->as_LoadStore()->result_not_used());
10948   match(Set dummy (GetAndAddI mem add));
10949   effect(KILL cr);
10950   format %{ "addl_lock   $mem, $add" %}
10951   ins_encode %{
10952     __ lock();
10953     __ addl($mem$$Address, $add$$Register);
10954   %}
10955   ins_pipe(pipe_cmpxchg);
10956 %}
10957 
10958 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10959   predicate(n->as_LoadStore()->result_not_used());
10960   match(Set dummy (GetAndAddI mem add));
10961   effect(KILL cr);
10962   format %{ "addl_lock   $mem, $add" %}
10963   ins_encode %{
10964     __ lock();
10965     __ addl($mem$$Address, $add$$constant);
10966   %}
10967   ins_pipe(pipe_cmpxchg);
10968 %}
10969 
10970 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10971   predicate(!n->as_LoadStore()->result_not_used());
10972   match(Set newval (GetAndAddI mem newval));
10973   effect(KILL cr);
10974   format %{ "xaddl_lock  $mem, $newval" %}
10975   ins_encode %{
10976     __ lock();
10977     __ xaddl($mem$$Address, $newval$$Register);
10978   %}
10979   ins_pipe(pipe_cmpxchg);
10980 %}
10981 
10982 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10983   predicate(n->as_LoadStore()->result_not_used());
10984   match(Set dummy (GetAndAddL mem add));
10985   effect(KILL cr);
10986   format %{ "addq_lock   $mem, $add" %}
10987   ins_encode %{
10988     __ lock();
10989     __ addq($mem$$Address, $add$$Register);
10990   %}
10991   ins_pipe(pipe_cmpxchg);
10992 %}
10993 
10994 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10995   predicate(n->as_LoadStore()->result_not_used());
10996   match(Set dummy (GetAndAddL mem add));
10997   effect(KILL cr);
10998   format %{ "addq_lock   $mem, $add" %}
10999   ins_encode %{
11000     __ lock();
11001     __ addq($mem$$Address, $add$$constant);
11002   %}
11003   ins_pipe(pipe_cmpxchg);
11004 %}
11005 
11006 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11007   predicate(!n->as_LoadStore()->result_not_used());
11008   match(Set newval (GetAndAddL mem newval));
11009   effect(KILL cr);
11010   format %{ "xaddq_lock  $mem, $newval" %}
11011   ins_encode %{
11012     __ lock();
11013     __ xaddq($mem$$Address, $newval$$Register);
11014   %}
11015   ins_pipe(pipe_cmpxchg);
11016 %}
11017 
11018 instruct xchgB( memory mem, rRegI newval) %{
11019   match(Set newval (GetAndSetB mem newval));
11020   format %{ "XCHGB  $newval,[$mem]" %}
11021   ins_encode %{
11022     __ xchgb($newval$$Register, $mem$$Address);
11023   %}
11024   ins_pipe( pipe_cmpxchg );
11025 %}
11026 
11027 instruct xchgS( memory mem, rRegI newval) %{
11028   match(Set newval (GetAndSetS mem newval));
11029   format %{ "XCHGW  $newval,[$mem]" %}
11030   ins_encode %{
11031     __ xchgw($newval$$Register, $mem$$Address);
11032   %}
11033   ins_pipe( pipe_cmpxchg );
11034 %}
11035 
11036 instruct xchgI( memory mem, rRegI newval) %{
11037   match(Set newval (GetAndSetI mem newval));
11038   format %{ "XCHGL  $newval,[$mem]" %}
11039   ins_encode %{
11040     __ xchgl($newval$$Register, $mem$$Address);
11041   %}
11042   ins_pipe( pipe_cmpxchg );
11043 %}
11044 
11045 instruct xchgL( memory mem, rRegL newval) %{
11046   match(Set newval (GetAndSetL mem newval));
11047   format %{ "XCHGQ  $newval,[$mem]" %}
11048   ins_encode %{
11049     __ xchgq($newval$$Register, $mem$$Address);
11050   %}
11051   ins_pipe( pipe_cmpxchg );
11052 %}
11053 
11054 instruct xchgP( memory mem, rRegP newval) %{
11055   match(Set newval (GetAndSetP mem newval));
11056   predicate(n->as_LoadStore()->barrier_data() == 0);
11057   format %{ "XCHGQ  $newval,[$mem]" %}
11058   ins_encode %{
11059     __ xchgq($newval$$Register, $mem$$Address);
11060   %}
11061   ins_pipe( pipe_cmpxchg );
11062 %}
11063 
11064 instruct xchgN( memory mem, rRegN newval) %{
11065   predicate(n->as_LoadStore()->barrier_data() == 0);
11066   match(Set newval (GetAndSetN mem newval));
11067   format %{ "XCHGL  $newval,[$mem]" %}
11068   ins_encode %{
11069     __ xchgl($newval$$Register, $mem$$Address);
11070   %}
11071   ins_pipe( pipe_cmpxchg );
11072 %}
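
// Note that no lock() is emitted in the xchg rules above: xchg with a memory
// operand is implicitly locked by the processor, so an explicit prefix would
// be redundant.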
11073 
11074 //----------Abs Instructions-------------------------------------------
11075 
11076 // Integer Absolute Instructions
11077 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11078 %{
11079   match(Set dst (AbsI src));
11080   effect(TEMP dst, KILL cr);
11081   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11082             "subl    $dst, $src\n\t"
11083             "cmovll  $dst, $src" %}
11084   ins_encode %{
11085     __ xorl($dst$$Register, $dst$$Register);
11086     __ subl($dst$$Register, $src$$Register);
11087     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11088   %}
11089 
11090   ins_pipe(ialu_reg_reg);
11091 %}
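
// A worked trace of the branchless abs above: dst = 0 - src, then cmovl
// keeps src itself when that negation is (signed) negative.  For src = 7:
// dst = -7, "less" holds, cmovl writes 7.  For src = -5: dst = 5, "less"
// fails, dst stays 5.  Integer.MIN_VALUE negates to itself with OF = SF, so
// "less" fails and the result is MIN_VALUE, matching Java's Math.abs.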
11092 
11093 // Long Absolute Instructions
11094 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11095 %{
11096   match(Set dst (AbsL src));
11097   effect(TEMP dst, KILL cr);
11098   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11099             "subq    $dst, $src\n\t"
11100             "cmovlq  $dst, $src" %}
11101   ins_encode %{
11102     __ xorl($dst$$Register, $dst$$Register);
11103     __ subq($dst$$Register, $src$$Register);
11104     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11105   %}
11106 
11107   ins_pipe(ialu_reg_reg);
11108 %}
11109 
11110 //----------Subtraction Instructions-------------------------------------------
11111 
11112 // Integer Subtraction Instructions
11113 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11114 %{
11115   predicate(!UseAPX);
11116   match(Set dst (SubI dst src));
11117   effect(KILL cr);
11118   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11119 
11120   format %{ "subl    $dst, $src\t# int" %}
11121   ins_encode %{
11122     __ subl($dst$$Register, $src$$Register);
11123   %}
11124   ins_pipe(ialu_reg_reg);
11125 %}
11126 
11127 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11128 %{
11129   predicate(UseAPX);
11130   match(Set dst (SubI src1 src2));
11131   effect(KILL cr);
11132   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11133 
11134   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11135   ins_encode %{
11136     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11137   %}
11138   ins_pipe(ialu_reg_reg);
11139 %}
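
// APX NDD ("new data destination") forms such as esubl compute
// dst = src1 - src2 in one EVEX-encoded instruction, so dst no longer has to
// alias src1 in the match rule.  The Flag_ndd_demotable_opr1 hint marks
// rules that a later pass may demote to the shorter legacy two-operand
// encoding when the allocator assigns dst == src1 (a sketch of the intent;
// the demotion itself lives outside this file).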
11140 
11141 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11142 %{
11143   predicate(UseAPX);
11144   match(Set dst (SubI src1 src2));
11145   effect(KILL cr);
11146   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11147 
11148   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11149   ins_encode %{
11150     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11151   %}
11152   ins_pipe(ialu_reg_reg);
11153 %}
11154 
11155 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11156 %{
11157   predicate(UseAPX);
11158   match(Set dst (SubI (LoadI src1) src2));
11159   effect(KILL cr);
11160   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11161 
11162   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11163   ins_encode %{
11164     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11165   %}
11166   ins_pipe(ialu_reg_reg);
11167 %}
11168 
11169 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11170 %{
11171   predicate(!UseAPX);
11172   match(Set dst (SubI dst (LoadI src)));
11173   effect(KILL cr);
11174   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11175 
11176   ins_cost(150);
11177   format %{ "subl    $dst, $src\t# int" %}
11178   ins_encode %{
11179     __ subl($dst$$Register, $src$$Address);
11180   %}
11181   ins_pipe(ialu_reg_mem);
11182 %}
11183 
11184 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11185 %{
11186   predicate(UseAPX);
11187   match(Set dst (SubI src1 (LoadI src2)));
11188   effect(KILL cr);
11189   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11190 
11191   ins_cost(150);
11192   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11193   ins_encode %{
11194     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11195   %}
11196   ins_pipe(ialu_reg_mem);
11197 %}
11198 
11199 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11200 %{
11201   predicate(UseAPX);
11202   match(Set dst (SubI (LoadI src1) src2));
11203   effect(KILL cr);
11204   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11205 
11206   ins_cost(150);
11207   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11208   ins_encode %{
11209     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11210   %}
11211   ins_pipe(ialu_reg_mem);
11212 %}
11213 
11214 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11215 %{
11216   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11217   effect(KILL cr);
11218   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11219 
11220   ins_cost(150);
11221   format %{ "subl    $dst, $src\t# int" %}
11222   ins_encode %{
11223     __ subl($dst$$Address, $src$$Register);
11224   %}
11225   ins_pipe(ialu_mem_reg);
11226 %}
11227 
11228 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11229 %{
11230   predicate(!UseAPX);
11231   match(Set dst (SubL dst src));
11232   effect(KILL cr);
11233   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11234 
11235   format %{ "subq    $dst, $src\t# long" %}
11236   ins_encode %{
11237     __ subq($dst$$Register, $src$$Register);
11238   %}
11239   ins_pipe(ialu_reg_reg);
11240 %}
11241 
11242 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11243 %{
11244   predicate(UseAPX);
11245   match(Set dst (SubL src1 src2));
11246   effect(KILL cr);
11247   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11248 
11249   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11250   ins_encode %{
11251     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11252   %}
11253   ins_pipe(ialu_reg_reg);
11254 %}
11255 
11256 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11257 %{
11258   predicate(UseAPX);
11259   match(Set dst (SubL src1 src2));
11260   effect(KILL cr);
11261   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11262 
11263   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11264   ins_encode %{
11265     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11266   %}
11267   ins_pipe(ialu_reg_reg);
11268 %}
11269 
11270 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11271 %{
11272   predicate(UseAPX);
11273   match(Set dst (SubL (LoadL src1) src2));
11274   effect(KILL cr);
11275   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11276 
11277   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11278   ins_encode %{
11279     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11280   %}
11281   ins_pipe(ialu_reg_reg);
11282 %}
11283 
11284 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11285 %{
11286   predicate(!UseAPX);
11287   match(Set dst (SubL dst (LoadL src)));
11288   effect(KILL cr);
11289   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11290 
11291   ins_cost(150);
11292   format %{ "subq    $dst, $src\t# long" %}
11293   ins_encode %{
11294     __ subq($dst$$Register, $src$$Address);
11295   %}
11296   ins_pipe(ialu_reg_mem);
11297 %}
11298 
11299 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11300 %{
11301   predicate(UseAPX);
11302   match(Set dst (SubL src1 (LoadL src2)));
11303   effect(KILL cr);
11304   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11305 
11306   ins_cost(150);
11307   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11308   ins_encode %{
11309     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11310   %}
11311   ins_pipe(ialu_reg_mem);
11312 %}
11313 
11314 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11315 %{
11316   predicate(UseAPX);
11317   match(Set dst (SubL (LoadL src1) src2));
11318   effect(KILL cr);
11319   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11320 
11321   ins_cost(150);
11322   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11323   ins_encode %{
11324     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11325   %}
11326   ins_pipe(ialu_reg_mem);
11327 %}
11328 
11329 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11330 %{
11331   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11332   effect(KILL cr);
11333   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11334 
11335   ins_cost(150);
11336   format %{ "subq    $dst, $src\t# long" %}
11337   ins_encode %{
11338     __ subq($dst$$Address, $src$$Register);
11339   %}
11340   ins_pipe(ialu_mem_reg);
11341 %}
11342 
11343 // Subtract an int from a pointer
11344 // XXX odd shape: the matcher sees pointer-minus-int as AddP of (0 - src).
11345 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11346 %{
11347   match(Set dst (AddP dst (SubI zero src)));
11348   effect(KILL cr);
11349 
11350   format %{ "subq    $dst, $src\t# ptr - int" %}
11351   ins_encode %{
11352     __ subq($dst$$Register, $src$$Register);
11353   %}
11354   ins_pipe(ialu_reg_reg);
11355 %}
11356 
11357 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11358 %{
11359   predicate(!UseAPX);
11360   match(Set dst (SubI zero dst));
11361   effect(KILL cr);
11362   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11363 
11364   format %{ "negl    $dst\t# int" %}
11365   ins_encode %{
11366     __ negl($dst$$Register);
11367   %}
11368   ins_pipe(ialu_reg);
11369 %}
11370 
11371 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11372 %{
11373   predicate(UseAPX);
11374   match(Set dst (SubI zero src));
11375   effect(KILL cr);
11376   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11377 
11378   format %{ "enegl    $dst, $src\t# int ndd" %}
11379   ins_encode %{
11380     __ enegl($dst$$Register, $src$$Register, false);
11381   %}
11382   ins_pipe(ialu_reg);
11383 %}
11384 
11385 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11386 %{
11387   predicate(!UseAPX);
11388   match(Set dst (NegI dst));
11389   effect(KILL cr);
11390   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11391 
11392   format %{ "negl    $dst\t# int" %}
11393   ins_encode %{
11394     __ negl($dst$$Register);
11395   %}
11396   ins_pipe(ialu_reg);
11397 %}
11398 
11399 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11400 %{
11401   predicate(UseAPX);
11402   match(Set dst (NegI src));
11403   effect(KILL cr);
11404   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11405 
11406   format %{ "enegl    $dst, $src\t# int ndd" %}
11407   ins_encode %{
11408     __ enegl($dst$$Register, $src$$Register, false);
11409   %}
11410   ins_pipe(ialu_reg);
11411 %}
11412 
11413 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11414 %{
11415   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11416   effect(KILL cr);
11417   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11418 
11419   format %{ "negl    $dst\t# int" %}
11420   ins_encode %{
11421     __ negl($dst$$Address);
11422   %}
11423   ins_pipe(ialu_reg);
11424 %}
11425 
11426 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11427 %{
11428   predicate(!UseAPX);
11429   match(Set dst (SubL zero dst));
11430   effect(KILL cr);
11431   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11432 
11433   format %{ "negq    $dst\t# long" %}
11434   ins_encode %{
11435     __ negq($dst$$Register);
11436   %}
11437   ins_pipe(ialu_reg);
11438 %}
11439 
11440 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11441 %{
11442   predicate(UseAPX);
11443   match(Set dst (SubL zero src));
11444   effect(KILL cr);
11445   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11446 
11447   format %{ "enegq    $dst, $src\t# long ndd" %}
11448   ins_encode %{
11449     __ enegq($dst$$Register, $src$$Register, false);
11450   %}
11451   ins_pipe(ialu_reg);
11452 %}
11453 
11454 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11455 %{
11456   predicate(!UseAPX);
11457   match(Set dst (NegL dst));
11458   effect(KILL cr);
11459   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11460 
11461   format %{ "negq    $dst\t# long" %}
11462   ins_encode %{
11463     __ negq($dst$$Register);
11464   %}
11465   ins_pipe(ialu_reg);
11466 %}
11467 
11468 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11469 %{
11470   predicate(UseAPX);
11471   match(Set dst (NegL src));
11472   effect(KILL cr);
11473   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11474 
11475   format %{ "enegq    $dst, $src\t# long ndd" %}
11476   ins_encode %{
11477     __ enegq($dst$$Register, $src$$Register, false);
11478   %}
11479   ins_pipe(ialu_reg);
11480 %}
11481 
11482 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11483 %{
11484   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11485   effect(KILL cr);
11486   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11487 
11488   format %{ "negq    $dst\t# long" %}
11489   ins_encode %{
11490     __ negq($dst$$Address);
11491   %}
11492   ins_pipe(ialu_reg);
11493 %}
11494 
11495 //----------Multiplication/Division Instructions-------------------------------
11496 // Integer Multiplication Instructions
11497 // Multiply Register
11498 
11499 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11500 %{
11501   predicate(!UseAPX);
11502   match(Set dst (MulI dst src));
11503   effect(KILL cr);
11504 
11505   ins_cost(300);
11506   format %{ "imull   $dst, $src\t# int" %}
11507   ins_encode %{
11508     __ imull($dst$$Register, $src$$Register);
11509   %}
11510   ins_pipe(ialu_reg_reg_alu0);
11511 %}
11512 
11513 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11514 %{
11515   predicate(UseAPX);
11516   match(Set dst (MulI src1 src2));
11517   effect(KILL cr);
11518   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11519 
11520   ins_cost(300);
11521   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11522   ins_encode %{
11523     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11524   %}
11525   ins_pipe(ialu_reg_reg_alu0);
11526 %}
11527 
11528 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11529 %{
11530   match(Set dst (MulI src imm));
11531   effect(KILL cr);
11532 
11533   ins_cost(300);
11534   format %{ "imull   $dst, $src, $imm\t# int" %}
11535   ins_encode %{
11536     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11537   %}
11538   ins_pipe(ialu_reg_reg_alu0);
11539 %}
11540 
11541 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11542 %{
11543   predicate(!UseAPX);
11544   match(Set dst (MulI dst (LoadI src)));
11545   effect(KILL cr);
11546 
11547   ins_cost(350);
11548   format %{ "imull   $dst, $src\t# int" %}
11549   ins_encode %{
11550     __ imull($dst$$Register, $src$$Address);
11551   %}
11552   ins_pipe(ialu_reg_mem_alu0);
11553 %}
11554 
11555 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11556 %{
11557   predicate(UseAPX);
11558   match(Set dst (MulI src1 (LoadI src2)));
11559   effect(KILL cr);
11560   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11561 
11562   ins_cost(350);
11563   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11564   ins_encode %{
11565     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11566   %}
11567   ins_pipe(ialu_reg_mem_alu0);
11568 %}
11569 
11570 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11571 %{
11572   match(Set dst (MulI (LoadI src) imm));
11573   effect(KILL cr);
11574 
11575   ins_cost(300);
11576   format %{ "imull   $dst, $src, $imm\t# int" %}
11577   ins_encode %{
11578     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11579   %}
11580   ins_pipe(ialu_reg_mem_alu0);
11581 %}
11582 
11583 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11584 %{
11585   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11586   effect(KILL cr, KILL src2);
11587 
11588   expand %{ mulI_rReg(dst, src1, cr);
11589             mulI_rReg(src2, src3, cr);
11590             addI_rReg(dst, src2, cr); %}
11591 %}
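
// MulAddS2I computes dst = dst*src1 + src2*src3.  Rather than a dedicated
// encoding it expands into three rules defined earlier, reusing src2 (hence
// KILL src2) as the scratch for the second product:
//
//   imull dst, src1      // dst   = dst  * src1
//   imull src2, src3     // src2  = src2 * src3
//   addl  dst, src2      // dst  += src2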
11592 
11593 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11594 %{
11595   predicate(!UseAPX);
11596   match(Set dst (MulL dst src));
11597   effect(KILL cr);
11598 
11599   ins_cost(300);
11600   format %{ "imulq   $dst, $src\t# long" %}
11601   ins_encode %{
11602     __ imulq($dst$$Register, $src$$Register);
11603   %}
11604   ins_pipe(ialu_reg_reg_alu0);
11605 %}
11606 
11607 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11608 %{
11609   predicate(UseAPX);
11610   match(Set dst (MulL src1 src2));
11611   effect(KILL cr);
11612   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11613 
11614   ins_cost(300);
11615   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11616   ins_encode %{
11617     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11618   %}
11619   ins_pipe(ialu_reg_reg_alu0);
11620 %}
11621 
11622 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11623 %{
11624   match(Set dst (MulL src imm));
11625   effect(KILL cr);
11626 
11627   ins_cost(300);
11628   format %{ "imulq   $dst, $src, $imm\t# long" %}
11629   ins_encode %{
11630     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11631   %}
11632   ins_pipe(ialu_reg_reg_alu0);
11633 %}
11634 
11635 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11636 %{
11637   predicate(!UseAPX);
11638   match(Set dst (MulL dst (LoadL src)));
11639   effect(KILL cr);
11640 
11641   ins_cost(350);
11642   format %{ "imulq   $dst, $src\t# long" %}
11643   ins_encode %{
11644     __ imulq($dst$$Register, $src$$Address);
11645   %}
11646   ins_pipe(ialu_reg_mem_alu0);
11647 %}
11648 
11649 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11650 %{
11651   predicate(UseAPX);
11652   match(Set dst (MulL src1 (LoadL src2)));
11653   effect(KILL cr);
11654   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11655 
11656   ins_cost(350);
11657   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11658   ins_encode %{
11659     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11660   %}
11661   ins_pipe(ialu_reg_mem_alu0);
11662 %}
11663 
11664 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11665 %{
11666   match(Set dst (MulL (LoadL src) imm));
11667   effect(KILL cr);
11668 
11669   ins_cost(300);
11670   format %{ "imulq   $dst, $src, $imm\t# long" %}
11671   ins_encode %{
11672     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11673   %}
11674   ins_pipe(ialu_reg_mem_alu0);
11675 %}
11676 
11677 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11678 %{
11679   match(Set dst (MulHiL src rax));
11680   effect(USE_KILL rax, KILL cr);
11681 
11682   ins_cost(300);
11683   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11684   ins_encode %{
11685     __ imulq($src$$Register);
11686   %}
11687   ins_pipe(ialu_reg_reg_alu0);
11688 %}
11689 
11690 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11691 %{
11692   match(Set dst (UMulHiL src rax));
11693   effect(USE_KILL rax, KILL cr);
11694 
11695   ins_cost(300);
11696   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11697   ins_encode %{
11698     __ mulq($src$$Register);
11699   %}
11700   ins_pipe(ialu_reg_reg_alu0);
11701 %}
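
// The one-operand imulq/mulq used above produce a 128-bit product in
// RDX:RAX; binding dst to rdx keeps only the high 64 bits, which is what
// Math.multiplyHigh / Math.unsignedMultiplyHigh need.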
11702 
11703 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11704                    rFlagsReg cr)
11705 %{
11706   match(Set rax (DivI rax div));
11707   effect(KILL rdx, KILL cr);
11708 
11709   ins_cost(30*100+10*100); // XXX
11710   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11711             "jne,s   normal\n\t"
11712             "xorl    rdx, rdx\n\t"
11713             "cmpl    $div, -1\n\t"
11714             "je,s    done\n"
11715     "normal: cdql\n\t"
11716             "idivl   $div\n"
11717     "done:"        %}
11718   ins_encode(cdql_enc(div));
11719   ins_pipe(ialu_reg_reg_alu0);
11720 %}
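
// The guard emitted by cdql_enc exists because idivl raises #DE when the
// quotient overflows, which happens exactly for MIN_VALUE / -1.  Java
// instead defines Integer.MIN_VALUE / -1 == Integer.MIN_VALUE with
// remainder 0, so the sequence special-cases rax == 0x80000000 with a -1
// divisor: it skips the divide, leaving rax (quotient) intact and zeroing
// rdx (remainder).  The long rules below do the same for 0x8000000000000000.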
11721 
11722 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11723                    rFlagsReg cr)
11724 %{
11725   match(Set rax (DivL rax div));
11726   effect(KILL rdx, KILL cr);
11727 
11728   ins_cost(30*100+10*100); // XXX
11729   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11730             "cmpq    rax, rdx\n\t"
11731             "jne,s   normal\n\t"
11732             "xorl    rdx, rdx\n\t"
11733             "cmpq    $div, -1\n\t"
11734             "je,s    done\n"
11735     "normal: cdqq\n\t"
11736             "idivq   $div\n"
11737     "done:"        %}
11738   ins_encode(cdqq_enc(div));
11739   ins_pipe(ialu_reg_reg_alu0);
11740 %}
11741 
11742 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11743 %{
11744   match(Set rax (UDivI rax div));
11745   effect(KILL rdx, KILL cr);
11746 
11747   ins_cost(300);
11748   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11749   ins_encode %{
11750     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11751   %}
11752   ins_pipe(ialu_reg_reg_alu0);
11753 %}
11754 
11755 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11756 %{
11757   match(Set rax (UDivL rax div));
11758   effect(KILL rdx, KILL cr);
11759 
11760   ins_cost(300);
11761   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11762   ins_encode %{
11763      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11764   %}
11765   ins_pipe(ialu_reg_reg_alu0);
11766 %}
11767 
11768 // Integer DIVMOD with Register, both quotient and mod results
11769 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11770                              rFlagsReg cr)
11771 %{
11772   match(DivModI rax div);
11773   effect(KILL cr);
11774 
11775   ins_cost(30*100+10*100); // XXX
11776   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11777             "jne,s   normal\n\t"
11778             "xorl    rdx, rdx\n\t"
11779             "cmpl    $div, -1\n\t"
11780             "je,s    done\n"
11781     "normal: cdql\n\t"
11782             "idivl   $div\n"
11783     "done:"        %}
11784   ins_encode(cdql_enc(div));
11785   ins_pipe(pipe_slow);
11786 %}
11787 
11788 // Long DIVMOD with Register, both quotient and mod results
11789 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11790                              rFlagsReg cr)
11791 %{
11792   match(DivModL rax div);
11793   effect(KILL cr);
11794 
11795   ins_cost(30*100+10*100); // XXX
11796   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11797             "cmpq    rax, rdx\n\t"
11798             "jne,s   normal\n\t"
11799             "xorl    rdx, rdx\n\t"
11800             "cmpq    $div, -1\n\t"
11801             "je,s    done\n"
11802     "normal: cdqq\n\t"
11803             "idivq   $div\n"
11804     "done:"        %}
11805   ins_encode(cdqq_enc(div));
11806   ins_pipe(pipe_slow);
11807 %}
11808 
11809 // Unsigned integer DIVMOD with Register, both quotient and mod results
11810 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11811                               no_rax_rdx_RegI div, rFlagsReg cr)
11812 %{
11813   match(UDivModI rax div);
11814   effect(TEMP tmp, KILL cr);
11815 
11816   ins_cost(300);
11817   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11818             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11819           %}
11820   ins_encode %{
11821     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11822   %}
11823   ins_pipe(pipe_slow);
11824 %}
11825 
11826 // Unsigned long DIVMOD with Register, both quotient and mod results
11827 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11828                               no_rax_rdx_RegL div, rFlagsReg cr)
11829 %{
11830   match(UDivModL rax div);
11831   effect(TEMP tmp, KILL cr);
11832 
11833   ins_cost(300);
11834   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11835             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11836           %}
11837   ins_encode %{
11838     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11839   %}
11840   ins_pipe(pipe_slow);
11841 %}
11842 
11843 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11844                    rFlagsReg cr)
11845 %{
11846   match(Set rdx (ModI rax div));
11847   effect(KILL rax, KILL cr);
11848 
11849   ins_cost(300); // XXX
11850   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11851             "jne,s   normal\n\t"
11852             "xorl    rdx, rdx\n\t"
11853             "cmpl    $div, -1\n\t"
11854             "je,s    done\n"
11855     "normal: cdql\n\t"
11856             "idivl   $div\n"
11857     "done:"        %}
11858   ins_encode(cdql_enc(div));
11859   ins_pipe(ialu_reg_reg_alu0);
11860 %}
11861 
11862 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11863                    rFlagsReg cr)
11864 %{
11865   match(Set rdx (ModL rax div));
11866   effect(KILL rax, KILL cr);
11867 
11868   ins_cost(300); // XXX
11869   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11870             "cmpq    rax, rdx\n\t"
11871             "jne,s   normal\n\t"
11872             "xorl    rdx, rdx\n\t"
11873             "cmpq    $div, -1\n\t"
11874             "je,s    done\n"
11875     "normal: cdqq\n\t"
11876             "idivq   $div\n"
11877     "done:"        %}
11878   ins_encode(cdqq_enc(div));
11879   ins_pipe(ialu_reg_reg_alu0);
11880 %}
11881 
11882 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11883 %{
11884   match(Set rdx (UModI rax div));
11885   effect(KILL rax, KILL cr);
11886 
11887   ins_cost(300);
11888   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11889   ins_encode %{
11890     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11891   %}
11892   ins_pipe(ialu_reg_reg_alu0);
11893 %}
11894 
11895 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11896 %{
11897   match(Set rdx (UModL rax div));
11898   effect(KILL rax, KILL cr);
11899 
11900   ins_cost(300);
11901   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11902   ins_encode %{
11903     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11904   %}
11905   ins_pipe(ialu_reg_reg_alu0);
11906 %}
11907 
11908 // Integer Shift Instructions
11909 // Shift Left by one, two, three
11910 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11911 %{
11912   predicate(!UseAPX);
11913   match(Set dst (LShiftI dst shift));
11914   effect(KILL cr);
11915 
11916   format %{ "sall    $dst, $shift" %}
11917   ins_encode %{
11918     __ sall($dst$$Register, $shift$$constant);
11919   %}
11920   ins_pipe(ialu_reg);
11921 %}
11922 
11923 // Shift Left by one, two, three
11924 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11925 %{
11926   predicate(UseAPX);
11927   match(Set dst (LShiftI src shift));
11928   effect(KILL cr);
11929   flag(PD::Flag_ndd_demotable_opr1);
11930 
11931   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11932   ins_encode %{
11933     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11934   %}
11935   ins_pipe(ialu_reg);
11936 %}
11937 
11938 // Shift Left by 8-bit immediate
11939 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11940 %{
11941   predicate(!UseAPX);
11942   match(Set dst (LShiftI dst shift));
11943   effect(KILL cr);
11944 
11945   format %{ "sall    $dst, $shift" %}
11946   ins_encode %{
11947     __ sall($dst$$Register, $shift$$constant);
11948   %}
11949   ins_pipe(ialu_reg);
11950 %}
11951 
11952 // Shift Left by 8-bit immediate
11953 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11954 %{
11955   predicate(UseAPX);
11956   match(Set dst (LShiftI src shift));
11957   effect(KILL cr);
11958   flag(PD::Flag_ndd_demotable_opr1);
11959 
11960   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11961   ins_encode %{
11962     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11963   %}
11964   ins_pipe(ialu_reg);
11965 %}
11966 
11967 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11968 %{
11969   predicate(UseAPX);
11970   match(Set dst (LShiftI (LoadI src) shift));
11971   effect(KILL cr);
11972 
11973   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11974   ins_encode %{
11975     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11976   %}
11977   ins_pipe(ialu_reg);
11978 %}
11979 
11980 // Shift Left by 8-bit immediate
11981 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11982 %{
11983   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11984   effect(KILL cr);
11985 
11986   format %{ "sall    $dst, $shift" %}
11987   ins_encode %{
11988     __ sall($dst$$Address, $shift$$constant);
11989   %}
11990   ins_pipe(ialu_mem_imm);
11991 %}
11992 
11993 // Shift Left by variable
11994 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11995 %{
11996   predicate(!VM_Version::supports_bmi2());
11997   match(Set dst (LShiftI dst shift));
11998   effect(KILL cr);
11999 
12000   format %{ "sall    $dst, $shift" %}
12001   ins_encode %{
12002     __ sall($dst$$Register);
12003   %}
12004   ins_pipe(ialu_reg_reg);
12005 %}
12006 
12007 // Shift Left by variable
12008 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12009 %{
12010   predicate(!VM_Version::supports_bmi2());
12011   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12012   effect(KILL cr);
12013 
12014   format %{ "sall    $dst, $shift" %}
12015   ins_encode %{
12016     __ sall($dst$$Address);
12017   %}
12018   ins_pipe(ialu_mem_reg);
12019 %}
12020 
12021 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12022 %{
12023   predicate(VM_Version::supports_bmi2());
12024   match(Set dst (LShiftI src shift));
12025 
12026   format %{ "shlxl   $dst, $src, $shift" %}
12027   ins_encode %{
12028     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12029   %}
12030   ins_pipe(ialu_reg_reg);
12031 %}
12032 
12033 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12034 %{
12035   predicate(VM_Version::supports_bmi2());
12036   match(Set dst (LShiftI (LoadI src) shift));
12037   ins_cost(175);
12038   format %{ "shlxl   $dst, $src, $shift" %}
12039   ins_encode %{
12040     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12041   %}
12042   ins_pipe(ialu_reg_mem);
12043 %}
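
// shlxl above (and the sarx/shrx rules further down) are the BMI2 shift
// forms: the count may live in any register instead of being pinned to CL,
// and no flags are written, which is why these rules have no KILL cr effect
// and need no rFlagsReg operand.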
12044 
12045 // Arithmetic Shift Right by 8-bit immediate
12046 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12047 %{
12048   predicate(!UseAPX);
12049   match(Set dst (RShiftI dst shift));
12050   effect(KILL cr);
12051 
12052   format %{ "sarl    $dst, $shift" %}
12053   ins_encode %{
12054     __ sarl($dst$$Register, $shift$$constant);
12055   %}
12056   ins_pipe(ialu_mem_imm);
12057 %}
12058 
12059 // Arithmetic Shift Right by 8-bit immediate
12060 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12061 %{
12062   predicate(UseAPX);
12063   match(Set dst (RShiftI src shift));
12064   effect(KILL cr);
12065   flag(PD::Flag_ndd_demotable_opr1);
12066 
12067   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12068   ins_encode %{
12069     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12070   %}
12071   ins_pipe(ialu_mem_imm);
12072 %}
12073 
12074 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12075 %{
12076   predicate(UseAPX);
12077   match(Set dst (RShiftI (LoadI src) shift));
12078   effect(KILL cr);
12079 
12080   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12081   ins_encode %{
12082     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12083   %}
12084   ins_pipe(ialu_mem_imm);
12085 %}
12086 
12087 // Arithmetic Shift Right by 8-bit immediate
12088 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12089 %{
12090   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12091   effect(KILL cr);
12092 
12093   format %{ "sarl    $dst, $shift" %}
12094   ins_encode %{
12095     __ sarl($dst$$Address, $shift$$constant);
12096   %}
12097   ins_pipe(ialu_mem_imm);
12098 %}
12099 
12100 // Arithmetic Shift Right by variable
12101 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12102 %{
12103   predicate(!VM_Version::supports_bmi2());
12104   match(Set dst (RShiftI dst shift));
12105   effect(KILL cr);
12106 
12107   format %{ "sarl    $dst, $shift" %}
12108   ins_encode %{
12109     __ sarl($dst$$Register);
12110   %}
12111   ins_pipe(ialu_reg_reg);
12112 %}
12113 
12114 // Arithmetic Shift Right by variable
12115 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12116 %{
12117   predicate(!VM_Version::supports_bmi2());
12118   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12119   effect(KILL cr);
12120 
12121   format %{ "sarl    $dst, $shift" %}
12122   ins_encode %{
12123     __ sarl($dst$$Address);
12124   %}
12125   ins_pipe(ialu_mem_reg);
12126 %}
12127 
12128 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12129 %{
12130   predicate(VM_Version::supports_bmi2());
12131   match(Set dst (RShiftI src shift));
12132 
12133   format %{ "sarxl   $dst, $src, $shift" %}
12134   ins_encode %{
12135     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12136   %}
12137   ins_pipe(ialu_reg_reg);
12138 %}
12139 
12140 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12141 %{
12142   predicate(VM_Version::supports_bmi2());
12143   match(Set dst (RShiftI (LoadI src) shift));
12144   ins_cost(175);
12145   format %{ "sarxl   $dst, $src, $shift" %}
12146   ins_encode %{
12147     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12148   %}
12149   ins_pipe(ialu_reg_mem);
12150 %}
12151 
12152 // Logical Shift Right by 8-bit immediate
12153 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12154 %{
12155   predicate(!UseAPX);
12156   match(Set dst (URShiftI dst shift));
12157   effect(KILL cr);
12158 
12159   format %{ "shrl    $dst, $shift" %}
12160   ins_encode %{
12161     __ shrl($dst$$Register, $shift$$constant);
12162   %}
12163   ins_pipe(ialu_reg);
12164 %}
12165 
12166 // Logical Shift Right by 8-bit immediate
12167 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12168 %{
12169   predicate(UseAPX);
12170   match(Set dst (URShiftI src shift));
12171   effect(KILL cr);
12172   flag(PD::Flag_ndd_demotable_opr1);
12173 
12174   format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12175   ins_encode %{
12176     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12177   %}
12178   ins_pipe(ialu_reg);
12179 %}
12180 
12181 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12182 %{
12183   predicate(UseAPX);
12184   match(Set dst (URShiftI (LoadI src) shift));
12185   effect(KILL cr);
12186 
12187   format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12188   ins_encode %{
12189     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12190   %}
12191   ins_pipe(ialu_reg);
12192 %}
12193 
12194 // Logical Shift Right by 8-bit immediate
12195 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12196 %{
12197   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12198   effect(KILL cr);
12199 
12200   format %{ "shrl    $dst, $shift" %}
12201   ins_encode %{
12202     __ shrl($dst$$Address, $shift$$constant);
12203   %}
12204   ins_pipe(ialu_mem_imm);
12205 %}
12206 
12207 // Logical Shift Right by variable
12208 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12209 %{
12210   predicate(!VM_Version::supports_bmi2());
12211   match(Set dst (URShiftI dst shift));
12212   effect(KILL cr);
12213 
12214   format %{ "shrl    $dst, $shift" %}
12215   ins_encode %{
12216     __ shrl($dst$$Register);
12217   %}
12218   ins_pipe(ialu_reg_reg);
12219 %}
12220 
12221 // Logical Shift Right by variable
12222 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12223 %{
12224   predicate(!VM_Version::supports_bmi2());
12225   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12226   effect(KILL cr);
12227 
12228   format %{ "shrl    $dst, $shift" %}
12229   ins_encode %{
12230     __ shrl($dst$$Address);
12231   %}
12232   ins_pipe(ialu_mem_reg);
12233 %}
12234 
12235 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12236 %{
12237   predicate(VM_Version::supports_bmi2());
12238   match(Set dst (URShiftI src shift));
12239 
12240   format %{ "shrxl   $dst, $src, $shift" %}
12241   ins_encode %{
12242     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12243   %}
12244   ins_pipe(ialu_reg_reg);
12245 %}
12246 
12247 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12248 %{
12249   predicate(VM_Version::supports_bmi2());
12250   match(Set dst (URShiftI (LoadI src) shift));
12251   ins_cost(175);
12252   format %{ "shrxl   $dst, $src, $shift" %}
12253   ins_encode %{
12254     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12255   %}
12256   ins_pipe(ialu_reg_mem);
12257 %}
12258 
12259 // Long Shift Instructions
12260 // Shift Left by one, two, three
12261 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12262 %{
12263   predicate(!UseAPX);
12264   match(Set dst (LShiftL dst shift));
12265   effect(KILL cr);
12266 
12267   format %{ "salq    $dst, $shift" %}
12268   ins_encode %{
12269     __ salq($dst$$Register, $shift$$constant);
12270   %}
12271   ins_pipe(ialu_reg);
12272 %}
12273 
12274 // Shift Left by one, two, three
12275 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12276 %{
12277   predicate(UseAPX);
12278   match(Set dst (LShiftL src shift));
12279   effect(KILL cr);
12280   flag(PD::Flag_ndd_demotable_opr1);
12281 
12282   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12283   ins_encode %{
12284     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12285   %}
12286   ins_pipe(ialu_reg);
12287 %}
12288 
12289 // Shift Left by 8-bit immediate
12290 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12291 %{
12292   predicate(!UseAPX);
12293   match(Set dst (LShiftL dst shift));
12294   effect(KILL cr);
12295 
12296   format %{ "salq    $dst, $shift" %}
12297   ins_encode %{
12298     __ salq($dst$$Register, $shift$$constant);
12299   %}
12300   ins_pipe(ialu_reg);
12301 %}
12302 
12303 // Shift Left by 8-bit immediate
12304 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12305 %{
12306   predicate(UseAPX);
12307   match(Set dst (LShiftL src shift));
12308   effect(KILL cr);
12309   flag(PD::Flag_ndd_demotable_opr1);
12310 
12311   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12312   ins_encode %{
12313     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12314   %}
12315   ins_pipe(ialu_reg);
12316 %}
12317 
12318 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12319 %{
12320   predicate(UseAPX);
12321   match(Set dst (LShiftL (LoadL src) shift));
12322   effect(KILL cr);
12323 
12324   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12325   ins_encode %{
12326     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12327   %}
12328   ins_pipe(ialu_reg);
12329 %}
12330 
12331 // Shift Left by 8-bit immediate
12332 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12333 %{
12334   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12335   effect(KILL cr);
12336 
12337   format %{ "salq    $dst, $shift" %}
12338   ins_encode %{
12339     __ salq($dst$$Address, $shift$$constant);
12340   %}
12341   ins_pipe(ialu_mem_imm);
12342 %}
12343 
12344 // Shift Left by variable
12345 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12346 %{
12347   predicate(!VM_Version::supports_bmi2());
12348   match(Set dst (LShiftL dst shift));
12349   effect(KILL cr);
12350 
12351   format %{ "salq    $dst, $shift" %}
12352   ins_encode %{
12353     __ salq($dst$$Register);
12354   %}
12355   ins_pipe(ialu_reg_reg);
12356 %}
12357 
12358 // Shift Left by variable
12359 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12360 %{
12361   predicate(!VM_Version::supports_bmi2());
12362   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12363   effect(KILL cr);
12364 
12365   format %{ "salq    $dst, $shift" %}
12366   ins_encode %{
12367     __ salq($dst$$Address);
12368   %}
12369   ins_pipe(ialu_mem_reg);
12370 %}
12371 
12372 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12373 %{
12374   predicate(VM_Version::supports_bmi2());
12375   match(Set dst (LShiftL src shift));
12376 
12377   format %{ "shlxq   $dst, $src, $shift" %}
12378   ins_encode %{
12379     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12380   %}
12381   ins_pipe(ialu_reg_reg);
12382 %}
12383 
12384 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12385 %{
12386   predicate(VM_Version::supports_bmi2());
12387   match(Set dst (LShiftL (LoadL src) shift));
12388   ins_cost(175);
12389   format %{ "shlxq   $dst, $src, $shift" %}
12390   ins_encode %{
12391     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12392   %}
12393   ins_pipe(ialu_reg_mem);
12394 %}
12395 
12396 // Arithmetic Shift Right by 8-bit immediate
12397 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12398 %{
12399   predicate(!UseAPX);
12400   match(Set dst (RShiftL dst shift));
12401   effect(KILL cr);
12402 
12403   format %{ "sarq    $dst, $shift" %}
12404   ins_encode %{
12405     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12406   %}
  ins_pipe(ialu_reg);
12408 %}
12409 
12410 // Arithmetic Shift Right by 8-bit immediate
12411 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12412 %{
12413   predicate(UseAPX);
12414   match(Set dst (RShiftL src shift));
12415   effect(KILL cr);
12416   flag(PD::Flag_ndd_demotable_opr1);
12417 
12418   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12419   ins_encode %{
12420     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12421   %}
  ins_pipe(ialu_reg);
12423 %}
12424 
12425 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12426 %{
12427   predicate(UseAPX);
12428   match(Set dst (RShiftL (LoadL src) shift));
12429   effect(KILL cr);
12430 
12431   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12432   ins_encode %{
12433     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12434   %}
  ins_pipe(ialu_reg);
12436 %}
12437 
12438 // Arithmetic Shift Right by 8-bit immediate
12439 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12440 %{
12441   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12442   effect(KILL cr);
12443 
12444   format %{ "sarq    $dst, $shift" %}
12445   ins_encode %{
12446     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12447   %}
12448   ins_pipe(ialu_mem_imm);
12449 %}
12450 
12451 // Arithmetic Shift Right by variable
12452 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12453 %{
12454   predicate(!VM_Version::supports_bmi2());
12455   match(Set dst (RShiftL dst shift));
12456   effect(KILL cr);
12457 
12458   format %{ "sarq    $dst, $shift" %}
12459   ins_encode %{
12460     __ sarq($dst$$Register);
12461   %}
12462   ins_pipe(ialu_reg_reg);
12463 %}
12464 
12465 // Arithmetic Shift Right by variable
12466 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12467 %{
12468   predicate(!VM_Version::supports_bmi2());
12469   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12470   effect(KILL cr);
12471 
12472   format %{ "sarq    $dst, $shift" %}
12473   ins_encode %{
12474     __ sarq($dst$$Address);
12475   %}
12476   ins_pipe(ialu_mem_reg);
12477 %}
12478 
12479 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12480 %{
12481   predicate(VM_Version::supports_bmi2());
12482   match(Set dst (RShiftL src shift));
12483 
12484   format %{ "sarxq   $dst, $src, $shift" %}
12485   ins_encode %{
12486     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12487   %}
12488   ins_pipe(ialu_reg_reg);
12489 %}
12490 
12491 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12492 %{
12493   predicate(VM_Version::supports_bmi2());
12494   match(Set dst (RShiftL (LoadL src) shift));
12495   ins_cost(175);
12496   format %{ "sarxq   $dst, $src, $shift" %}
12497   ins_encode %{
12498     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12499   %}
12500   ins_pipe(ialu_reg_mem);
12501 %}
12502 
12503 // Logical Shift Right by 8-bit immediate
12504 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12505 %{
12506   predicate(!UseAPX);
12507   match(Set dst (URShiftL dst shift));
12508   effect(KILL cr);
12509 
12510   format %{ "shrq    $dst, $shift" %}
12511   ins_encode %{
12512     __ shrq($dst$$Register, $shift$$constant);
12513   %}
12514   ins_pipe(ialu_reg);
12515 %}
12516 
12517 // Logical Shift Right by 8-bit immediate
12518 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12519 %{
12520   predicate(UseAPX);
12521   match(Set dst (URShiftL src shift));
12522   effect(KILL cr);
12523   flag(PD::Flag_ndd_demotable_opr1);
12524 
12525   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12526   ins_encode %{
12527     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12528   %}
12529   ins_pipe(ialu_reg);
12530 %}
12531 
12532 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12533 %{
12534   predicate(UseAPX);
12535   match(Set dst (URShiftL (LoadL src) shift));
12536   effect(KILL cr);
12537 
12538   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12539   ins_encode %{
12540     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12541   %}
12542   ins_pipe(ialu_reg);
12543 %}
12544 
12545 // Logical Shift Right by 8-bit immediate
12546 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12547 %{
12548   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12549   effect(KILL cr);
12550 
12551   format %{ "shrq    $dst, $shift" %}
12552   ins_encode %{
12553     __ shrq($dst$$Address, $shift$$constant);
12554   %}
12555   ins_pipe(ialu_mem_imm);
12556 %}
12557 
12558 // Logical Shift Right by variable
12559 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12560 %{
12561   predicate(!VM_Version::supports_bmi2());
12562   match(Set dst (URShiftL dst shift));
12563   effect(KILL cr);
12564 
12565   format %{ "shrq    $dst, $shift" %}
12566   ins_encode %{
12567     __ shrq($dst$$Register);
12568   %}
12569   ins_pipe(ialu_reg_reg);
12570 %}
12571 
12572 // Logical Shift Right by variable
12573 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12574 %{
12575   predicate(!VM_Version::supports_bmi2());
12576   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12577   effect(KILL cr);
12578 
12579   format %{ "shrq    $dst, $shift" %}
12580   ins_encode %{
12581     __ shrq($dst$$Address);
12582   %}
12583   ins_pipe(ialu_mem_reg);
12584 %}
12585 
12586 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12587 %{
12588   predicate(VM_Version::supports_bmi2());
12589   match(Set dst (URShiftL src shift));
12590 
12591   format %{ "shrxq   $dst, $src, $shift" %}
12592   ins_encode %{
12593     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12594   %}
12595   ins_pipe(ialu_reg_reg);
12596 %}
12597 
12598 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12599 %{
12600   predicate(VM_Version::supports_bmi2());
12601   match(Set dst (URShiftL (LoadL src) shift));
12602   ins_cost(175);
12603   format %{ "shrxq   $dst, $src, $shift" %}
12604   ins_encode %{
12605     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12606   %}
12607   ins_pipe(ialu_reg_mem);
12608 %}
12609 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
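// For example, C2 canonicalizes the Java cast "(byte) x" into
// (x << 24) >> 24, which this rule collapses into a single movsbl.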
12612 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12613 %{
12614   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12615 
12616   format %{ "movsbl  $dst, $src\t# i2b" %}
12617   ins_encode %{
12618     __ movsbl($dst$$Register, $src$$Register);
12619   %}
12620   ins_pipe(ialu_reg_reg);
12621 %}
12622 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
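// Likewise, "(short) x" becomes (x << 16) >> 16 and is emitted as a single
// movswl.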
12625 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12626 %{
12627   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12628 
12629   format %{ "movswl  $dst, $src\t# i2s" %}
12630   ins_encode %{
12631     __ movswl($dst$$Register, $src$$Register);
12632   %}
12633   ins_pipe(ialu_reg_reg);
12634 %}
12635 
12636 // ROL/ROR instructions
12637 
12638 // Rotate left by constant.
12639 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12640 %{
12641   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12642   match(Set dst (RotateLeft dst shift));
12643   effect(KILL cr);
12644   format %{ "roll    $dst, $shift" %}
12645   ins_encode %{
12646     __ roll($dst$$Register, $shift$$constant);
12647   %}
12648   ins_pipe(ialu_reg);
12649 %}
12650 
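// BMI2 provides rorx but no rolx, so a constant rotate-left is synthesized
// from rorx with the complementary count: rol(x, s) == ror(x, (32 - s) & 31).
// The 64-bit rules further below use the same trick modulo 64.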
12651 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12652 %{
12653   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12654   match(Set dst (RotateLeft src shift));
12655   format %{ "rolxl   $dst, $src, $shift" %}
12656   ins_encode %{
12657     int shift = 32 - ($shift$$constant & 31);
12658     __ rorxl($dst$$Register, $src$$Register, shift);
12659   %}
12660   ins_pipe(ialu_reg_reg);
12661 %}
12662 
12663 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12664 %{
12665   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12666   match(Set dst (RotateLeft (LoadI src) shift));
12667   ins_cost(175);
12668   format %{ "rolxl   $dst, $src, $shift" %}
12669   ins_encode %{
12670     int shift = 32 - ($shift$$constant & 31);
12671     __ rorxl($dst$$Register, $src$$Address, shift);
12672   %}
12673   ins_pipe(ialu_reg_mem);
12674 %}
12675 
12676 // Rotate Left by variable
12677 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12678 %{
12679   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12680   match(Set dst (RotateLeft dst shift));
12681   effect(KILL cr);
12682   format %{ "roll    $dst, $shift" %}
12683   ins_encode %{
12684     __ roll($dst$$Register);
12685   %}
12686   ins_pipe(ialu_reg_reg);
12687 %}
12688 
12689 // Rotate Left by variable
12690 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12691 %{
12692   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12693   match(Set dst (RotateLeft src shift));
12694   effect(KILL cr);
12695   flag(PD::Flag_ndd_demotable_opr1);
12696 
12697   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12698   ins_encode %{
12699     __ eroll($dst$$Register, $src$$Register, false);
12700   %}
12701   ins_pipe(ialu_reg_reg);
12702 %}
12703 
12704 // Rotate Right by constant.
12705 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12706 %{
12707   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12708   match(Set dst (RotateRight dst shift));
12709   effect(KILL cr);
12710   format %{ "rorl    $dst, $shift" %}
12711   ins_encode %{
12712     __ rorl($dst$$Register, $shift$$constant);
12713   %}
12714   ins_pipe(ialu_reg);
12715 %}
12716 
12717 // Rotate Right by constant.
12718 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12719 %{
12720   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12721   match(Set dst (RotateRight src shift));
12722   format %{ "rorxl   $dst, $src, $shift" %}
12723   ins_encode %{
12724     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12725   %}
12726   ins_pipe(ialu_reg_reg);
12727 %}
12728 
12729 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12730 %{
12731   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12732   match(Set dst (RotateRight (LoadI src) shift));
12733   ins_cost(175);
12734   format %{ "rorxl   $dst, $src, $shift" %}
12735   ins_encode %{
12736     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12737   %}
12738   ins_pipe(ialu_reg_mem);
12739 %}
12740 
12741 // Rotate Right by variable
12742 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12743 %{
12744   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12745   match(Set dst (RotateRight dst shift));
12746   effect(KILL cr);
12747   format %{ "rorl    $dst, $shift" %}
12748   ins_encode %{
12749     __ rorl($dst$$Register);
12750   %}
12751   ins_pipe(ialu_reg_reg);
12752 %}
12753 
12754 // Rotate Right by variable
12755 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12756 %{
12757   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12758   match(Set dst (RotateRight src shift));
12759   effect(KILL cr);
12760   flag(PD::Flag_ndd_demotable_opr1);
12761 
  format %{ "erorl    $dst, $src, $shift\t# rotate right (int ndd)" %}
12763   ins_encode %{
12764     __ erorl($dst$$Register, $src$$Register, false);
12765   %}
12766   ins_pipe(ialu_reg_reg);
12767 %}
12768 
12769 // Rotate Left by constant.
12770 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12771 %{
12772   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12773   match(Set dst (RotateLeft dst shift));
12774   effect(KILL cr);
12775   format %{ "rolq    $dst, $shift" %}
12776   ins_encode %{
12777     __ rolq($dst$$Register, $shift$$constant);
12778   %}
12779   ins_pipe(ialu_reg);
12780 %}
12781 
12782 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12783 %{
12784   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12785   match(Set dst (RotateLeft src shift));
12786   format %{ "rolxq   $dst, $src, $shift" %}
12787   ins_encode %{
12788     int shift = 64 - ($shift$$constant & 63);
12789     __ rorxq($dst$$Register, $src$$Register, shift);
12790   %}
12791   ins_pipe(ialu_reg_reg);
12792 %}
12793 
12794 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12795 %{
12796   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12797   match(Set dst (RotateLeft (LoadL src) shift));
12798   ins_cost(175);
12799   format %{ "rolxq   $dst, $src, $shift" %}
12800   ins_encode %{
12801     int shift = 64 - ($shift$$constant & 63);
12802     __ rorxq($dst$$Register, $src$$Address, shift);
12803   %}
12804   ins_pipe(ialu_reg_mem);
12805 %}
12806 
12807 // Rotate Left by variable
12808 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12809 %{
12810   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12811   match(Set dst (RotateLeft dst shift));
12812   effect(KILL cr);
12813 
12814   format %{ "rolq    $dst, $shift" %}
12815   ins_encode %{
12816     __ rolq($dst$$Register);
12817   %}
12818   ins_pipe(ialu_reg_reg);
12819 %}
12820 
12821 // Rotate Left by variable
12822 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12823 %{
12824   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12825   match(Set dst (RotateLeft src shift));
12826   effect(KILL cr);
12827   flag(PD::Flag_ndd_demotable_opr1);
12828 
  format %{ "erolq    $dst, $src, $shift\t# rotate left (long ndd)" %}
12830   ins_encode %{
12831     __ erolq($dst$$Register, $src$$Register, false);
12832   %}
12833   ins_pipe(ialu_reg_reg);
12834 %}
12835 
12836 // Rotate Right by constant.
12837 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12838 %{
12839   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12840   match(Set dst (RotateRight dst shift));
12841   effect(KILL cr);
12842   format %{ "rorq    $dst, $shift" %}
12843   ins_encode %{
12844     __ rorq($dst$$Register, $shift$$constant);
12845   %}
12846   ins_pipe(ialu_reg);
12847 %}
12848 
12849 // Rotate Right by constant
12850 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12851 %{
12852   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12853   match(Set dst (RotateRight src shift));
12854   format %{ "rorxq   $dst, $src, $shift" %}
12855   ins_encode %{
12856     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12857   %}
12858   ins_pipe(ialu_reg_reg);
12859 %}
12860 
12861 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12862 %{
12863   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12864   match(Set dst (RotateRight (LoadL src) shift));
12865   ins_cost(175);
12866   format %{ "rorxq   $dst, $src, $shift" %}
12867   ins_encode %{
12868     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12869   %}
12870   ins_pipe(ialu_reg_mem);
12871 %}
12872 
12873 // Rotate Right by variable
12874 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12875 %{
12876   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12877   match(Set dst (RotateRight dst shift));
12878   effect(KILL cr);
12879   format %{ "rorq    $dst, $shift" %}
12880   ins_encode %{
12881     __ rorq($dst$$Register);
12882   %}
12883   ins_pipe(ialu_reg_reg);
12884 %}
12885 
12886 // Rotate Right by variable
12887 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12888 %{
12889   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12890   match(Set dst (RotateRight src shift));
12891   effect(KILL cr);
12892   flag(PD::Flag_ndd_demotable_opr1);
12893 
  format %{ "erorq    $dst, $src, $shift\t# rotate right (long ndd)" %}
12895   ins_encode %{
12896     __ erorq($dst$$Register, $src$$Register, false);
12897   %}
12898   ins_pipe(ialu_reg_reg);
12899 %}
12900 
12901 //----------------------------- CompressBits/ExpandBits ------------------------
12902 
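// pext gathers the bits of src selected by mask into the contiguous
// low-order bits of dst; pdep is the inverse scatter.  For example, with
// mask == 0b1010: pext(0b1101, 0b1010) == 0b10 and
// pdep(0b0011, 0b1010) == 0b1010.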
12903 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12904   predicate(n->bottom_type()->isa_long());
12905   match(Set dst (CompressBits src mask));
12906   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12907   ins_encode %{
12908     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12909   %}
12910   ins_pipe( pipe_slow );
12911 %}
12912 
12913 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12914   predicate(n->bottom_type()->isa_long());
12915   match(Set dst (ExpandBits src mask));
12916   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12917   ins_encode %{
12918     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12919   %}
12920   ins_pipe( pipe_slow );
12921 %}
12922 
12923 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12924   predicate(n->bottom_type()->isa_long());
12925   match(Set dst (CompressBits src (LoadL mask)));
12926   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12927   ins_encode %{
12928     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12929   %}
12930   ins_pipe( pipe_slow );
12931 %}
12932 
12933 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12934   predicate(n->bottom_type()->isa_long());
12935   match(Set dst (ExpandBits src (LoadL mask)));
12936   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12937   ins_encode %{
12938     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12939   %}
12940   ins_pipe( pipe_slow );
12941 %}
12942 
12943 
12944 // Logical Instructions
12945 
12946 // Integer Logical Instructions
12947 
12948 // And Instructions
12949 // And Register with Register
12950 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12951 %{
12952   predicate(!UseAPX);
12953   match(Set dst (AndI dst src));
12954   effect(KILL cr);
12955   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12956 
12957   format %{ "andl    $dst, $src\t# int" %}
12958   ins_encode %{
12959     __ andl($dst$$Register, $src$$Register);
12960   %}
12961   ins_pipe(ialu_reg_reg);
12962 %}
12963 
12964 // And Register with Register using New Data Destination (NDD)
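// APX NDD (new data destination) encodings take a separate destination
// register, giving a non-destructive three-operand form that avoids the
// explicit move of an input into dst required by the legacy two-operand
// encoding.  The Flag_ndd_demotable_opr* hints mark inputs that, if the
// allocator assigns them the same register as dst, allow the instruction
// to be demoted back to the shorter legacy encoding.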
12965 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12966 %{
12967   predicate(UseAPX);
12968   match(Set dst (AndI src1 src2));
12969   effect(KILL cr);
12970   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12971 
12972   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
12973   ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
12977   ins_pipe(ialu_reg_reg);
12978 %}
12979 
12980 // And Register with Immediate 255
12981 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12982 %{
12983   match(Set dst (AndI src mask));
12984 
12985   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
12986   ins_encode %{
12987     __ movzbl($dst$$Register, $src$$Register);
12988   %}
12989   ins_pipe(ialu_reg);
12990 %}
12991 
12992 // And Register with Immediate 255 and promote to long
12993 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12994 %{
12995   match(Set dst (ConvI2L (AndI src mask)));
12996 
12997   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
12998   ins_encode %{
12999     __ movzbl($dst$$Register, $src$$Register);
13000   %}
13001   ins_pipe(ialu_reg);
13002 %}
13003 
13004 // And Register with Immediate 65535
13005 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13006 %{
13007   match(Set dst (AndI src mask));
13008 
13009   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
13010   ins_encode %{
13011     __ movzwl($dst$$Register, $src$$Register);
13012   %}
13013   ins_pipe(ialu_reg);
13014 %}
13015 
13016 // And Register with Immediate 65535 and promote to long
13017 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13018 %{
13019   match(Set dst (ConvI2L (AndI src mask)));
13020 
13021   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
13022   ins_encode %{
13023     __ movzwl($dst$$Register, $src$$Register);
13024   %}
13025   ins_pipe(ialu_reg);
13026 %}
13027 
13028 // Can skip int2long conversions after AND with small bitmask
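// For example, with mask == 0x7FF (2^11 - 1) the rule loads 11 into tmp and
// bzhiq copies src to dst with every bit from position 11 upward zeroed;
// since this already clears the upper 32 bits, the ConvI2L comes for free.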
13029 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13030 %{
13031   predicate(VM_Version::supports_bmi2());
  match(Set dst (ConvI2L (AndI src mask)));
  effect(TEMP tmp, KILL cr);
  ins_cost(125);
  format %{ "bzhiq   $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13036   ins_encode %{
13037     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13038     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13039   %}
13040   ins_pipe(ialu_reg_reg);
13041 %}
13042 
13043 // And Register with Immediate
13044 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13045 %{
13046   predicate(!UseAPX);
13047   match(Set dst (AndI dst src));
13048   effect(KILL cr);
13049   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13050 
13051   format %{ "andl    $dst, $src\t# int" %}
13052   ins_encode %{
13053     __ andl($dst$$Register, $src$$constant);
13054   %}
13055   ins_pipe(ialu_reg);
13056 %}
13057 
13058 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13059 %{
13060   predicate(UseAPX);
13061   match(Set dst (AndI src1 src2));
13062   effect(KILL cr);
13063   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13064 
13065   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13066   ins_encode %{
13067     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13068   %}
13069   ins_pipe(ialu_reg);
13070 %}
13071 
13072 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13073 %{
13074   predicate(UseAPX);
13075   match(Set dst (AndI (LoadI src1) src2));
13076   effect(KILL cr);
13077   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13078 
13079   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13080   ins_encode %{
13081     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13082   %}
13083   ins_pipe(ialu_reg);
13084 %}
13085 
13086 // And Register with Memory
13087 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13088 %{
13089   predicate(!UseAPX);
13090   match(Set dst (AndI dst (LoadI src)));
13091   effect(KILL cr);
13092   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13093 
13094   ins_cost(150);
13095   format %{ "andl    $dst, $src\t# int" %}
13096   ins_encode %{
13097     __ andl($dst$$Register, $src$$Address);
13098   %}
13099   ins_pipe(ialu_reg_mem);
13100 %}
13101 
13102 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13103 %{
13104   predicate(UseAPX);
13105   match(Set dst (AndI src1 (LoadI src2)));
13106   effect(KILL cr);
13107   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13108 
13109   ins_cost(150);
13110   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13111   ins_encode %{
13112     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13113   %}
13114   ins_pipe(ialu_reg_mem);
13115 %}
13116 
13117 // And Memory with Register
13118 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13119 %{
13120   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13121   effect(KILL cr);
13122   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13123 
13124   ins_cost(150);
13125   format %{ "andb    $dst, $src\t# byte" %}
13126   ins_encode %{
13127     __ andb($dst$$Address, $src$$Register);
13128   %}
13129   ins_pipe(ialu_mem_reg);
13130 %}
13131 
13132 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13133 %{
13134   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13135   effect(KILL cr);
13136   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13137 
13138   ins_cost(150);
13139   format %{ "andl    $dst, $src\t# int" %}
13140   ins_encode %{
13141     __ andl($dst$$Address, $src$$Register);
13142   %}
13143   ins_pipe(ialu_mem_reg);
13144 %}
13145 
13146 // And Memory with Immediate
13147 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13148 %{
13149   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13150   effect(KILL cr);
13151   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13152 
13153   ins_cost(125);
13154   format %{ "andl    $dst, $src\t# int" %}
13155   ins_encode %{
13156     __ andl($dst$$Address, $src$$constant);
13157   %}
13158   ins_pipe(ialu_mem_imm);
13159 %}
13160 
13161 // BMI1 instructions
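// The rules below match the canonical ideal-graph shapes of the BMI1
// bit-manipulation idioms:
//   andn(x, y) == ~x & y       (matched as (x ^ -1) & y)
//   blsi(x)    == -x & x       (isolate lowest set bit)
//   blsmsk(x)  == (x - 1) ^ x  (mask up to and including lowest set bit)
//   blsr(x)    == (x - 1) & x  (clear lowest set bit)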
13162 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13163   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13164   predicate(UseBMI1Instructions);
13165   effect(KILL cr);
13166   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13167 
13168   ins_cost(125);
13169   format %{ "andnl  $dst, $src1, $src2" %}
13170 
13171   ins_encode %{
13172     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13173   %}
13174   ins_pipe(ialu_reg_mem);
13175 %}
13176 
13177 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13178   match(Set dst (AndI (XorI src1 minus_1) src2));
13179   predicate(UseBMI1Instructions);
13180   effect(KILL cr);
13181   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13182 
13183   format %{ "andnl  $dst, $src1, $src2" %}
13184 
13185   ins_encode %{
13186     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13187   %}
13188   ins_pipe(ialu_reg);
13189 %}
13190 
13191 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13192   match(Set dst (AndI (SubI imm_zero src) src));
13193   predicate(UseBMI1Instructions);
13194   effect(KILL cr);
13195   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13196 
13197   format %{ "blsil  $dst, $src" %}
13198 
13199   ins_encode %{
13200     __ blsil($dst$$Register, $src$$Register);
13201   %}
13202   ins_pipe(ialu_reg);
13203 %}
13204 
13205 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13206   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13207   predicate(UseBMI1Instructions);
13208   effect(KILL cr);
13209   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13210 
13211   ins_cost(125);
13212   format %{ "blsil  $dst, $src" %}
13213 
13214   ins_encode %{
13215     __ blsil($dst$$Register, $src$$Address);
13216   %}
13217   ins_pipe(ialu_reg_mem);
13218 %}
13219 
13220 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13221 %{
13222   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13223   predicate(UseBMI1Instructions);
13224   effect(KILL cr);
13225   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13226 
13227   ins_cost(125);
13228   format %{ "blsmskl $dst, $src" %}
13229 
13230   ins_encode %{
13231     __ blsmskl($dst$$Register, $src$$Address);
13232   %}
13233   ins_pipe(ialu_reg_mem);
13234 %}
13235 
13236 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13237 %{
13238   match(Set dst (XorI (AddI src minus_1) src));
13239   predicate(UseBMI1Instructions);
13240   effect(KILL cr);
13241   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13242 
13243   format %{ "blsmskl $dst, $src" %}
13244 
13245   ins_encode %{
13246     __ blsmskl($dst$$Register, $src$$Register);
13247   %}
13248 
13249   ins_pipe(ialu_reg);
13250 %}
13251 
13252 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13253 %{
13254   match(Set dst (AndI (AddI src minus_1) src) );
13255   predicate(UseBMI1Instructions);
13256   effect(KILL cr);
13257   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13258 
13259   format %{ "blsrl  $dst, $src" %}
13260 
13261   ins_encode %{
13262     __ blsrl($dst$$Register, $src$$Register);
13263   %}
13264 
  ins_pipe(ialu_reg);
13266 %}
13267 
13268 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13269 %{
13270   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13271   predicate(UseBMI1Instructions);
13272   effect(KILL cr);
13273   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13274 
13275   ins_cost(125);
13276   format %{ "blsrl  $dst, $src" %}
13277 
13278   ins_encode %{
13279     __ blsrl($dst$$Register, $src$$Address);
13280   %}
13281 
  ins_pipe(ialu_reg_mem);
13283 %}
13284 
13285 // Or Instructions
13286 // Or Register with Register
13287 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13288 %{
13289   predicate(!UseAPX);
13290   match(Set dst (OrI dst src));
13291   effect(KILL cr);
13292   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13293 
13294   format %{ "orl     $dst, $src\t# int" %}
13295   ins_encode %{
13296     __ orl($dst$$Register, $src$$Register);
13297   %}
13298   ins_pipe(ialu_reg_reg);
13299 %}
13300 
13301 // Or Register with Register using New Data Destination (NDD)
13302 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13303 %{
13304   predicate(UseAPX);
13305   match(Set dst (OrI src1 src2));
13306   effect(KILL cr);
13307   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13308 
13309   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13310   ins_encode %{
13311     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13312   %}
13313   ins_pipe(ialu_reg_reg);
13314 %}
13315 
13316 // Or Register with Immediate
13317 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13318 %{
13319   predicate(!UseAPX);
13320   match(Set dst (OrI dst src));
13321   effect(KILL cr);
13322   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13323 
13324   format %{ "orl     $dst, $src\t# int" %}
13325   ins_encode %{
13326     __ orl($dst$$Register, $src$$constant);
13327   %}
13328   ins_pipe(ialu_reg);
13329 %}
13330 
13331 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13332 %{
13333   predicate(UseAPX);
13334   match(Set dst (OrI src1 src2));
13335   effect(KILL cr);
13336   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13337 
13338   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13339   ins_encode %{
13340     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13341   %}
13342   ins_pipe(ialu_reg);
13343 %}
13344 
13345 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13346 %{
13347   predicate(UseAPX);
13348   match(Set dst (OrI src1 src2));
13349   effect(KILL cr);
13350   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13351 
13352   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13353   ins_encode %{
13354     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13355   %}
13356   ins_pipe(ialu_reg);
13357 %}
13358 
13359 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13360 %{
13361   predicate(UseAPX);
13362   match(Set dst (OrI (LoadI src1) src2));
13363   effect(KILL cr);
13364   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13365 
13366   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13367   ins_encode %{
13368     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13369   %}
13370   ins_pipe(ialu_reg);
13371 %}
13372 
13373 // Or Register with Memory
13374 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13375 %{
13376   predicate(!UseAPX);
13377   match(Set dst (OrI dst (LoadI src)));
13378   effect(KILL cr);
13379   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13380 
13381   ins_cost(150);
13382   format %{ "orl     $dst, $src\t# int" %}
13383   ins_encode %{
13384     __ orl($dst$$Register, $src$$Address);
13385   %}
13386   ins_pipe(ialu_reg_mem);
13387 %}
13388 
13389 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13390 %{
13391   predicate(UseAPX);
13392   match(Set dst (OrI src1 (LoadI src2)));
13393   effect(KILL cr);
13394   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13395 
13396   ins_cost(150);
13397   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13398   ins_encode %{
13399     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13400   %}
13401   ins_pipe(ialu_reg_mem);
13402 %}
13403 
13404 // Or Memory with Register
13405 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13406 %{
13407   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13408   effect(KILL cr);
13409   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13410 
13411   ins_cost(150);
13412   format %{ "orb    $dst, $src\t# byte" %}
13413   ins_encode %{
13414     __ orb($dst$$Address, $src$$Register);
13415   %}
13416   ins_pipe(ialu_mem_reg);
13417 %}
13418 
13419 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13420 %{
13421   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13422   effect(KILL cr);
13423   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13424 
13425   ins_cost(150);
13426   format %{ "orl     $dst, $src\t# int" %}
13427   ins_encode %{
13428     __ orl($dst$$Address, $src$$Register);
13429   %}
13430   ins_pipe(ialu_mem_reg);
13431 %}
13432 
13433 // Or Memory with Immediate
13434 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13435 %{
13436   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13437   effect(KILL cr);
13438   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13439 
13440   ins_cost(125);
13441   format %{ "orl     $dst, $src\t# int" %}
13442   ins_encode %{
13443     __ orl($dst$$Address, $src$$constant);
13444   %}
13445   ins_pipe(ialu_mem_imm);
13446 %}
13447 
13448 // Xor Instructions
13449 // Xor Register with Register
13450 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13451 %{
13452   predicate(!UseAPX);
13453   match(Set dst (XorI dst src));
13454   effect(KILL cr);
13455   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13456 
13457   format %{ "xorl    $dst, $src\t# int" %}
13458   ins_encode %{
13459     __ xorl($dst$$Register, $src$$Register);
13460   %}
13461   ins_pipe(ialu_reg_reg);
13462 %}
13463 
13464 // Xor Register with Register using New Data Destination (NDD)
13465 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13466 %{
13467   predicate(UseAPX);
13468   match(Set dst (XorI src1 src2));
13469   effect(KILL cr);
13470   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13471 
13472   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13473   ins_encode %{
13474     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13475   %}
13476   ins_pipe(ialu_reg_reg);
13477 %}
13478 
13479 // Xor Register with Immediate -1
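// x ^ -1 == ~x, so the xor collapses into a single not instruction.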
13480 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13481 %{
13482   predicate(!UseAPX);
13483   match(Set dst (XorI dst imm));
13484 
13485   format %{ "notl    $dst" %}
13486   ins_encode %{
13487      __ notl($dst$$Register);
13488   %}
13489   ins_pipe(ialu_reg);
13490 %}
13491 
13492 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13493 %{
13494   match(Set dst (XorI src imm));
13495   predicate(UseAPX);
13496   flag(PD::Flag_ndd_demotable_opr1);
13497 
13498   format %{ "enotl    $dst, $src" %}
13499   ins_encode %{
13500      __ enotl($dst$$Register, $src$$Register);
13501   %}
13502   ins_pipe(ialu_reg);
13503 %}
13504 
13505 // Xor Register with Immediate
13506 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13507 %{
  // Exclude -1 here so that xorI_rReg_im1 (notl) is always selected for
  // x ^ -1, independent of instruction costs.
13509   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13510   match(Set dst (XorI dst src));
13511   effect(KILL cr);
13512   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13513 
13514   format %{ "xorl    $dst, $src\t# int" %}
13515   ins_encode %{
13516     __ xorl($dst$$Register, $src$$constant);
13517   %}
13518   ins_pipe(ialu_reg);
13519 %}
13520 
13521 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13522 %{
  // Exclude -1 here so that xorI_rReg_im1_ndd (enotl) is always selected for
  // x ^ -1, independent of instruction costs.
13524   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13525   match(Set dst (XorI src1 src2));
13526   effect(KILL cr);
13527   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13528 
13529   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13530   ins_encode %{
13531     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13532   %}
13533   ins_pipe(ialu_reg);
13534 %}
13535 
13536 // Xor Memory with Immediate
13537 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13538 %{
13539   predicate(UseAPX);
13540   match(Set dst (XorI (LoadI src1) src2));
13541   effect(KILL cr);
13542   ins_cost(150);
13543   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13544 
13545   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13546   ins_encode %{
13547     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13548   %}
13549   ins_pipe(ialu_reg);
13550 %}
13551 
13552 // Xor Register with Memory
13553 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13554 %{
13555   predicate(!UseAPX);
13556   match(Set dst (XorI dst (LoadI src)));
13557   effect(KILL cr);
13558   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13559 
13560   ins_cost(150);
13561   format %{ "xorl    $dst, $src\t# int" %}
13562   ins_encode %{
13563     __ xorl($dst$$Register, $src$$Address);
13564   %}
13565   ins_pipe(ialu_reg_mem);
13566 %}
13567 
13568 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13569 %{
13570   predicate(UseAPX);
13571   match(Set dst (XorI src1 (LoadI src2)));
13572   effect(KILL cr);
13573   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13574 
13575   ins_cost(150);
13576   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13577   ins_encode %{
13578     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13579   %}
13580   ins_pipe(ialu_reg_mem);
13581 %}
13582 
13583 // Xor Memory with Register
13584 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13585 %{
13586   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13587   effect(KILL cr);
13588   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13589 
13590   ins_cost(150);
13591   format %{ "xorb    $dst, $src\t# byte" %}
13592   ins_encode %{
13593     __ xorb($dst$$Address, $src$$Register);
13594   %}
13595   ins_pipe(ialu_mem_reg);
13596 %}
13597 
13598 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13599 %{
13600   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13601   effect(KILL cr);
13602   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13603 
13604   ins_cost(150);
13605   format %{ "xorl    $dst, $src\t# int" %}
13606   ins_encode %{
13607     __ xorl($dst$$Address, $src$$Register);
13608   %}
13609   ins_pipe(ialu_mem_reg);
13610 %}
13611 
13612 // Xor Memory with Immediate
13613 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13614 %{
13615   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13616   effect(KILL cr);
13617   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13618 
13619   ins_cost(125);
13620   format %{ "xorl    $dst, $src\t# int" %}
13621   ins_encode %{
13622     __ xorl($dst$$Address, $src$$constant);
13623   %}
13624   ins_pipe(ialu_mem_imm);
13625 %}
13626 
13627 
13628 // Long Logical Instructions
13629 
13630 // And Instructions
13631 // And Register with Register
13632 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13633 %{
13634   predicate(!UseAPX);
13635   match(Set dst (AndL dst src));
13636   effect(KILL cr);
13637   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13638 
13639   format %{ "andq    $dst, $src\t# long" %}
13640   ins_encode %{
13641     __ andq($dst$$Register, $src$$Register);
13642   %}
13643   ins_pipe(ialu_reg_reg);
13644 %}
13645 
13646 // And Register with Register using New Data Destination (NDD)
13647 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13648 %{
13649   predicate(UseAPX);
13650   match(Set dst (AndL src1 src2));
13651   effect(KILL cr);
13652   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13653 
13654   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13655   ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13659   ins_pipe(ialu_reg_reg);
13660 %}
13661 
13662 // And Register with Immediate 255
13663 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13664 %{
13665   match(Set dst (AndL src mask));
13666 
13667   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13668   ins_encode %{
    // movzbl zeroes the upper 32 bits, so no REX.W prefix is needed
13670     __ movzbl($dst$$Register, $src$$Register);
13671   %}
13672   ins_pipe(ialu_reg);
13673 %}
13674 
13675 // And Register with Immediate 65535
13676 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13677 %{
13678   match(Set dst (AndL src mask));
13679 
13680   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13681   ins_encode %{
    // movzwl zeroes the upper 32 bits, so no REX.W prefix is needed
13683     __ movzwl($dst$$Register, $src$$Register);
13684   %}
13685   ins_pipe(ialu_reg);
13686 %}
13687 
13688 // And Register with Immediate
13689 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13690 %{
13691   predicate(!UseAPX);
13692   match(Set dst (AndL dst src));
13693   effect(KILL cr);
13694   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13695 
13696   format %{ "andq    $dst, $src\t# long" %}
13697   ins_encode %{
13698     __ andq($dst$$Register, $src$$constant);
13699   %}
13700   ins_pipe(ialu_reg);
13701 %}
13702 
13703 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13704 %{
13705   predicate(UseAPX);
13706   match(Set dst (AndL src1 src2));
13707   effect(KILL cr);
13708   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13709 
13710   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13711   ins_encode %{
13712     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13713   %}
13714   ins_pipe(ialu_reg);
13715 %}
13716 
13717 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13718 %{
13719   predicate(UseAPX);
13720   match(Set dst (AndL (LoadL src1) src2));
13721   effect(KILL cr);
13722   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13723 
13724   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13725   ins_encode %{
13726     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13727   %}
13728   ins_pipe(ialu_reg);
13729 %}
13730 
13731 // And Register with Memory
13732 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13733 %{
13734   predicate(!UseAPX);
13735   match(Set dst (AndL dst (LoadL src)));
13736   effect(KILL cr);
13737   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13738 
13739   ins_cost(150);
13740   format %{ "andq    $dst, $src\t# long" %}
13741   ins_encode %{
13742     __ andq($dst$$Register, $src$$Address);
13743   %}
13744   ins_pipe(ialu_reg_mem);
13745 %}
13746 
13747 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13748 %{
13749   predicate(UseAPX);
13750   match(Set dst (AndL src1 (LoadL src2)));
13751   effect(KILL cr);
13752   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13753 
13754   ins_cost(150);
13755   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13756   ins_encode %{
13757     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13758   %}
13759   ins_pipe(ialu_reg_mem);
13760 %}
13761 
13762 // And Memory with Register
13763 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13764 %{
13765   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13766   effect(KILL cr);
13767   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13768 
13769   ins_cost(150);
13770   format %{ "andq    $dst, $src\t# long" %}
13771   ins_encode %{
13772     __ andq($dst$$Address, $src$$Register);
13773   %}
13774   ins_pipe(ialu_mem_reg);
13775 %}
13776 
13777 // And Memory with Immediate
13778 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13779 %{
13780   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13781   effect(KILL cr);
13782   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13783 
13784   ins_cost(125);
13785   format %{ "andq    $dst, $src\t# long" %}
13786   ins_encode %{
13787     __ andq($dst$$Address, $src$$constant);
13788   %}
13789   ins_pipe(ialu_mem_imm);
13790 %}
13791 
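// Clear a single high bit in memory with btr.  For example, for
// con == ~(1L << 40), andq cannot encode the 64-bit immediate directly,
// but "btrq $dst, 40" clears bit 40 in one instruction.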
13792 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13793 %{
  // con must be a genuinely 64-bit immediate whose complement is a power of 2;
  // for immediates that fit in 8/32 bits a plain AND/OR works just as well.
13796   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13797 
13798   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13799   effect(KILL cr);
13800 
13801   ins_cost(125);
13802   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13803   ins_encode %{
13804     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13805   %}
13806   ins_pipe(ialu_mem_imm);
13807 %}
13808 
13809 // BMI1 instructions
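// andn computes ~src1 & src2; blsi isolates the lowest set bit (src & -src);
// blsmsk masks up to and including the lowest set bit (src ^ (src - 1));
// blsr clears the lowest set bit (src & (src - 1)).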
13810 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13811   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13812   predicate(UseBMI1Instructions);
13813   effect(KILL cr);
13814   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13815 
13816   ins_cost(125);
13817   format %{ "andnq  $dst, $src1, $src2" %}
13818 
13819   ins_encode %{
13820     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13821   %}
13822   ins_pipe(ialu_reg_mem);
13823 %}
13824 
13825 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13826   match(Set dst (AndL (XorL src1 minus_1) src2));
13827   predicate(UseBMI1Instructions);
13828   effect(KILL cr);
13829   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13830 
13831   format %{ "andnq  $dst, $src1, $src2" %}
13832 
13833   ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13835   %}
13836   ins_pipe(ialu_reg_mem);
13837 %}
13838 
13839 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13840   match(Set dst (AndL (SubL imm_zero src) src));
13841   predicate(UseBMI1Instructions);
13842   effect(KILL cr);
13843   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13844 
13845   format %{ "blsiq  $dst, $src" %}
13846 
13847   ins_encode %{
13848     __ blsiq($dst$$Register, $src$$Register);
13849   %}
13850   ins_pipe(ialu_reg);
13851 %}
13852 
13853 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13854   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13855   predicate(UseBMI1Instructions);
13856   effect(KILL cr);
13857   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13858 
13859   ins_cost(125);
13860   format %{ "blsiq  $dst, $src" %}
13861 
13862   ins_encode %{
13863     __ blsiq($dst$$Register, $src$$Address);
13864   %}
13865   ins_pipe(ialu_reg_mem);
13866 %}
13867 
13868 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13869 %{
13870   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13871   predicate(UseBMI1Instructions);
13872   effect(KILL cr);
13873   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13874 
13875   ins_cost(125);
13876   format %{ "blsmskq $dst, $src" %}
13877 
13878   ins_encode %{
13879     __ blsmskq($dst$$Register, $src$$Address);
13880   %}
13881   ins_pipe(ialu_reg_mem);
13882 %}
13883 
13884 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13885 %{
13886   match(Set dst (XorL (AddL src minus_1) src));
13887   predicate(UseBMI1Instructions);
13888   effect(KILL cr);
13889   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13890 
13891   format %{ "blsmskq $dst, $src" %}
13892 
13893   ins_encode %{
13894     __ blsmskq($dst$$Register, $src$$Register);
13895   %}
13896 
13897   ins_pipe(ialu_reg);
13898 %}
13899 
13900 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13901 %{
13902   match(Set dst (AndL (AddL src minus_1) src) );
13903   predicate(UseBMI1Instructions);
13904   effect(KILL cr);
13905   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13906 
13907   format %{ "blsrq  $dst, $src" %}
13908 
13909   ins_encode %{
13910     __ blsrq($dst$$Register, $src$$Register);
13911   %}
13912 
13913   ins_pipe(ialu_reg);
13914 %}
13915 
13916 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13917 %{
13918   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13919   predicate(UseBMI1Instructions);
13920   effect(KILL cr);
13921   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13922 
13923   ins_cost(125);
13924   format %{ "blsrq  $dst, $src" %}
13925 
13926   ins_encode %{
13927     __ blsrq($dst$$Register, $src$$Address);
13928   %}
13929 
13930   ins_pipe(ialu_reg);
13931 %}
13932 
13933 // Or Instructions
13934 // Or Register with Register
13935 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13936 %{
13937   predicate(!UseAPX);
13938   match(Set dst (OrL dst src));
13939   effect(KILL cr);
13940   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13941 
13942   format %{ "orq     $dst, $src\t# long" %}
13943   ins_encode %{
13944     __ orq($dst$$Register, $src$$Register);
13945   %}
13946   ins_pipe(ialu_reg_reg);
13947 %}
13948 
13949 // Or Register with Register using New Data Destination (NDD)
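// APX NDD forms are EVEX-encoded with a destination distinct from both sources,
// saving a move when the result must live in a new register; Flag_ndd_demotable_opr*
// marks operands that allow demoting back to the shorter legacy two-operand
// encoding when dst aliases that source. The trailing 'false' passed to the
// assembler is taken here to deselect the APX NF (no-flags) form, preserving
// the flag effects declared above.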
13950 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13951 %{
13952   predicate(UseAPX);
13953   match(Set dst (OrL src1 src2));
13954   effect(KILL cr);
13955   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13956 
13957   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13958   ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13962   ins_pipe(ialu_reg_reg);
13963 %}
13964 
13965 // Use any_RegP to match R15 (TLS register) without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst (CastP2X src)));
13968   effect(KILL cr);
13969   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13970 
13971   format %{ "orq     $dst, $src\t# long" %}
13972   ins_encode %{
13973     __ orq($dst$$Register, $src$$Register);
13974   %}
13975   ins_pipe(ialu_reg_reg);
13976 %}
13977 
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
13980   effect(KILL cr);
13981   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13982 
13983   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13984   ins_encode %{
13985     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13986   %}
13987   ins_pipe(ialu_reg_reg);
13988 %}
13989 
13990 // Or Register with Immediate
13991 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13992 %{
13993   predicate(!UseAPX);
13994   match(Set dst (OrL dst src));
13995   effect(KILL cr);
13996   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13997 
13998   format %{ "orq     $dst, $src\t# long" %}
13999   ins_encode %{
14000     __ orq($dst$$Register, $src$$constant);
14001   %}
14002   ins_pipe(ialu_reg);
14003 %}
14004 
14005 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14006 %{
14007   predicate(UseAPX);
14008   match(Set dst (OrL src1 src2));
14009   effect(KILL cr);
14010   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14011 
14012   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14013   ins_encode %{
14014     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14015   %}
14016   ins_pipe(ialu_reg);
14017 %}
14018 
14019 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14020 %{
14021   predicate(UseAPX);
14022   match(Set dst (OrL src1 src2));
14023   effect(KILL cr);
14024   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14025 
14026   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
14027   ins_encode %{
14028     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14029   %}
14030   ins_pipe(ialu_reg);
14031 %}
14032 
// Or Memory with Immediate into Register (NDD)
14034 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14035 %{
14036   predicate(UseAPX);
14037   match(Set dst (OrL (LoadL src1) src2));
14038   effect(KILL cr);
14039   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14040 
14041   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14042   ins_encode %{
14043     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14044   %}
14045   ins_pipe(ialu_reg);
14046 %}
14047 
14048 // Or Register with Memory
14049 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14050 %{
14051   predicate(!UseAPX);
14052   match(Set dst (OrL dst (LoadL src)));
14053   effect(KILL cr);
14054   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14055 
14056   ins_cost(150);
14057   format %{ "orq     $dst, $src\t# long" %}
14058   ins_encode %{
14059     __ orq($dst$$Register, $src$$Address);
14060   %}
14061   ins_pipe(ialu_reg_mem);
14062 %}
14063 
14064 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14065 %{
14066   predicate(UseAPX);
14067   match(Set dst (OrL src1 (LoadL src2)));
14068   effect(KILL cr);
14069   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14070 
14071   ins_cost(150);
14072   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14073   ins_encode %{
14074     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14075   %}
14076   ins_pipe(ialu_reg_mem);
14077 %}
14078 
14079 // Or Memory with Register
14080 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14081 %{
14082   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14083   effect(KILL cr);
14084   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14085 
14086   ins_cost(150);
14087   format %{ "orq     $dst, $src\t# long" %}
14088   ins_encode %{
14089     __ orq($dst$$Address, $src$$Register);
14090   %}
14091   ins_pipe(ialu_mem_reg);
14092 %}
14093 
14094 // Or Memory with Immediate
14095 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14096 %{
14097   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14098   effect(KILL cr);
14099   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14100 
14101   ins_cost(125);
14102   format %{ "orq     $dst, $src\t# long" %}
14103   ins_encode %{
14104     __ orq($dst$$Address, $src$$constant);
14105   %}
14106   ins_pipe(ialu_mem_imm);
14107 %}
14108 
14109 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14110 %{
14111   // con should be a pure 64-bit power of 2 immediate
14112   // because AND/OR works well enough for 8/32-bit values.
14113   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
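  // As in btrL_mem_imm above, n->in(3)->in(2) is the stored OrL's constant input.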
14114 
14115   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14116   effect(KILL cr);
14117 
14118   ins_cost(125);
14119   format %{ "btsq    $dst, log2($con)\t# long" %}
14120   ins_encode %{
14121     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14122   %}
14123   ins_pipe(ialu_mem_imm);
14124 %}
14125 
14126 // Xor Instructions
14127 // Xor Register with Register
14128 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14129 %{
14130   predicate(!UseAPX);
14131   match(Set dst (XorL dst src));
14132   effect(KILL cr);
14133   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14134 
14135   format %{ "xorq    $dst, $src\t# long" %}
14136   ins_encode %{
14137     __ xorq($dst$$Register, $src$$Register);
14138   %}
14139   ins_pipe(ialu_reg_reg);
14140 %}
14141 
14142 // Xor Register with Register using New Data Destination (NDD)
14143 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14144 %{
14145   predicate(UseAPX);
14146   match(Set dst (XorL src1 src2));
14147   effect(KILL cr);
14148   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14149 
14150   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14151   ins_encode %{
14152     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14153   %}
14154   ins_pipe(ialu_reg_reg);
14155 %}
14156 
14157 // Xor Register with Immediate -1
14158 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14159 %{
14160   predicate(!UseAPX);
14161   match(Set dst (XorL dst imm));
14162 
14163   format %{ "notq   $dst" %}
14164   ins_encode %{
14165      __ notq($dst$$Register);
14166   %}
14167   ins_pipe(ialu_reg);
14168 %}
14169 
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14171 %{
14172   predicate(UseAPX);
14173   match(Set dst (XorL src imm));
14174   flag(PD::Flag_ndd_demotable_opr1);
14175 
14176   format %{ "enotq   $dst, $src" %}
14177   ins_encode %{
14178     __ enotq($dst$$Register, $src$$Register);
14179   %}
14180   ins_pipe(ialu_reg);
14181 %}
14182 
14183 // Xor Register with Immediate
14184 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14185 %{
  // Strict predicate check so that xorL_rReg_im1 is selected, independent of cost, when immL32 src is -1.
14187   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14188   match(Set dst (XorL dst src));
14189   effect(KILL cr);
14190   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14191 
14192   format %{ "xorq    $dst, $src\t# long" %}
14193   ins_encode %{
14194     __ xorq($dst$$Register, $src$$constant);
14195   %}
14196   ins_pipe(ialu_reg);
14197 %}
14198 
14199 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14200 %{
  // Strict predicate check so that xorL_rReg_im1_ndd is selected, independent of cost, when immL32 src2 is -1.
14202   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14203   match(Set dst (XorL src1 src2));
14204   effect(KILL cr);
14205   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14206 
14207   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14208   ins_encode %{
14209     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14210   %}
14211   ins_pipe(ialu_reg);
14212 %}
14213 
// Xor Memory with Immediate into Register (NDD)
14215 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14216 %{
14217   predicate(UseAPX);
14218   match(Set dst (XorL (LoadL src1) src2));
14219   effect(KILL cr);
14220   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14221   ins_cost(150);
14222 
14223   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14224   ins_encode %{
14225     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14226   %}
14227   ins_pipe(ialu_reg);
14228 %}
14229 
14230 // Xor Register with Memory
14231 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14232 %{
14233   predicate(!UseAPX);
14234   match(Set dst (XorL dst (LoadL src)));
14235   effect(KILL cr);
14236   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14237 
14238   ins_cost(150);
14239   format %{ "xorq    $dst, $src\t# long" %}
14240   ins_encode %{
14241     __ xorq($dst$$Register, $src$$Address);
14242   %}
14243   ins_pipe(ialu_reg_mem);
14244 %}
14245 
14246 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14247 %{
14248   predicate(UseAPX);
14249   match(Set dst (XorL src1 (LoadL src2)));
14250   effect(KILL cr);
14251   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14252 
14253   ins_cost(150);
14254   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14255   ins_encode %{
14256     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14257   %}
14258   ins_pipe(ialu_reg_mem);
14259 %}
14260 
14261 // Xor Memory with Register
14262 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14263 %{
14264   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14265   effect(KILL cr);
14266   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14267 
14268   ins_cost(150);
14269   format %{ "xorq    $dst, $src\t# long" %}
14270   ins_encode %{
14271     __ xorq($dst$$Address, $src$$Register);
14272   %}
14273   ins_pipe(ialu_mem_reg);
14274 %}
14275 
14276 // Xor Memory with Immediate
14277 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14278 %{
14279   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14280   effect(KILL cr);
14281   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14282 
14283   ins_cost(125);
14284   format %{ "xorq    $dst, $src\t# long" %}
14285   ins_encode %{
14286     __ xorq($dst$$Address, $src$$constant);
14287   %}
14288   ins_pipe(ialu_mem_imm);
14289 %}
14290 
14291 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14292 %{
14293   match(Set dst (CmpLTMask p q));
14294   effect(KILL cr);
14295 
14296   ins_cost(400);
14297   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
14298             "setcc   $dst \t# emits setlt + movzbl or setzul for APX"
14299             "negl    $dst" %}
14300   ins_encode %{
14301     __ cmpl($p$$Register, $q$$Register);
14302     __ setcc(Assembler::less, $dst$$Register);
14303     __ negl($dst$$Register);
14304   %}
14305   ins_pipe(pipe_slow);
14306 %}
14307 
14308 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14309 %{
14310   match(Set dst (CmpLTMask dst zero));
14311   effect(KILL cr);
14312 
14313   ins_cost(100);
14314   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14315   ins_encode %{
14316     __ sarl($dst$$Register, 31);
14317   %}
14318   ins_pipe(ialu_reg);
14319 %}
14320 
14321 /* Better to save a register than avoid a branch */
14322 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14323 %{
14324   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14325   effect(KILL cr);
14326   ins_cost(300);
14327   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14328             "jge     done\n\t"
14329             "addl    $p,$y\n"
14330             "done:   " %}
14331   ins_encode %{
14332     Register Rp = $p$$Register;
14333     Register Rq = $q$$Register;
14334     Register Ry = $y$$Register;
14335     Label done;
14336     __ subl(Rp, Rq);
14337     __ jccb(Assembler::greaterEqual, done);
14338     __ addl(Rp, Ry);
14339     __ bind(done);
14340   %}
14341   ins_pipe(pipe_cmplt);
14342 %}
14343 
14344 /* Better to save a register than avoid a branch */
14345 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14346 %{
14347   match(Set y (AndI (CmpLTMask p q) y));
14348   effect(KILL cr);
14349 
14350   ins_cost(300);
14351 
14352   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14353             "jlt     done\n\t"
14354             "xorl    $y, $y\n"
14355             "done:   " %}
14356   ins_encode %{
14357     Register Rp = $p$$Register;
14358     Register Rq = $q$$Register;
14359     Register Ry = $y$$Register;
14360     Label done;
14361     __ cmpl(Rp, Rq);
14362     __ jccb(Assembler::less, done);
14363     __ xorl(Ry, Ry);
14364     __ bind(done);
14365   %}
14366   ins_pipe(pipe_cmplt);
14367 %}
14368 
14369 
14370 //---------- FP Instructions------------------------------------------------
14371 
14372 // Really expensive, avoid
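// ucomiss/ucomisd flag an unordered (NaN) operand as ZF=PF=CF=1; emit_cmpfp_fixup
// rewrites the saved flags (clearing ZF and PF while keeping CF) so a NaN reads
// as "below". The cheaper UCF variants below omit the fixup because their
// consumers accept the raw unordered flag encoding.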
14373 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14374 %{
14375   match(Set cr (CmpF src1 src2));
14376 
14377   ins_cost(500);
14378   format %{ "ucomiss $src1, $src2\n\t"
14379             "jnp,s   exit\n\t"
14380             "pushfq\t# saw NaN, set CF\n\t"
14381             "andq    [rsp], #0xffffff2b\n\t"
14382             "popfq\n"
14383     "exit:" %}
14384   ins_encode %{
14385     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14386     emit_cmpfp_fixup(masm);
14387   %}
14388   ins_pipe(pipe_slow);
14389 %}
14390 
14391 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14392   match(Set cr (CmpF src1 src2));
14393 
14394   ins_cost(100);
14395   format %{ "ucomiss $src1, $src2" %}
14396   ins_encode %{
14397     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14398   %}
14399   ins_pipe(pipe_slow);
14400 %}
14401 
14402 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14403   match(Set cr (CmpF src1 src2));
14404 
14405   ins_cost(100);
14406   format %{ "evucomxss $src1, $src2" %}
14407   ins_encode %{
14408     __ evucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14409   %}
14410   ins_pipe(pipe_slow);
14411 %}
14412 
14413 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14414   match(Set cr (CmpF src1 (LoadF src2)));
14415 
14416   ins_cost(100);
14417   format %{ "ucomiss $src1, $src2" %}
14418   ins_encode %{
14419     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14420   %}
14421   ins_pipe(pipe_slow);
14422 %}
14423 
14424 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14425   match(Set cr (CmpF src1 (LoadF src2)));
14426 
14427   ins_cost(100);
14428   format %{ "evucomxss $src1, $src2" %}
14429   ins_encode %{
14430     __ evucomxss($src1$$XMMRegister, $src2$$Address);
14431   %}
14432   ins_pipe(pipe_slow);
14433 %}
14434 
14435 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14436   match(Set cr (CmpF src con));
14437 
14438   ins_cost(100);
14439   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14440   ins_encode %{
14441     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14442   %}
14443   ins_pipe(pipe_slow);
14444 %}
14445 
14446 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14447   match(Set cr (CmpF src con));
14448 
14449   ins_cost(100);
14450   format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14451   ins_encode %{
14452     __ evucomxss($src$$XMMRegister, $constantaddress($con));
14453   %}
14454   ins_pipe(pipe_slow);
14455 %}
14456 
14457 // Really expensive, avoid
14458 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14459 %{
14460   match(Set cr (CmpD src1 src2));
14461 
14462   ins_cost(500);
14463   format %{ "ucomisd $src1, $src2\n\t"
14464             "jnp,s   exit\n\t"
14465             "pushfq\t# saw NaN, set CF\n\t"
14466             "andq    [rsp], #0xffffff2b\n\t"
14467             "popfq\n"
14468     "exit:" %}
14469   ins_encode %{
14470     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14471     emit_cmpfp_fixup(masm);
14472   %}
14473   ins_pipe(pipe_slow);
14474 %}
14475 
14476 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14477   match(Set cr (CmpD src1 src2));
14478 
14479   ins_cost(100);
14480   format %{ "ucomisd $src1, $src2 test" %}
14481   ins_encode %{
14482     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14483   %}
14484   ins_pipe(pipe_slow);
14485 %}
14486 
14487 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14488   match(Set cr (CmpD src1 src2));
14489 
14490   ins_cost(100);
14491   format %{ "evucomxsd $src1, $src2 test" %}
14492   ins_encode %{
14493     __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14494   %}
14495   ins_pipe(pipe_slow);
14496 %}
14497 
14498 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14499   match(Set cr (CmpD src1 (LoadD src2)));
14500 
14501   ins_cost(100);
14502   format %{ "ucomisd $src1, $src2" %}
14503   ins_encode %{
14504     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14505   %}
14506   ins_pipe(pipe_slow);
14507 %}
14508 
14509 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14510   match(Set cr (CmpD src1 (LoadD src2)));
14511 
14512   ins_cost(100);
14513   format %{ "evucomxsd $src1, $src2" %}
14514   ins_encode %{
14515     __ evucomxsd($src1$$XMMRegister, $src2$$Address);
14516   %}
14517   ins_pipe(pipe_slow);
14518 %}
14519 
14520 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14521   match(Set cr (CmpD src con));
14522   ins_cost(100);
14523   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14524   ins_encode %{
14525     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14526   %}
14527   ins_pipe(pipe_slow);
14528 %}
14529 
14530 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14531   match(Set cr (CmpD src con));
14532 
14533   ins_cost(100);
14534   format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14535   ins_encode %{
14536     __ evucomxsd($src$$XMMRegister, $constantaddress($con));
14537   %}
14538   ins_pipe(pipe_slow);
14539 %}
14540 
14541 // Compare into -1,0,1
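// emit_cmpfp3 produces -1 for "less" or unordered (NaN), 0 for equal, and 1 for
// greater, exactly as the format string lays out.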
14542 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14543 %{
14544   match(Set dst (CmpF3 src1 src2));
14545   effect(KILL cr);
14546 
14547   ins_cost(275);
14548   format %{ "ucomiss $src1, $src2\n\t"
14549             "movl    $dst, #-1\n\t"
14550             "jp,s    done\n\t"
14551             "jb,s    done\n\t"
14552             "setne   $dst\n\t"
14553             "movzbl  $dst, $dst\n"
14554     "done:" %}
14555   ins_encode %{
14556     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14557     emit_cmpfp3(masm, $dst$$Register);
14558   %}
14559   ins_pipe(pipe_slow);
14560 %}
14561 
14562 // Compare into -1,0,1
14563 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14564 %{
14565   match(Set dst (CmpF3 src1 (LoadF src2)));
14566   effect(KILL cr);
14567 
14568   ins_cost(275);
14569   format %{ "ucomiss $src1, $src2\n\t"
14570             "movl    $dst, #-1\n\t"
14571             "jp,s    done\n\t"
14572             "jb,s    done\n\t"
14573             "setne   $dst\n\t"
14574             "movzbl  $dst, $dst\n"
14575     "done:" %}
14576   ins_encode %{
14577     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14578     emit_cmpfp3(masm, $dst$$Register);
14579   %}
14580   ins_pipe(pipe_slow);
14581 %}
14582 
14583 // Compare into -1,0,1
14584 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14585   match(Set dst (CmpF3 src con));
14586   effect(KILL cr);
14587 
14588   ins_cost(275);
14589   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14590             "movl    $dst, #-1\n\t"
14591             "jp,s    done\n\t"
14592             "jb,s    done\n\t"
14593             "setne   $dst\n\t"
14594             "movzbl  $dst, $dst\n"
14595     "done:" %}
14596   ins_encode %{
14597     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14598     emit_cmpfp3(masm, $dst$$Register);
14599   %}
14600   ins_pipe(pipe_slow);
14601 %}
14602 
14603 // Compare into -1,0,1
14604 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14605 %{
14606   match(Set dst (CmpD3 src1 src2));
14607   effect(KILL cr);
14608 
14609   ins_cost(275);
14610   format %{ "ucomisd $src1, $src2\n\t"
14611             "movl    $dst, #-1\n\t"
14612             "jp,s    done\n\t"
14613             "jb,s    done\n\t"
14614             "setne   $dst\n\t"
14615             "movzbl  $dst, $dst\n"
14616     "done:" %}
14617   ins_encode %{
14618     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14619     emit_cmpfp3(masm, $dst$$Register);
14620   %}
14621   ins_pipe(pipe_slow);
14622 %}
14623 
14624 // Compare into -1,0,1
14625 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14626 %{
14627   match(Set dst (CmpD3 src1 (LoadD src2)));
14628   effect(KILL cr);
14629 
14630   ins_cost(275);
14631   format %{ "ucomisd $src1, $src2\n\t"
14632             "movl    $dst, #-1\n\t"
14633             "jp,s    done\n\t"
14634             "jb,s    done\n\t"
14635             "setne   $dst\n\t"
14636             "movzbl  $dst, $dst\n"
14637     "done:" %}
14638   ins_encode %{
14639     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14640     emit_cmpfp3(masm, $dst$$Register);
14641   %}
14642   ins_pipe(pipe_slow);
14643 %}
14644 
14645 // Compare into -1,0,1
14646 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14647   match(Set dst (CmpD3 src con));
14648   effect(KILL cr);
14649 
14650   ins_cost(275);
14651   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14652             "movl    $dst, #-1\n\t"
14653             "jp,s    done\n\t"
14654             "jb,s    done\n\t"
14655             "setne   $dst\n\t"
14656             "movzbl  $dst, $dst\n"
14657     "done:" %}
14658   ins_encode %{
14659     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14660     emit_cmpfp3(masm, $dst$$Register);
14661   %}
14662   ins_pipe(pipe_slow);
14663 %}
14664 
14665 //----------Arithmetic Conversion Instructions---------------------------------
14666 
14667 instruct convF2D_reg_reg(regD dst, regF src)
14668 %{
14669   match(Set dst (ConvF2D src));
14670 
14671   format %{ "cvtss2sd $dst, $src" %}
14672   ins_encode %{
14673     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14674   %}
14675   ins_pipe(pipe_slow); // XXX
14676 %}
14677 
14678 instruct convF2D_reg_mem(regD dst, memory src)
14679 %{
14680   predicate(UseAVX == 0);
14681   match(Set dst (ConvF2D (LoadF src)));
14682 
14683   format %{ "cvtss2sd $dst, $src" %}
14684   ins_encode %{
14685     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14686   %}
14687   ins_pipe(pipe_slow); // XXX
14688 %}
14689 
14690 instruct convD2F_reg_reg(regF dst, regD src)
14691 %{
14692   match(Set dst (ConvD2F src));
14693 
14694   format %{ "cvtsd2ss $dst, $src" %}
14695   ins_encode %{
14696     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14697   %}
14698   ins_pipe(pipe_slow); // XXX
14699 %}
14700 
14701 instruct convD2F_reg_mem(regF dst, memory src)
14702 %{
14703   predicate(UseAVX == 0);
14704   match(Set dst (ConvD2F (LoadD src)));
14705 
14706   format %{ "cvtsd2ss $dst, $src" %}
14707   ins_encode %{
14708     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14709   %}
14710   ins_pipe(pipe_slow); // XXX
14711 %}
14712 
14713 // XXX do mem variants
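// The legacy forms route through convertF2I, which branches to fix up NaN and
// out-of-range inputs (hence KILL cr); the AVX10.2 saturating forms below handle
// those cases in hardware, which is why they declare no flags kill.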
14714 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14715 %{
14716   predicate(!VM_Version::supports_avx10_2());
14717   match(Set dst (ConvF2I src));
14718   effect(KILL cr);
14719   format %{ "convert_f2i $dst, $src" %}
14720   ins_encode %{
14721     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14722   %}
14723   ins_pipe(pipe_slow);
14724 %}
14725 
14726 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14727 %{
14728   predicate(VM_Version::supports_avx10_2());
14729   match(Set dst (ConvF2I src));
14730   format %{ "evcvttss2sisl $dst, $src" %}
14731   ins_encode %{
14732     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14733   %}
14734   ins_pipe(pipe_slow);
14735 %}
14736 
14737 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14738 %{
14739   predicate(VM_Version::supports_avx10_2());
14740   match(Set dst (ConvF2I (LoadF src)));
14741   format %{ "evcvttss2sisl $dst, $src" %}
14742   ins_encode %{
14743     __ evcvttss2sisl($dst$$Register, $src$$Address);
14744   %}
14745   ins_pipe(pipe_slow);
14746 %}
14747 
14748 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14749 %{
14750   predicate(!VM_Version::supports_avx10_2());
14751   match(Set dst (ConvF2L src));
14752   effect(KILL cr);
14753   format %{ "convert_f2l $dst, $src"%}
14754   ins_encode %{
14755     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14756   %}
14757   ins_pipe(pipe_slow);
14758 %}
14759 
14760 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14761 %{
14762   predicate(VM_Version::supports_avx10_2());
14763   match(Set dst (ConvF2L src));
14764   format %{ "evcvttss2sisq $dst, $src" %}
14765   ins_encode %{
14766     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14767   %}
14768   ins_pipe(pipe_slow);
14769 %}
14770 
14771 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14772 %{
14773   predicate(VM_Version::supports_avx10_2());
14774   match(Set dst (ConvF2L (LoadF src)));
14775   format %{ "evcvttss2sisq $dst, $src" %}
14776   ins_encode %{
14777     __ evcvttss2sisq($dst$$Register, $src$$Address);
14778   %}
14779   ins_pipe(pipe_slow);
14780 %}
14781 
14782 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14783 %{
14784   predicate(!VM_Version::supports_avx10_2());
14785   match(Set dst (ConvD2I src));
14786   effect(KILL cr);
14787   format %{ "convert_d2i $dst, $src"%}
14788   ins_encode %{
14789     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14790   %}
14791   ins_pipe(pipe_slow);
14792 %}
14793 
14794 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14795 %{
14796   predicate(VM_Version::supports_avx10_2());
14797   match(Set dst (ConvD2I src));
14798   format %{ "evcvttsd2sisl $dst, $src" %}
14799   ins_encode %{
14800     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14801   %}
14802   ins_pipe(pipe_slow);
14803 %}
14804 
14805 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14806 %{
14807   predicate(VM_Version::supports_avx10_2());
14808   match(Set dst (ConvD2I (LoadD src)));
14809   format %{ "evcvttsd2sisl $dst, $src" %}
14810   ins_encode %{
14811     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14812   %}
14813   ins_pipe(pipe_slow);
14814 %}
14815 
14816 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14817 %{
14818   predicate(!VM_Version::supports_avx10_2());
14819   match(Set dst (ConvD2L src));
14820   effect(KILL cr);
14821   format %{ "convert_d2l $dst, $src"%}
14822   ins_encode %{
14823     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14824   %}
14825   ins_pipe(pipe_slow);
14826 %}
14827 
14828 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14829 %{
14830   predicate(VM_Version::supports_avx10_2());
14831   match(Set dst (ConvD2L src));
14832   format %{ "evcvttsd2sisq $dst, $src" %}
14833   ins_encode %{
14834     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14835   %}
14836   ins_pipe(pipe_slow);
14837 %}
14838 
14839 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14840 %{
14841   predicate(VM_Version::supports_avx10_2());
14842   match(Set dst (ConvD2L (LoadD src)));
14843   format %{ "evcvttsd2sisq $dst, $src" %}
14844   ins_encode %{
14845     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14846   %}
14847   ins_pipe(pipe_slow);
14848 %}
14849 
14850 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14851 %{
14852   match(Set dst (RoundD src));
14853   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14854   format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14855   ins_encode %{
14856     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14857   %}
14858   ins_pipe(pipe_slow);
14859 %}
14860 
14861 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14862 %{
14863   match(Set dst (RoundF src));
14864   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14865   format %{ "round_float $dst,$src" %}
14866   ins_encode %{
14867     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14868   %}
14869   ins_pipe(pipe_slow);
14870 %}
14871 
14872 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14873 %{
14874   predicate(!UseXmmI2F);
14875   match(Set dst (ConvI2F src));
14876 
14877   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14878   ins_encode %{
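    // Under AVX, cvtsi2ssl merges into dst, creating a false dependence on its
    // previous contents; zeroing dst first breaks that dependence.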
14879     if (UseAVX > 0) {
14880       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14881     }
14882     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14883   %}
14884   ins_pipe(pipe_slow); // XXX
14885 %}
14886 
14887 instruct convI2F_reg_mem(regF dst, memory src)
14888 %{
14889   predicate(UseAVX == 0);
14890   match(Set dst (ConvI2F (LoadI src)));
14891 
14892   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14893   ins_encode %{
14894     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14895   %}
14896   ins_pipe(pipe_slow); // XXX
14897 %}
14898 
14899 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14900 %{
14901   predicate(!UseXmmI2D);
14902   match(Set dst (ConvI2D src));
14903 
14904   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14905   ins_encode %{
14906     if (UseAVX > 0) {
14907       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14908     }
14909     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14910   %}
14911   ins_pipe(pipe_slow); // XXX
14912 %}
14913 
14914 instruct convI2D_reg_mem(regD dst, memory src)
14915 %{
14916   predicate(UseAVX == 0);
14917   match(Set dst (ConvI2D (LoadI src)));
14918 
14919   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14920   ins_encode %{
14921     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14922   %}
14923   ins_pipe(pipe_slow); // XXX
14924 %}
14925 
14926 instruct convXI2F_reg(regF dst, rRegI src)
14927 %{
14928   predicate(UseXmmI2F);
14929   match(Set dst (ConvI2F src));
14930 
14931   format %{ "movdl $dst, $src\n\t"
14932             "cvtdq2psl $dst, $dst\t# i2f" %}
14933   ins_encode %{
14934     __ movdl($dst$$XMMRegister, $src$$Register);
14935     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14936   %}
14937   ins_pipe(pipe_slow); // XXX
14938 %}
14939 
14940 instruct convXI2D_reg(regD dst, rRegI src)
14941 %{
14942   predicate(UseXmmI2D);
14943   match(Set dst (ConvI2D src));
14944 
14945   format %{ "movdl $dst, $src\n\t"
14946             "cvtdq2pdl $dst, $dst\t# i2d" %}
14947   ins_encode %{
14948     __ movdl($dst$$XMMRegister, $src$$Register);
14949     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14950   %}
14951   ins_pipe(pipe_slow); // XXX
14952 %}
14953 
14954 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14955 %{
14956   match(Set dst (ConvL2F src));
14957 
14958   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14959   ins_encode %{
14960     if (UseAVX > 0) {
14961       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14962     }
14963     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14964   %}
14965   ins_pipe(pipe_slow); // XXX
14966 %}
14967 
14968 instruct convL2F_reg_mem(regF dst, memory src)
14969 %{
14970   predicate(UseAVX == 0);
14971   match(Set dst (ConvL2F (LoadL src)));
14972 
14973   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14974   ins_encode %{
14975     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14976   %}
14977   ins_pipe(pipe_slow); // XXX
14978 %}
14979 
14980 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14981 %{
14982   match(Set dst (ConvL2D src));
14983 
14984   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14985   ins_encode %{
14986     if (UseAVX > 0) {
14987       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14988     }
14989     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14990   %}
14991   ins_pipe(pipe_slow); // XXX
14992 %}
14993 
14994 instruct convL2D_reg_mem(regD dst, memory src)
14995 %{
14996   predicate(UseAVX == 0);
14997   match(Set dst (ConvL2D (LoadL src)));
14998 
14999   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15000   ins_encode %{
15001     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
15002   %}
15003   ins_pipe(pipe_slow); // XXX
15004 %}
15005 
15006 instruct convI2L_reg_reg(rRegL dst, rRegI src)
15007 %{
15008   match(Set dst (ConvI2L src));
15009 
15010   ins_cost(125);
15011   format %{ "movslq  $dst, $src\t# i2l" %}
15012   ins_encode %{
15013     __ movslq($dst$$Register, $src$$Register);
15014   %}
15015   ins_pipe(ialu_reg_reg);
15016 %}
15017 
15018 // Zero-extend convert int to long
15019 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
15020 %{
15021   match(Set dst (AndL (ConvI2L src) mask));
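  // A 32-bit mov zeroes bits 63:32 of its destination on x86-64, so the mask is
  // free and the move itself can be elided when dst and src coincide.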
15022 
15023   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
15024   ins_encode %{
15025     if ($dst$$reg != $src$$reg) {
15026       __ movl($dst$$Register, $src$$Register);
15027     }
15028   %}
15029   ins_pipe(ialu_reg_reg);
15030 %}
15031 
15032 // Zero-extend convert int to long
15033 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15034 %{
15035   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15036 
15037   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
15038   ins_encode %{
15039     __ movl($dst$$Register, $src$$Address);
15040   %}
15041   ins_pipe(ialu_reg_mem);
15042 %}
15043 
15044 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15045 %{
15046   match(Set dst (AndL src mask));
15047 
15048   format %{ "movl    $dst, $src\t# zero-extend long" %}
15049   ins_encode %{
15050     __ movl($dst$$Register, $src$$Register);
15051   %}
15052   ins_pipe(ialu_reg_reg);
15053 %}
15054 
15055 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15056 %{
15057   match(Set dst (ConvL2I src));
15058 
15059   format %{ "movl    $dst, $src\t# l2i" %}
15060   ins_encode %{
15061     __ movl($dst$$Register, $src$$Register);
15062   %}
15063   ins_pipe(ialu_reg_reg);
15064 %}
15065 
15066 
15067 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15068   match(Set dst (MoveF2I src));
15069   effect(DEF dst, USE src);
15070 
15071   ins_cost(125);
15072   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
15073   ins_encode %{
15074     __ movl($dst$$Register, Address(rsp, $src$$disp));
15075   %}
15076   ins_pipe(ialu_reg_mem);
15077 %}
15078 
15079 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15080   match(Set dst (MoveI2F src));
15081   effect(DEF dst, USE src);
15082 
15083   ins_cost(125);
15084   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
15085   ins_encode %{
15086     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15087   %}
15088   ins_pipe(pipe_slow);
15089 %}
15090 
15091 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15092   match(Set dst (MoveD2L src));
15093   effect(DEF dst, USE src);
15094 
15095   ins_cost(125);
15096   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
15097   ins_encode %{
15098     __ movq($dst$$Register, Address(rsp, $src$$disp));
15099   %}
15100   ins_pipe(ialu_reg_mem);
15101 %}
15102 
15103 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15104   predicate(!UseXmmLoadAndClearUpper);
15105   match(Set dst (MoveL2D src));
15106   effect(DEF dst, USE src);
15107 
15108   ins_cost(125);
15109   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
15110   ins_encode %{
15111     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15112   %}
15113   ins_pipe(pipe_slow);
15114 %}
15115 
15116 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15117   predicate(UseXmmLoadAndClearUpper);
15118   match(Set dst (MoveL2D src));
15119   effect(DEF dst, USE src);
15120 
15121   ins_cost(125);
15122   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
15123   ins_encode %{
15124     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15125   %}
15126   ins_pipe(pipe_slow);
15127 %}
15128 
15129 
15130 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15131   match(Set dst (MoveF2I src));
15132   effect(DEF dst, USE src);
15133 
15134   ins_cost(95); // XXX
15135   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
15136   ins_encode %{
15137     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15138   %}
15139   ins_pipe(pipe_slow);
15140 %}
15141 
15142 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15143   match(Set dst (MoveI2F src));
15144   effect(DEF dst, USE src);
15145 
15146   ins_cost(100);
15147   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
15148   ins_encode %{
15149     __ movl(Address(rsp, $dst$$disp), $src$$Register);
15150   %}
15151   ins_pipe( ialu_mem_reg );
15152 %}
15153 
15154 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15155   match(Set dst (MoveD2L src));
15156   effect(DEF dst, USE src);
15157 
15158   ins_cost(95); // XXX
15159   format %{ "movsd   $dst, $src\t# MoveL2D_reg_stack" %}
15160   ins_encode %{
15161     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15162   %}
15163   ins_pipe(pipe_slow);
15164 %}
15165 
15166 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15167   match(Set dst (MoveL2D src));
15168   effect(DEF dst, USE src);
15169 
15170   ins_cost(100);
15171   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
15172   ins_encode %{
15173     __ movq(Address(rsp, $dst$$disp), $src$$Register);
15174   %}
15175   ins_pipe(ialu_mem_reg);
15176 %}
15177 
15178 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15179   match(Set dst (MoveF2I src));
15180   effect(DEF dst, USE src);
15181   ins_cost(85);
15182   format %{ "movd    $dst,$src\t# MoveF2I" %}
15183   ins_encode %{
15184     __ movdl($dst$$Register, $src$$XMMRegister);
15185   %}
15186   ins_pipe( pipe_slow );
15187 %}
15188 
15189 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15190   match(Set dst (MoveD2L src));
15191   effect(DEF dst, USE src);
15192   ins_cost(85);
15193   format %{ "movd    $dst,$src\t# MoveD2L" %}
15194   ins_encode %{
15195     __ movdq($dst$$Register, $src$$XMMRegister);
15196   %}
15197   ins_pipe( pipe_slow );
15198 %}
15199 
15200 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15201   match(Set dst (MoveI2F src));
15202   effect(DEF dst, USE src);
15203   ins_cost(100);
15204   format %{ "movd    $dst,$src\t# MoveI2F" %}
15205   ins_encode %{
15206     __ movdl($dst$$XMMRegister, $src$$Register);
15207   %}
15208   ins_pipe( pipe_slow );
15209 %}
15210 
15211 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15212   match(Set dst (MoveL2D src));
15213   effect(DEF dst, USE src);
15214   ins_cost(100);
15215   format %{ "movd    $dst,$src\t# MoveL2D" %}
15216   ins_encode %{
15217      __ movdq($dst$$XMMRegister, $src$$Register);
15218   %}
15219   ins_pipe( pipe_slow );
15220 %}
15221 
15222 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
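// The length is counted in 8-byte words (note the 8-scaled addressing and the
// shlq-by-3 byte conversion in the format below); short lengths take an inline
// store loop, large ones fall back to rep stos or a 64-byte XMM loop.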
15224 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15225                   Universe dummy, rFlagsReg cr)
15226 %{
15227   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15228   match(Set dummy (ClearArray cnt base));
15229   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15230 
15231   format %{ $$template
15232     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15233     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15234     $$emit$$"jg      LARGE\n\t"
15235     $$emit$$"dec     rcx\n\t"
15236     $$emit$$"js      DONE\t# Zero length\n\t"
15237     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15238     $$emit$$"dec     rcx\n\t"
15239     $$emit$$"jge     LOOP\n\t"
15240     $$emit$$"jmp     DONE\n\t"
15241     $$emit$$"# LARGE:\n\t"
15242     if (UseFastStosb) {
15243        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15244        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15245     } else if (UseXMMForObjInit) {
15246        $$emit$$"mov     rdi,rax\n\t"
15247        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15248        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15249        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15250        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15251        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15252        $$emit$$"add     0x40,rax\n\t"
15253        $$emit$$"# L_zero_64_bytes:\n\t"
15254        $$emit$$"sub     0x8,rcx\n\t"
15255        $$emit$$"jge     L_loop\n\t"
15256        $$emit$$"add     0x4,rcx\n\t"
15257        $$emit$$"jl      L_tail\n\t"
15258        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15259        $$emit$$"add     0x20,rax\n\t"
15260        $$emit$$"sub     0x4,rcx\n\t"
15261        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15262        $$emit$$"add     0x4,rcx\n\t"
15263        $$emit$$"jle     L_end\n\t"
15264        $$emit$$"dec     rcx\n\t"
15265        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15266        $$emit$$"vmovq   xmm0,(rax)\n\t"
15267        $$emit$$"add     0x8,rax\n\t"
15268        $$emit$$"dec     rcx\n\t"
15269        $$emit$$"jge     L_sloop\n\t"
15270        $$emit$$"# L_end:\n\t"
15271     } else {
15272        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15273     }
15274     $$emit$$"# DONE"
15275   %}
15276   ins_encode %{
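    // 'false' selects the small-length expansion (the large variants below pass
    // 'true'); knoreg means no AVX-512 mask register is used on this path.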
15277     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15278                  $tmp$$XMMRegister, false, knoreg);
15279   %}
15280   ins_pipe(pipe_slow);
15281 %}
15282 
15283 // Small non-constant length ClearArray for AVX512 targets.
15284 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15285                        Universe dummy, rFlagsReg cr)
15286 %{
15287   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15288   match(Set dummy (ClearArray cnt base));
15289   ins_cost(125);
15290   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15291 
15292   format %{ $$template
15293     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15294     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15295     $$emit$$"jg      LARGE\n\t"
15296     $$emit$$"dec     rcx\n\t"
15297     $$emit$$"js      DONE\t# Zero length\n\t"
15298     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15299     $$emit$$"dec     rcx\n\t"
15300     $$emit$$"jge     LOOP\n\t"
15301     $$emit$$"jmp     DONE\n\t"
15302     $$emit$$"# LARGE:\n\t"
15303     if (UseFastStosb) {
15304        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15305        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15306     } else if (UseXMMForObjInit) {
15307        $$emit$$"mov     rdi,rax\n\t"
15308        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15309        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15310        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15311        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15312        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15313        $$emit$$"add     0x40,rax\n\t"
15314        $$emit$$"# L_zero_64_bytes:\n\t"
15315        $$emit$$"sub     0x8,rcx\n\t"
15316        $$emit$$"jge     L_loop\n\t"
15317        $$emit$$"add     0x4,rcx\n\t"
15318        $$emit$$"jl      L_tail\n\t"
15319        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15320        $$emit$$"add     0x20,rax\n\t"
15321        $$emit$$"sub     0x4,rcx\n\t"
15322        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15323        $$emit$$"add     0x4,rcx\n\t"
15324        $$emit$$"jle     L_end\n\t"
15325        $$emit$$"dec     rcx\n\t"
15326        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15327        $$emit$$"vmovq   xmm0,(rax)\n\t"
15328        $$emit$$"add     0x8,rax\n\t"
15329        $$emit$$"dec     rcx\n\t"
15330        $$emit$$"jge     L_sloop\n\t"
15331        $$emit$$"# L_end:\n\t"
15332     } else {
15333        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15334     }
15335     $$emit$$"# DONE"
15336   %}
15337   ins_encode %{
15338     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15339                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15340   %}
15341   ins_pipe(pipe_slow);
15342 %}
15343 
15344 // Large non-constant length ClearArray for non-AVX512 targets.
15345 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15346                         Universe dummy, rFlagsReg cr)
15347 %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15349   match(Set dummy (ClearArray cnt base));
15350   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15351 
15352   format %{ $$template
15353     if (UseFastStosb) {
15354        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15355        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15356        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15357     } else if (UseXMMForObjInit) {
15358        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15359        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15360        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15361        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15362        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15363        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15364        $$emit$$"add     0x40,rax\n\t"
15365        $$emit$$"# L_zero_64_bytes:\n\t"
15366        $$emit$$"sub     0x8,rcx\n\t"
15367        $$emit$$"jge     L_loop\n\t"
15368        $$emit$$"add     0x4,rcx\n\t"
15369        $$emit$$"jl      L_tail\n\t"
15370        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15371        $$emit$$"add     0x20,rax\n\t"
15372        $$emit$$"sub     0x4,rcx\n\t"
15373        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15374        $$emit$$"add     0x4,rcx\n\t"
15375        $$emit$$"jle     L_end\n\t"
15376        $$emit$$"dec     rcx\n\t"
15377        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15378        $$emit$$"vmovq   xmm0,(rax)\n\t"
15379        $$emit$$"add     0x8,rax\n\t"
15380        $$emit$$"dec     rcx\n\t"
15381        $$emit$$"jge     L_sloop\n\t"
15382        $$emit$$"# L_end:\n\t"
15383     } else {
15384        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15385        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15386     }
15387   %}
15388   ins_encode %{
15389     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15390                  $tmp$$XMMRegister, true, knoreg);
15391   %}
15392   ins_pipe(pipe_slow);
15393 %}
15394 
15395 // Large non-constant length ClearArray for AVX512 targets.
15396 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15397                              Universe dummy, rFlagsReg cr)
15398 %{
15399   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15400   match(Set dummy (ClearArray cnt base));
15401   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15402 
15403   format %{ $$template
15404     if (UseFastStosb) {
15405        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15406        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15407        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15408     } else if (UseXMMForObjInit) {
15409        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15410        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15411        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15412        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15413        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15414        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15415        $$emit$$"add     0x40,rax\n\t"
15416        $$emit$$"# L_zero_64_bytes:\n\t"
15417        $$emit$$"sub     0x8,rcx\n\t"
15418        $$emit$$"jge     L_loop\n\t"
15419        $$emit$$"add     0x4,rcx\n\t"
15420        $$emit$$"jl      L_tail\n\t"
15421        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15422        $$emit$$"add     0x20,rax\n\t"
15423        $$emit$$"sub     0x4,rcx\n\t"
15424        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15425        $$emit$$"add     0x4,rcx\n\t"
15426        $$emit$$"jle     L_end\n\t"
15427        $$emit$$"dec     rcx\n\t"
15428        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15429        $$emit$$"vmovq   xmm0,(rax)\n\t"
15430        $$emit$$"add     0x8,rax\n\t"
15431        $$emit$$"dec     rcx\n\t"
15432        $$emit$$"jge     L_sloop\n\t"
15433        $$emit$$"# L_end:\n\t"
15434     } else {
15435        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15436        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15437     }
15438   %}
15439   ins_encode %{
15440     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15441                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15442   %}
15443   ins_pipe(pipe_slow);
15444 %}
15445 
15446 // Small constant length ClearArray for AVX512 targets.
15447 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15448 %{
15449   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15450   match(Set dummy (ClearArray cnt base));
15451   ins_cost(100);
15452   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15453   format %{ "clear_mem_imm $base, $cnt\n\t" %}
15454   ins_encode %{
15455     __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15456   %}
15457   ins_pipe(pipe_slow);
15458 %}
15459 
15460 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15461                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15462 %{
15463   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15464   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15465   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15466 
15467   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15468   ins_encode %{
15469     __ string_compare($str1$$Register, $str2$$Register,
15470                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15471                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15472   %}
15473   ins_pipe( pipe_slow );
15474 %}
15475 
15476 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15477                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15478 %{
15479   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15480   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15481   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15482 
15483   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15484   ins_encode %{
15485     __ string_compare($str1$$Register, $str2$$Register,
15486                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15487                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15488   %}
15489   ins_pipe( pipe_slow );
15490 %}
15491 
15492 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15493                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15494 %{
15495   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15496   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15497   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15498 
15499   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15500   ins_encode %{
15501     __ string_compare($str1$$Register, $str2$$Register,
15502                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15503                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15504   %}
15505   ins_pipe( pipe_slow );
15506 %}
15507 
15508 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15509                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15510 %{
15511   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15512   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15513   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15514 
15515   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15516   ins_encode %{
15517     __ string_compare($str1$$Register, $str2$$Register,
15518                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15519                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15520   %}
15521   ins_pipe( pipe_slow );
15522 %}
15523 
15524 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15525                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15526 %{
15527   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15528   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15529   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15530 
15531   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15532   ins_encode %{
15533     __ string_compare($str1$$Register, $str2$$Register,
15534                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15535                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15536   %}
15537   ins_pipe( pipe_slow );
15538 %}
15539 
15540 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15541                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15542 %{
15543   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15544   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15545   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15546 
15547   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15548   ins_encode %{
15549     __ string_compare($str1$$Register, $str2$$Register,
15550                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15551                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15552   %}
15553   ins_pipe( pipe_slow );
15554 %}
15555 
15556 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15557                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15558 %{
15559   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15560   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15561   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15562 
15563   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15564   ins_encode %{
15565     __ string_compare($str2$$Register, $str1$$Register,
15566                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15567                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15568   %}
15569   ins_pipe( pipe_slow );
15570 %}
15571 
15572 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15573                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15574 %{
15575   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15576   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15577   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15578 
15579   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15580   ins_encode %{
15581     __ string_compare($str2$$Register, $str1$$Register,
15582                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15583                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15584   %}
15585   ins_pipe( pipe_slow );
15586 %}
15587 
15588 // fast search of substring with known size.
15589 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15590                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15591 %{
15592   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15593   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15594   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15595 
15596   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15597   ins_encode %{
15598     int icnt2 = (int)$int_cnt2$$constant;
15599     if (icnt2 >= 16) {
15600       // IndexOf for constant substrings with size >= 16 elements,
15601       // which don't need to be loaded through the stack.
15602       __ string_indexofC8($str1$$Register, $str2$$Register,
15603                           $cnt1$$Register, $cnt2$$Register,
15604                           icnt2, $result$$Register,
15605                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15606     } else {
15607       // Small strings are loaded through the stack if they cross a page boundary.
15608       __ string_indexof($str1$$Register, $str2$$Register,
15609                         $cnt1$$Register, $cnt2$$Register,
15610                         icnt2, $result$$Register,
15611                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15612     }
15613   %}
15614   ins_pipe( pipe_slow );
15615 %}
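      // Note on the constant-size split above: string_indexofC8 reads the
      // needle with full 16-byte XMM loads, so it needs at least 16 bytes of
      // needle data -- 16 elements here (LL), 8 in the UU/UL variants below.
      // Shorter constant needles go through string_indexof, which may stage
      // them on the stack so that a 16-byte load cannot fault by crossing
      // into an unmapped page.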
15616 
15617 // fast search of substring with known size.
15618 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15619                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15620 %{
15621   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15622   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15623   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15624 
15625   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15626   ins_encode %{
15627     int icnt2 = (int)$int_cnt2$$constant;
15628     if (icnt2 >= 8) {
15629       // IndexOf for constant substrings with size >= 8 elements,
15630       // which don't need to be loaded through the stack.
15631       __ string_indexofC8($str1$$Register, $str2$$Register,
15632                           $cnt1$$Register, $cnt2$$Register,
15633                           icnt2, $result$$Register,
15634                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15635     } else {
15636       // Small strings are loaded through the stack if they cross a page boundary.
15637       __ string_indexof($str1$$Register, $str2$$Register,
15638                         $cnt1$$Register, $cnt2$$Register,
15639                         icnt2, $result$$Register,
15640                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15641     }
15642   %}
15643   ins_pipe( pipe_slow );
15644 %}
15645 
15646 // fast search of substring with known size.
15647 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15648                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15649 %{
15650   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15651   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15652   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15653 
15654   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15655   ins_encode %{
15656     int icnt2 = (int)$int_cnt2$$constant;
15657     if (icnt2 >= 8) {
15658       // IndexOf for constant substrings with size >= 8 elements,
15659       // which don't need to be loaded through the stack.
15660       __ string_indexofC8($str1$$Register, $str2$$Register,
15661                           $cnt1$$Register, $cnt2$$Register,
15662                           icnt2, $result$$Register,
15663                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15664     } else {
15665       // Small strings are loaded through the stack if they cross a page boundary.
15666       __ string_indexof($str1$$Register, $str2$$Register,
15667                         $cnt1$$Register, $cnt2$$Register,
15668                         icnt2, $result$$Register,
15669                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15670     }
15671   %}
15672   ins_pipe( pipe_slow );
15673 %}
15674 
15675 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15676                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15677 %{
15678   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15679   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15680   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15681 
15682   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15683   ins_encode %{
15684     __ string_indexof($str1$$Register, $str2$$Register,
15685                       $cnt1$$Register, $cnt2$$Register,
15686                       (-1), $result$$Register,
15687                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15688   %}
15689   ins_pipe( pipe_slow );
15690 %}
15691 
15692 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15693                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15694 %{
15695   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15696   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15697   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15698 
15699   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15700   ins_encode %{
15701     __ string_indexof($str1$$Register, $str2$$Register,
15702                       $cnt1$$Register, $cnt2$$Register,
15703                       (-1), $result$$Register,
15704                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15705   %}
15706   ins_pipe( pipe_slow );
15707 %}
15708 
15709 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15710                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15711 %{
15712   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15713   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15714   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15715 
15716   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15717   ins_encode %{
15718     __ string_indexof($str1$$Register, $str2$$Register,
15719                       $cnt1$$Register, $cnt2$$Register,
15720                       (-1), $result$$Register,
15721                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15722   %}
15723   ins_pipe( pipe_slow );
15724 %}
15725 
15726 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15727                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15728 %{
15729   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15730   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15731   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15732   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15733   ins_encode %{
15734     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15735                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15736   %}
15737   ins_pipe( pipe_slow );
15738 %}
15739 
15740 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15741                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15742 %{
15743   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15744   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15745   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15746   format %{ "StringLatin1 IndexOf byte[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15747   ins_encode %{
15748     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15749                             $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15750   %}
15751   ins_pipe( pipe_slow );
15752 %}
15753 
15754 // fast string equals
15755 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15756                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15757 %{
15758   predicate(!VM_Version::supports_avx512vlbw());
15759   match(Set result (StrEquals (Binary str1 str2) cnt));
15760   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15761 
15762   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15763   ins_encode %{
15764     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15765                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15766                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15767   %}
15768   ins_pipe( pipe_slow );
15769 %}
15770 
15771 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15772                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15773 %{
15774   predicate(VM_Version::supports_avx512vlbw());
15775   match(Set result (StrEquals (Binary str1 str2) cnt));
15776   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15777 
15778   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15779   ins_encode %{
15780     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15781                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15782                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15783   %}
15784   ins_pipe( pipe_slow );
15785 %}
15786 
15787 // fast array equals
15788 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15789                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15790 %{
15791   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15792   match(Set result (AryEq ary1 ary2));
15793   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15794 
15795   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15796   ins_encode %{
15797     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15798                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15799                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15800   %}
15801   ins_pipe( pipe_slow );
15802 %}
15803 
15804 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15805                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15806 %{
15807   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15808   match(Set result (AryEq ary1 ary2));
15809   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15810 
15811   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15812   ins_encode %{
15813     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15814                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15815                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15816   %}
15817   ins_pipe( pipe_slow );
15818 %}
15819 
15820 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15821                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15822 %{
15823   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15824   match(Set result (AryEq ary1 ary2));
15825   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15826 
15827   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15828   ins_encode %{
15829     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15830                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15831                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15832   %}
15833   ins_pipe( pipe_slow );
15834 %}
15835 
15836 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15837                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15838 %{
15839   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15840   match(Set result (AryEq ary1 ary2));
15841   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15842 
15843   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15844   ins_encode %{
15845     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15846                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15847                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15848   %}
15849   ins_pipe( pipe_slow );
15850 %}
15851 
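      // Vectorized hash code over a primitive array; this backs the
      // jdk.internal.util.ArraysSupport.vectorizedHashCode intrinsic. It
      // accumulates the usual result = 31 * result + a[i] polynomial across
      // vector lanes; the large bank of XMM temps holds partial accumulators
      // and precomputed powers of 31 (see MacroAssembler::arrays_hashcode).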
15852 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15853                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15854                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15855                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15856                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15857 %{
15858   predicate(UseAVX >= 2);
15859   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15860   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15861          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15862          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15863          USE basic_type, KILL cr);
15864 
15865   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
15866   ins_encode %{
15867     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15868                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15869                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15870                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15871                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15872                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15873                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15874   %}
15875   ins_pipe( pipe_slow );
15876 %}
15877 
15878 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15879                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15880 %{
15881   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15882   match(Set result (CountPositives ary1 len));
15883   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15884 
15885   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15886   ins_encode %{
15887     __ count_positives($ary1$$Register, $len$$Register,
15888                        $result$$Register, $tmp3$$Register,
15889                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15890   %}
15891   ins_pipe( pipe_slow );
15892 %}
15893 
15894 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15895                               legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15896 %{
15897   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15898   match(Set result (CountPositives ary1 len));
15899   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15900 
15901   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15902   ins_encode %{
15903     __ count_positives($ary1$$Register, $len$$Register,
15904                        $result$$Register, $tmp3$$Register,
15905                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15906   %}
15907   ins_pipe( pipe_slow );
15908 %}
15909 
15910 // fast char[] to byte[] compression
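      // Backs the StringUTF16.compress intrinsic: copies UTF-16 chars to
      // Latin-1 bytes for as long as every char fits in one byte, with
      // $result reporting the outcome to the caller.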
15911 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15912                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15913   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15914   match(Set result (StrCompressedCopy src (Binary dst len)));
15915   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15916          USE_KILL len, KILL tmp5, KILL cr);
15917 
15918   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15919   ins_encode %{
15920     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15921                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15922                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15923                            knoreg, knoreg);
15924   %}
15925   ins_pipe( pipe_slow );
15926 %}
15927 
15928 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15929                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15930   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15931   match(Set result (StrCompressedCopy src (Binary dst len)));
15932   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15933          USE_KILL len, KILL tmp5, KILL cr);
15934 
15935   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15936   ins_encode %{
15937     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15938                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15939                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15940                            $ktmp1$$KRegister, $ktmp2$$KRegister);
15941   %}
15942   ins_pipe( pipe_slow );
15943 %}
15944 // fast byte[] to char[] inflation
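      // Backs the StringLatin1.inflate intrinsic: the inverse of the
      // compression above, widening each Latin-1 byte to a UTF-16 char.
      // Inflation cannot fail, hence "Set dummy" and no result register.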
15945 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15946                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15947   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15948   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15949   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15950 
15951   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15952   ins_encode %{
15953     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15954                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15955   %}
15956   ins_pipe( pipe_slow );
15957 %}
15958 
15959 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15960                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15961   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15962   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15963   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15964 
15965   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15966   ins_encode %{
15967     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15968                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15969   %}
15970   ins_pipe( pipe_slow );
15971 %}
15972 
15973 // encode char[] to byte[] in ISO_8859_1
15974 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15975                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15976                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15977   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15978   match(Set result (EncodeISOArray src (Binary dst len)));
15979   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15980 
15981   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15982   ins_encode %{
15983     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15984                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15985                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15986   %}
15987   ins_pipe( pipe_slow );
15988 %}
15989 
15990 // encode char[] to byte[] in ASCII
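      // Same MacroAssembler routine as above with the trailing flag set to
      // true: ASCII mode additionally rejects chars in [0x80, 0xFF] rather
      // than only those above 0xFF.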
15991 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15992                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15993                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15994   predicate(((EncodeISOArrayNode*)n)->is_ascii());
15995   match(Set result (EncodeISOArray src (Binary dst len)));
15996   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15997 
15998   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15999   ins_encode %{
16000     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16001                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16002                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16003   %}
16004   ins_pipe( pipe_slow );
16005 %}
16006 
16007 //----------Overflow Math Instructions-----------------------------------------
16008 
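      // These instructs produce only a flags result, which C2 then branches
      // on; they back the Math.*Exact intrinsics. As a sketch (registers are
      // illustrative), Math.addExact(int, int) becomes roughly:
      //   addl  rax, rsi        # OverflowAddI: OF set on signed overflow
      //   jo    <slow path>     # deoptimize / throw ArithmeticException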
16009 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16010 %{
16011   match(Set cr (OverflowAddI op1 op2));
16012   effect(DEF cr, USE_KILL op1, USE op2);
16013 
16014   format %{ "addl    $op1, $op2\t# overflow check int" %}
16015 
16016   ins_encode %{
16017     __ addl($op1$$Register, $op2$$Register);
16018   %}
16019   ins_pipe(ialu_reg_reg);
16020 %}
16021 
16022 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16023 %{
16024   match(Set cr (OverflowAddI op1 op2));
16025   effect(DEF cr, USE_KILL op1, USE op2);
16026 
16027   format %{ "addl    $op1, $op2\t# overflow check int" %}
16028 
16029   ins_encode %{
16030     __ addl($op1$$Register, $op2$$constant);
16031   %}
16032   ins_pipe(ialu_reg_reg);
16033 %}
16034 
16035 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16036 %{
16037   match(Set cr (OverflowAddL op1 op2));
16038   effect(DEF cr, USE_KILL op1, USE op2);
16039 
16040   format %{ "addq    $op1, $op2\t# overflow check long" %}
16041   ins_encode %{
16042     __ addq($op1$$Register, $op2$$Register);
16043   %}
16044   ins_pipe(ialu_reg_reg);
16045 %}
16046 
16047 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16048 %{
16049   match(Set cr (OverflowAddL op1 op2));
16050   effect(DEF cr, USE_KILL op1, USE op2);
16051 
16052   format %{ "addq    $op1, $op2\t# overflow check long" %}
16053   ins_encode %{
16054     __ addq($op1$$Register, $op2$$constant);
16055   %}
16056   ins_pipe(ialu_reg_reg);
16057 %}
16058 
16059 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16060 %{
16061   match(Set cr (OverflowSubI op1 op2));
16062 
16063   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16064   ins_encode %{
16065     __ cmpl($op1$$Register, $op2$$Register);
16066   %}
16067   ins_pipe(ialu_reg_reg);
16068 %}
16069 
16070 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16071 %{
16072   match(Set cr (OverflowSubI op1 op2));
16073 
16074   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16075   ins_encode %{
16076     __ cmpl($op1$$Register, $op2$$constant);
16077   %}
16078   ins_pipe(ialu_reg_reg);
16079 %}
16080 
16081 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16082 %{
16083   match(Set cr (OverflowSubL op1 op2));
16084 
16085   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16086   ins_encode %{
16087     __ cmpq($op1$$Register, $op2$$Register);
16088   %}
16089   ins_pipe(ialu_reg_reg);
16090 %}
16091 
16092 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16093 %{
16094   match(Set cr (OverflowSubL op1 op2));
16095 
16096   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16097   ins_encode %{
16098     __ cmpq($op1$$Register, $op2$$constant);
16099   %}
16100   ins_pipe(ialu_reg_reg);
16101 %}
16102 
16103 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16104 %{
16105   match(Set cr (OverflowSubI zero op2));
16106   effect(DEF cr, USE_KILL op2);
16107 
16108   format %{ "negl    $op2\t# overflow check int" %}
16109   ins_encode %{
16110     __ negl($op2$$Register);
16111   %}
16112   ins_pipe(ialu_reg_reg);
16113 %}
16114 
16115 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16116 %{
16117   match(Set cr (OverflowSubL zero op2));
16118   effect(DEF cr, USE_KILL op2);
16119 
16120   format %{ "negq    $op2\t# overflow check long" %}
16121   ins_encode %{
16122     __ negq($op2$$Register);
16123   %}
16124   ins_pipe(ialu_reg_reg);
16125 %}
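      // Note for the two negate forms above: negl/negq set OF exactly when
      // the operand is the most negative value (0x80000000 /
      // 0x8000000000000000), the one case where negation overflows, so the
      // neg itself doubles as the overflow check.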
16126 
16127 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16128 %{
16129   match(Set cr (OverflowMulI op1 op2));
16130   effect(DEF cr, USE_KILL op1, USE op2);
16131 
16132   format %{ "imull    $op1, $op2\t# overflow check int" %}
16133   ins_encode %{
16134     __ imull($op1$$Register, $op2$$Register);
16135   %}
16136   ins_pipe(ialu_reg_reg_alu0);
16137 %}
16138 
16139 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16140 %{
16141   match(Set cr (OverflowMulI op1 op2));
16142   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16143 
16144   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
16145   ins_encode %{
16146     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16147   %}
16148   ins_pipe(ialu_reg_reg_alu0);
16149 %}
16150 
16151 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16152 %{
16153   match(Set cr (OverflowMulL op1 op2));
16154   effect(DEF cr, USE_KILL op1, USE op2);
16155 
16156   format %{ "imulq    $op1, $op2\t# overflow check long" %}
16157   ins_encode %{
16158     __ imulq($op1$$Register, $op2$$Register);
16159   %}
16160   ins_pipe(ialu_reg_reg_alu0);
16161 %}
16162 
16163 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16164 %{
16165   match(Set cr (OverflowMulL op1 op2));
16166   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16167 
16168   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
16169   ins_encode %{
16170     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16171   %}
16172   ins_pipe(ialu_reg_reg_alu0);
16173 %}
16174 
16175 
16176 //----------Control Flow Instructions------------------------------------------
16177 // Signed compare Instructions
16178 
16179 // XXX more variants!!
16180 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16181 %{
16182   match(Set cr (CmpI op1 op2));
16183   effect(DEF cr, USE op1, USE op2);
16184 
16185   format %{ "cmpl    $op1, $op2" %}
16186   ins_encode %{
16187     __ cmpl($op1$$Register, $op2$$Register);
16188   %}
16189   ins_pipe(ialu_cr_reg_reg);
16190 %}
16191 
16192 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16193 %{
16194   match(Set cr (CmpI op1 op2));
16195 
16196   format %{ "cmpl    $op1, $op2" %}
16197   ins_encode %{
16198     __ cmpl($op1$$Register, $op2$$constant);
16199   %}
16200   ins_pipe(ialu_cr_reg_imm);
16201 %}
16202 
16203 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16204 %{
16205   match(Set cr (CmpI op1 (LoadI op2)));
16206 
16207   ins_cost(500); // XXX
16208   format %{ "cmpl    $op1, $op2" %}
16209   ins_encode %{
16210     __ cmpl($op1$$Register, $op2$$Address);
16211   %}
16212   ins_pipe(ialu_cr_reg_mem);
16213 %}
16214 
16215 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16216 %{
16217   match(Set cr (CmpI src zero));
16218 
16219   format %{ "testl   $src, $src" %}
16220   ins_encode %{
16221     __ testl($src$$Register, $src$$Register);
16222   %}
16223   ins_pipe(ialu_cr_reg_imm);
16224 %}
16225 
16226 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16227 %{
16228   match(Set cr (CmpI (AndI src con) zero));
16229 
16230   format %{ "testl   $src, $con" %}
16231   ins_encode %{
16232     __ testl($src$$Register, $con$$constant);
16233   %}
16234   ins_pipe(ialu_cr_reg_imm);
16235 %}
16236 
16237 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16238 %{
16239   match(Set cr (CmpI (AndI src1 src2) zero));
16240 
16241   format %{ "testl   $src1, $src2" %}
16242   ins_encode %{
16243     __ testl($src1$$Register, $src2$$Register);
16244   %}
16245   ins_pipe(ialu_cr_reg_imm);
16246 %}
16247 
16248 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16249 %{
16250   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16251 
16252   format %{ "testl   $src, $mem" %}
16253   ins_encode %{
16254     __ testl($src$$Register, $mem$$Address);
16255   %}
16256   ins_pipe(ialu_cr_reg_mem);
16257 %}
16258 
16259 // Unsigned compare Instructions; really, same as signed except they
16260 // produce an rFlagsRegU instead of rFlagsReg.
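      // The cmpl emitted is identical to the signed case; only the consumer
      // changes, e.g. a branch over rFlagsReg uses jl/jg while one over
      // rFlagsRegU uses jb/ja.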
16261 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16262 %{
16263   match(Set cr (CmpU op1 op2));
16264 
16265   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16266   ins_encode %{
16267     __ cmpl($op1$$Register, $op2$$Register);
16268   %}
16269   ins_pipe(ialu_cr_reg_reg);
16270 %}
16271 
16272 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16273 %{
16274   match(Set cr (CmpU op1 op2));
16275 
16276   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16277   ins_encode %{
16278     __ cmpl($op1$$Register, $op2$$constant);
16279   %}
16280   ins_pipe(ialu_cr_reg_imm);
16281 %}
16282 
16283 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16284 %{
16285   match(Set cr (CmpU op1 (LoadI op2)));
16286 
16287   ins_cost(500); // XXX
16288   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16289   ins_encode %{
16290     __ cmpl($op1$$Register, $op2$$Address);
16291   %}
16292   ins_pipe(ialu_cr_reg_mem);
16293 %}
16294 
16295 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16296 %{
16297   match(Set cr (CmpU src zero));
16298 
16299   format %{ "testl   $src, $src\t# unsigned" %}
16300   ins_encode %{
16301     __ testl($src$$Register, $src$$Register);
16302   %}
16303   ins_pipe(ialu_cr_reg_imm);
16304 %}
16305 
16306 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16307 %{
16308   match(Set cr (CmpP op1 op2));
16309 
16310   format %{ "cmpq    $op1, $op2\t# ptr" %}
16311   ins_encode %{
16312     __ cmpq($op1$$Register, $op2$$Register);
16313   %}
16314   ins_pipe(ialu_cr_reg_reg);
16315 %}
16316 
16317 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16318 %{
16319   match(Set cr (CmpP op1 (LoadP op2)));
16320   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16321 
16322   ins_cost(500); // XXX
16323   format %{ "cmpq    $op1, $op2\t# ptr" %}
16324   ins_encode %{
16325     __ cmpq($op1$$Register, $op2$$Address);
16326   %}
16327   ins_pipe(ialu_cr_reg_mem);
16328 %}
16329 
16330 // XXX this is generalized by compP_rReg_mem???
16331 // Compare raw pointer (used in out-of-heap check).
16332 // Only works because non-oop pointers must be raw pointers
16333 // and raw pointers have no anti-dependencies.
16334 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16335 %{
16336   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16337             n->in(2)->as_Load()->barrier_data() == 0);
16338   match(Set cr (CmpP op1 (LoadP op2)));
16339 
16340   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16341   ins_encode %{
16342     __ cmpq($op1$$Register, $op2$$Address);
16343   %}
16344   ins_pipe(ialu_cr_reg_mem);
16345 %}
16346 
16347 // This will generate a signed flags result. This should be OK since
16348 // any compare against zero should be eq/neq.
16349 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16350 %{
16351   match(Set cr (CmpP src zero));
16352 
16353   format %{ "testq   $src, $src\t# ptr" %}
16354   ins_encode %{
16355     __ testq($src$$Register, $src$$Register);
16356   %}
16357   ins_pipe(ialu_cr_reg_imm);
16358 %}
16359 
16360 // This will generate a signed flags result. This should be OK since
16361 // any compare against zero should be eq/neq.
16362 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16363 %{
16364   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16365             n->in(1)->as_Load()->barrier_data() == 0);
16366   match(Set cr (CmpP (LoadP op) zero));
16367 
16368   ins_cost(500); // XXX
16369   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16370   ins_encode %{
16371     __ testq($op$$Address, 0xFFFFFFFF);
16372   %}
16373   ins_pipe(ialu_cr_reg_imm);
16374 %}
16375 
16376 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16377 %{
16378   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16379             n->in(1)->as_Load()->barrier_data() == 0);
16380   match(Set cr (CmpP (LoadP mem) zero));
16381 
16382   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16383   ins_encode %{
16384     __ cmpq(r12, $mem$$Address);
16385   %}
16386   ins_pipe(ialu_cr_reg_mem);
16387 %}
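      // With zero-based compressed oops, r12 (the heap-base register) is
      // known to hold zero, so the cmpq above performs the null check without
      // materializing a zero constant; testN_mem_reg0 below plays the same
      // trick with cmpl.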
16388 
16389 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16390 %{
16391   match(Set cr (CmpN op1 op2));
16392 
16393   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16394   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16395   ins_pipe(ialu_cr_reg_reg);
16396 %}
16397 
16398 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16399 %{
16400   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16401   match(Set cr (CmpN src (LoadN mem)));
16402 
16403   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16404   ins_encode %{
16405     __ cmpl($src$$Register, $mem$$Address);
16406   %}
16407   ins_pipe(ialu_cr_reg_mem);
16408 %}
16409 
16410 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16411   match(Set cr (CmpN op1 op2));
16412 
16413   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16414   ins_encode %{
16415     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16416   %}
16417   ins_pipe(ialu_cr_reg_imm);
16418 %}
16419 
16420 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16421 %{
16422   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16423   match(Set cr (CmpN src (LoadN mem)));
16424 
16425   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16426   ins_encode %{
16427     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16428   %}
16429   ins_pipe(ialu_cr_reg_mem);
16430 %}
16431 
16432 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16433   match(Set cr (CmpN op1 op2));
16434 
16435   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16436   ins_encode %{
16437     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16438   %}
16439   ins_pipe(ialu_cr_reg_imm);
16440 %}
16441 
16442 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16443 %{
16444   predicate(!UseCompactObjectHeaders);
16445   match(Set cr (CmpN src (LoadNKlass mem)));
16446 
16447   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16448   ins_encode %{
16449     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16450   %}
16451   ins_pipe(ialu_cr_reg_mem);
16452 %}
16453 
16454 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16455   match(Set cr (CmpN src zero));
16456 
16457   format %{ "testl   $src, $src\t# compressed ptr" %}
16458   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16459   ins_pipe(ialu_cr_reg_imm);
16460 %}
16461 
16462 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16463 %{
16464   predicate(CompressedOops::base() != nullptr &&
16465             n->in(1)->as_Load()->barrier_data() == 0);
16466   match(Set cr (CmpN (LoadN mem) zero));
16467 
16468   ins_cost(500); // XXX
16469   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16470   ins_encode %{
16471     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16472   %}
16473   ins_pipe(ialu_cr_reg_mem);
16474 %}
16475 
16476 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16477 %{
16478   predicate(CompressedOops::base() == nullptr &&
16479             n->in(1)->as_Load()->barrier_data() == 0);
16480   match(Set cr (CmpN (LoadN mem) zero));
16481 
16482   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16483   ins_encode %{
16484     __ cmpl(r12, $mem$$Address);
16485   %}
16486   ins_pipe(ialu_cr_reg_mem);
16487 %}
16488 
16489 // Yanked all unsigned pointer compare operations.
16490 // Pointer compares are done with CmpP which is already unsigned.
16491 
16492 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16493 %{
16494   match(Set cr (CmpL op1 op2));
16495 
16496   format %{ "cmpq    $op1, $op2" %}
16497   ins_encode %{
16498     __ cmpq($op1$$Register, $op2$$Register);
16499   %}
16500   ins_pipe(ialu_cr_reg_reg);
16501 %}
16502 
16503 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16504 %{
16505   match(Set cr (CmpL op1 op2));
16506 
16507   format %{ "cmpq    $op1, $op2" %}
16508   ins_encode %{
16509     __ cmpq($op1$$Register, $op2$$constant);
16510   %}
16511   ins_pipe(ialu_cr_reg_imm);
16512 %}
16513 
16514 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16515 %{
16516   match(Set cr (CmpL op1 (LoadL op2)));
16517 
16518   format %{ "cmpq    $op1, $op2" %}
16519   ins_encode %{
16520     __ cmpq($op1$$Register, $op2$$Address);
16521   %}
16522   ins_pipe(ialu_cr_reg_mem);
16523 %}
16524 
16525 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16526 %{
16527   match(Set cr (CmpL src zero));
16528 
16529   format %{ "testq   $src, $src" %}
16530   ins_encode %{
16531     __ testq($src$$Register, $src$$Register);
16532   %}
16533   ins_pipe(ialu_cr_reg_imm);
16534 %}
16535 
16536 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16537 %{
16538   match(Set cr (CmpL (AndL src con) zero));
16539 
16540   format %{ "testq   $src, $con\t# long" %}
16541   ins_encode %{
16542     __ testq($src$$Register, $con$$constant);
16543   %}
16544   ins_pipe(ialu_cr_reg_imm);
16545 %}
16546 
16547 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16548 %{
16549   match(Set cr (CmpL (AndL src1 src2) zero));
16550 
16551   format %{ "testq   $src1, $src2\t# long" %}
16552   ins_encode %{
16553     __ testq($src1$$Register, $src2$$Register);
16554   %}
16555   ins_pipe(ialu_cr_reg_imm);
16556 %}
16557 
16558 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16559 %{
16560   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16561 
16562   format %{ "testq   $src, $mem" %}
16563   ins_encode %{
16564     __ testq($src$$Register, $mem$$Address);
16565   %}
16566   ins_pipe(ialu_cr_reg_mem);
16567 %}
16568 
16569 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16570 %{
16571   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16572 
16573   format %{ "testq   $src, $mem" %}
16574   ins_encode %{
16575     __ testq($src$$Register, $mem$$Address);
16576   %}
16577   ins_pipe(ialu_cr_reg_mem);
16578 %}
16579 
16580 // Manifest a CmpU result in an integer register.  Very painful.
16581 // This is the test to avoid.
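      // For example Integer.compareUnsigned(a, b) can produce this node; the
      // expansion yields -1 / 0 / +1 via cmp, jb (keep the -1), then setne
      // (0 if equal, 1 if above). CmpL3/CmpUL3 below are the 64-bit signed
      // and unsigned analogues (lcmp bytecode, Long.compareUnsigned).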
16582 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16583 %{
16584   match(Set dst (CmpU3 src1 src2));
16585   effect(KILL flags);
16586 
16587   ins_cost(275); // XXX
16588   format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
16589             "movl    $dst, -1\n\t"
16590             "jb,u    done\n\t"
16591             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16592     "done:" %}
16593   ins_encode %{
16594     Label done;
16595     __ cmpl($src1$$Register, $src2$$Register);
16596     __ movl($dst$$Register, -1);
16597     __ jccb(Assembler::below, done);
16598     __ setcc(Assembler::notZero, $dst$$Register);
16599     __ bind(done);
16600   %}
16601   ins_pipe(pipe_slow);
16602 %}
16603 
16604 // Manifest a CmpL result in an integer register.  Very painful.
16605 // This is the test to avoid.
16606 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16607 %{
16608   match(Set dst (CmpL3 src1 src2));
16609   effect(KILL flags);
16610 
16611   ins_cost(275); // XXX
16612   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16613             "movl    $dst, -1\n\t"
16614             "jl,s    done\n\t"
16615             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16616     "done:" %}
16617   ins_encode %{
16618     Label done;
16619     __ cmpq($src1$$Register, $src2$$Register);
16620     __ movl($dst$$Register, -1);
16621     __ jccb(Assembler::less, done);
16622     __ setcc(Assembler::notZero, $dst$$Register);
16623     __ bind(done);
16624   %}
16625   ins_pipe(pipe_slow);
16626 %}
16627 
16628 // Manifest a CmpUL result in an integer register.  Very painful.
16629 // This is the test to avoid.
16630 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16631 %{
16632   match(Set dst (CmpUL3 src1 src2));
16633   effect(KILL flags);
16634 
16635   ins_cost(275); // XXX
16636   format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
16637             "movl    $dst, -1\n\t"
16638             "jb,u    done\n\t"
16639             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16640     "done:" %}
16641   ins_encode %{
16642     Label done;
16643     __ cmpq($src1$$Register, $src2$$Register);
16644     __ movl($dst$$Register, -1);
16645     __ jccb(Assembler::below, done);
16646     __ setcc(Assembler::notZero, $dst$$Register);
16647     __ bind(done);
16648   %}
16649   ins_pipe(pipe_slow);
16650 %}
16651 
16652 // Unsigned long compare Instructions; really, same as signed long except they
16653 // produce an rFlagsRegU instead of rFlagsReg.
16654 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16655 %{
16656   match(Set cr (CmpUL op1 op2));
16657 
16658   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16659   ins_encode %{
16660     __ cmpq($op1$$Register, $op2$$Register);
16661   %}
16662   ins_pipe(ialu_cr_reg_reg);
16663 %}
16664 
16665 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16666 %{
16667   match(Set cr (CmpUL op1 op2));
16668 
16669   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16670   ins_encode %{
16671     __ cmpq($op1$$Register, $op2$$constant);
16672   %}
16673   ins_pipe(ialu_cr_reg_imm);
16674 %}
16675 
16676 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16677 %{
16678   match(Set cr (CmpUL op1 (LoadL op2)));
16679 
16680   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16681   ins_encode %{
16682     __ cmpq($op1$$Register, $op2$$Address);
16683   %}
16684   ins_pipe(ialu_cr_reg_mem);
16685 %}
16686 
16687 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16688 %{
16689   match(Set cr (CmpUL src zero));
16690 
16691   format %{ "testq   $src, $src\t# unsigned" %}
16692   ins_encode %{
16693     __ testq($src$$Register, $src$$Register);
16694   %}
16695   ins_pipe(ialu_cr_reg_imm);
16696 %}
16697 
16698 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16699 %{
16700   match(Set cr (CmpI (LoadB mem) imm));
16701 
16702   ins_cost(125);
16703   format %{ "cmpb    $mem, $imm" %}
16704   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16705   ins_pipe(ialu_cr_reg_mem);
16706 %}
16707 
16708 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16709 %{
16710   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16711 
16712   ins_cost(125);
16713   format %{ "testb   $mem, $imm\t# ubyte" %}
16714   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16715   ins_pipe(ialu_cr_reg_mem);
16716 %}
16717 
16718 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16719 %{
16720   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16721 
16722   ins_cost(125);
16723   format %{ "testb   $mem, $imm\t# byte" %}
16724   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16725   ins_pipe(ialu_cr_reg_mem);
16726 %}
16727 
16728 //----------Max and Min--------------------------------------------------------
16729 // Min Instructions
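      // x86 has no scalar integer min/max instruction, so MinI/MaxI expand
      // below into a compare plus conditional move. Roughly, for
      // Math.min(x, y):
      //   cmpl    x, y
      //   cmovlgt x, y          # if x > y, take the smaller y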
16730 
16731 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16732 %{
16733   predicate(!UseAPX);
16734   effect(USE_DEF dst, USE src, USE cr);
16735 
16736   format %{ "cmovlgt $dst, $src\t# min" %}
16737   ins_encode %{
16738     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16739   %}
16740   ins_pipe(pipe_cmov_reg);
16741 %}
16742 
16743 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16744 %{
16745   predicate(UseAPX);
16746   effect(DEF dst, USE src1, USE src2, USE cr);
16747 
16748   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16749   ins_encode %{
16750     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16751   %}
16752   ins_pipe(pipe_cmov_reg);
16753 %}
16754 
16755 instruct minI_rReg(rRegI dst, rRegI src)
16756 %{
16757   predicate(!UseAPX);
16758   match(Set dst (MinI dst src));
16759 
16760   ins_cost(200);
16761   expand %{
16762     rFlagsReg cr;
16763     compI_rReg(cr, dst, src);
16764     cmovI_reg_g(dst, src, cr);
16765   %}
16766 %}
16767 
16768 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16769 %{
16770   predicate(UseAPX);
16771   match(Set dst (MinI src1 src2));
16772   effect(DEF dst, USE src1, USE src2);
16773   flag(PD::Flag_ndd_demotable_opr1);
16774 
16775   ins_cost(200);
16776   expand %{
16777     rFlagsReg cr;
16778     compI_rReg(cr, src1, src2);
16779     cmovI_reg_g_ndd(dst, src1, src2, cr);
16780   %}
16781 %}
16782 
16783 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16784 %{
16785   predicate(!UseAPX);
16786   effect(USE_DEF dst, USE src, USE cr);
16787 
16788   format %{ "cmovllt $dst, $src\t# max" %}
16789   ins_encode %{
16790     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16791   %}
16792   ins_pipe(pipe_cmov_reg);
16793 %}
16794 
16795 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16796 %{
16797   predicate(UseAPX);
16798   effect(DEF dst, USE src1, USE src2, USE cr);
16799 
16800   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16801   ins_encode %{
16802     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16803   %}
16804   ins_pipe(pipe_cmov_reg);
16805 %}
16806 
16807 instruct maxI_rReg(rRegI dst, rRegI src)
16808 %{
16809   predicate(!UseAPX);
16810   match(Set dst (MaxI dst src));
16811 
16812   ins_cost(200);
16813   expand %{
16814     rFlagsReg cr;
16815     compI_rReg(cr, dst, src);
16816     cmovI_reg_l(dst, src, cr);
16817   %}
16818 %}
16819 
16820 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16821 %{
16822   predicate(UseAPX);
16823   match(Set dst (MaxI src1 src2));
16824   effect(DEF dst, USE src1, USE src2);
16825   flag(PD::Flag_ndd_demotable_opr1);
16826 
16827   ins_cost(200);
16828   expand %{
16829     rFlagsReg cr;
16830     compI_rReg(cr, src1, src2);
16831     cmovI_reg_l_ndd(dst, src1, src2, cr);
16832   %}
16833 %}
16834 
16835 // ============================================================================
16836 // Branch Instructions
16837 
16838 // Jump Direct - Label defines a relative address from JMP+1
16839 instruct jmpDir(label labl)
16840 %{
16841   match(Goto);
16842   effect(USE labl);
16843 
16844   ins_cost(300);
16845   format %{ "jmp     $labl" %}
16846   size(5);
16847   ins_encode %{
16848     Label* L = $labl$$label;
16849     __ jmp(*L, false); // Always long jump
16850   %}
16851   ins_pipe(pipe_jmp);
16852 %}
16853 
16854 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16855 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16856 %{
16857   match(If cop cr);
16858   effect(USE labl);
16859 
16860   ins_cost(300);
16861   format %{ "j$cop     $labl" %}
16862   size(6);
16863   ins_encode %{
16864     Label* L = $labl$$label;
16865     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16866   %}
16867   ins_pipe(pipe_jcc);
16868 %}
16869 
16870 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16871 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16872 %{
16873   match(CountedLoopEnd cop cr);
16874   effect(USE labl);
16875 
16876   ins_cost(300);
16877   format %{ "j$cop     $labl\t# loop end" %}
16878   size(6);
16879   ins_encode %{
16880     Label* L = $labl$$label;
16881     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16882   %}
16883   ins_pipe(pipe_jcc);
16884 %}
16885 
16886 // Jump Direct Conditional - using unsigned comparison
16887 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16888   match(If cop cmp);
16889   effect(USE labl);
16890 
16891   ins_cost(300);
16892   format %{ "j$cop,u   $labl" %}
16893   size(6);
16894   ins_encode %{
16895     Label* L = $labl$$label;
16896     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16897   %}
16898   ins_pipe(pipe_jcc);
16899 %}
16900 
16901 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16902   match(If cop cmp);
16903   effect(USE labl);
16904 
16905   ins_cost(200);
16906   format %{ "j$cop,u   $labl" %}
16907   size(6);
16908   ins_encode %{
16909     Label* L = $labl$$label;
16910     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16911   %}
16912   ins_pipe(pipe_jcc);
16913 %}
16914 
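// Jump Direct Conditional - unordered float compare where 'eq'/'ne' must also
// account for NaN: an unordered result sets PF, so 'ne' branches on parity as
// well, while 'eq' must fall through when PF is set.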
16915 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16916   match(If cop cmp);
16917   effect(USE labl);
16918 
16919   ins_cost(200);
16920   format %{ $$template
16921     if ($cop$$cmpcode == Assembler::notEqual) {
16922       $$emit$$"jp,u    $labl\n\t"
16923       $$emit$$"j$cop,u   $labl"
16924     } else {
16925       $$emit$$"jp,u    done\n\t"
16926       $$emit$$"j$cop,u   $labl\n\t"
16927       $$emit$$"done:"
16928     }
16929   %}
16930   ins_encode %{
16931     Label* l = $labl$$label;
16932     if ($cop$$cmpcode == Assembler::notEqual) {
16933       __ jcc(Assembler::parity, *l, false);
16934       __ jcc(Assembler::notEqual, *l, false);
16935     } else if ($cop$$cmpcode == Assembler::equal) {
16936       Label done;
16937       __ jccb(Assembler::parity, done);
16938       __ jcc(Assembler::equal, *l, false);
16939       __ bind(done);
16940     } else {
16941        ShouldNotReachHere();
16942     }
16943   %}
16944   ins_pipe(pipe_jcc);
16945 %}
16946 
16947 // Jump Direct Conditional - using signed and unsigned comparison
16948 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
16949   match(If cop cmp);
16950   effect(USE labl);
16951 
16952   ins_cost(200);
16953   format %{ "j$cop,su   $labl" %}
16954   size(6);
16955   ins_encode %{
16956     Label* L = $labl$$label;
16957     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16958   %}
16959   ins_pipe(pipe_jcc);
16960 %}
16961 
16962 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden
// internal cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
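//
// A rough sketch of the scan's effect (illustrative only; the emitted code
// comes from check_klass_subtype_slow_path_linear):
//
//   Array<Klass*>* ss = sub->secondary_supers();
//   for (int i = 0; i < ss->length(); i++) {
//     if (ss->at(i) == super) {
//       sub->set_secondary_super_cache(super);  // hit: cache it, return 0
//       return 0;
//     }
//   }
//   return 1;                                   // miss: NZ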
16968 
16969 instruct partialSubtypeCheck(rdi_RegP result,
16970                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16971                              rFlagsReg cr)
16972 %{
16973   match(Set result (PartialSubtypeCheck sub super));
16974   predicate(!UseSecondarySupersTable);
16975   effect(KILL rcx, KILL cr);
16976 
16977   ins_cost(1100);  // slightly larger than the next version
16978   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16979             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16980             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16981             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16982             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
16983             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16984             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
16985     "miss:\t" %}
16986 
16987   ins_encode %{
16988     Label miss;
16989     // NB: Callers may assume that, when $result is a valid register,
16990     // check_klass_subtype_slow_path_linear sets it to a nonzero
16991     // value.
16992     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16993                                             $rcx$$Register, $result$$Register,
16994                                             nullptr, &miss,
16995                                             /*set_cond_codes:*/ true);
16996     __ xorptr($result$$Register, $result$$Register);
16997     __ bind(miss);
16998   %}
16999 
17000   ins_pipe(pipe_slow);
17001 %}
17002 
17003 // ============================================================================
17004 // Two versions of hashtable-based partialSubtypeCheck, both used when
17005 // we need to search for a super class in the secondary supers array.
17006 // The first is used when we don't know _a priori_ the class being
17007 // searched for. The second, far more common, is used when we do know:
17008 // this is used for instanceof, checkcast, and any case where C2 can
17009 // determine it by constant propagation.
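//
// Selection is driven by the matched shape (illustrative):
//
//   (PartialSubtypeCheck sub super)                    -> variable-super rule
//   (PartialSubtypeCheck sub (Binary super super_con)) -> constant-super rule,
//       with the superclass's hash_slot() folded in at compile time.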
17010 
17011 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17012                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17013                                        rFlagsReg cr)
17014 %{
17015   match(Set result (PartialSubtypeCheck sub super));
17016   predicate(UseSecondarySupersTable);
17017   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17018 
17019   ins_cost(1000);
17020   format %{ "partialSubtypeCheck $result, $sub, $super" %}
17021 
17022   ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
17025   %}
17026 
17027   ins_pipe(pipe_slow);
17028 %}
17029 
17030 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17031                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17032                                        rFlagsReg cr)
17033 %{
17034   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17035   predicate(UseSecondarySupersTable);
17036   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17037 
  ins_cost(700);  // smaller than the variable-superclass version above
17039   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17040 
17041   ins_encode %{
17042     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17043     if (InlineSecondarySupersTest) {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register, $result$$Register,
                                             super_klass_slot);
17047     } else {
17048       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17049     }
17050   %}
17051 
17052   ins_pipe(pipe_slow);
17053 %}
17054 
17055 // ============================================================================
17056 // Branch Instructions -- short offset versions
17057 //
17058 // These instructions are used to replace jumps of a long offset (the default
17059 // match) with jumps of a shorter offset.  These instructions are all tagged
17060 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17061 // match rules in general matching.  Instead, the ADLC generates a conversion
17062 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether a
// short branch can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
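//
// For example, a conditional branch shrinks from the 6-byte long form to the
// 2-byte short form when the target lies within rel8 range (-128..+127):
//
//   0F 8x cd cd cd cd    jcc rel32    (size(6), default match)
//   7x cb                jcc rel8     (size(2), ins_short_branch variant)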
17066 
17067 // Jump Direct - Label defines a relative address from JMP+1
17068 instruct jmpDir_short(label labl) %{
17069   match(Goto);
17070   effect(USE labl);
17071 
17072   ins_cost(300);
17073   format %{ "jmp,s   $labl" %}
17074   size(2);
17075   ins_encode %{
17076     Label* L = $labl$$label;
17077     __ jmpb(*L);
17078   %}
17079   ins_pipe(pipe_jmp);
17080   ins_short_branch(1);
17081 %}
17082 
17083 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17084 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17085   match(If cop cr);
17086   effect(USE labl);
17087 
17088   ins_cost(300);
17089   format %{ "j$cop,s   $labl" %}
17090   size(2);
17091   ins_encode %{
17092     Label* L = $labl$$label;
17093     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17094   %}
17095   ins_pipe(pipe_jcc);
17096   ins_short_branch(1);
17097 %}
17098 
17099 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17100 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17101   match(CountedLoopEnd cop cr);
17102   effect(USE labl);
17103 
17104   ins_cost(300);
17105   format %{ "j$cop,s   $labl\t# loop end" %}
17106   size(2);
17107   ins_encode %{
17108     Label* L = $labl$$label;
17109     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17110   %}
17111   ins_pipe(pipe_jcc);
17112   ins_short_branch(1);
17113 %}
17114 
17115 // Jump Direct Conditional - using unsigned comparison
17116 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17117   match(If cop cmp);
17118   effect(USE labl);
17119 
17120   ins_cost(300);
17121   format %{ "j$cop,us  $labl" %}
17122   size(2);
17123   ins_encode %{
17124     Label* L = $labl$$label;
17125     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17126   %}
17127   ins_pipe(pipe_jcc);
17128   ins_short_branch(1);
17129 %}
17130 
17131 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17132   match(If cop cmp);
17133   effect(USE labl);
17134 
17135   ins_cost(300);
17136   format %{ "j$cop,us  $labl" %}
17137   size(2);
17138   ins_encode %{
17139     Label* L = $labl$$label;
17140     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17141   %}
17142   ins_pipe(pipe_jcc);
17143   ins_short_branch(1);
17144 %}
17145 
17146 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17147   match(If cop cmp);
17148   effect(USE labl);
17149 
17150   ins_cost(300);
17151   format %{ $$template
17152     if ($cop$$cmpcode == Assembler::notEqual) {
17153       $$emit$$"jp,u,s  $labl\n\t"
17154       $$emit$$"j$cop,u,s  $labl"
17155     } else {
17156       $$emit$$"jp,u,s  done\n\t"
17157       $$emit$$"j$cop,u,s  $labl\n\t"
17158       $$emit$$"done:"
17159     }
17160   %}
17161   size(4);
17162   ins_encode %{
17163     Label* l = $labl$$label;
17164     if ($cop$$cmpcode == Assembler::notEqual) {
17165       __ jccb(Assembler::parity, *l);
17166       __ jccb(Assembler::notEqual, *l);
17167     } else if ($cop$$cmpcode == Assembler::equal) {
17168       Label done;
17169       __ jccb(Assembler::parity, done);
17170       __ jccb(Assembler::equal, *l);
17171       __ bind(done);
17172     } else {
17173        ShouldNotReachHere();
17174     }
17175   %}
17176   ins_pipe(pipe_jcc);
17177   ins_short_branch(1);
17178 %}
17179 
17180 // Jump Direct Conditional - using signed and unsigned comparison
17181 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17182   match(If cop cmp);
17183   effect(USE labl);
17184 
17185   ins_cost(300);
17186   format %{ "j$cop,sus  $labl" %}
17187   size(2);
17188   ins_encode %{
17189     Label* L = $labl$$label;
17190     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17191   %}
17192   ins_pipe(pipe_jcc);
17193   ins_short_branch(1);
17194 %}
17195 
17196 // ============================================================================
17197 // inlined locking and unlocking
17198 
17199 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17200   match(Set cr (FastLock object box));
17201   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17202   ins_cost(300);
17203   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17204   ins_encode %{
17205     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17206   %}
17207   ins_pipe(pipe_slow);
17208 %}
17209 
17210 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17211   match(Set cr (FastUnlock object rax_reg));
17212   effect(TEMP tmp, USE_KILL rax_reg);
17213   ins_cost(300);
17214   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17215   ins_encode %{
17216     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17217   %}
17218   ins_pipe(pipe_slow);
17219 %}
17220 
17221 
17222 // ============================================================================
17223 // Safepoint Instructions
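//
// The poll register holds the thread-local polling page address. Arming a
// safepoint makes that page unreadable, so the testl load below traps and the
// signal handler diverts the thread to the safepoint handler (a sketch of the
// mechanism; the authoritative logic lives in the runtime).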
17224 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17225 %{
17226   match(SafePoint poll);
17227   effect(KILL cr, USE poll);
17228 
17229   format %{ "testl   rax, [$poll]\t"
17230             "# Safepoint: poll for GC" %}
17231   ins_cost(125);
17232   ins_encode %{
17233     __ relocate(relocInfo::poll_type);
17234     address pre_pc = __ pc();
17235     __ testl(rax, Address($poll$$Register, 0));
17236     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17237   %}
17238   ins_pipe(ialu_reg_mem);
17239 %}
17240 
17241 instruct mask_all_evexL(kReg dst, rRegL src) %{
17242   match(Set dst (MaskAll src));
17243   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17244   ins_encode %{
17245     int mask_len = Matcher::vector_length(this);
17246     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17247   %}
17248   ins_pipe( pipe_slow );
17249 %}
17250 
17251 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17252   predicate(Matcher::vector_length(n) > 32);
17253   match(Set dst (MaskAll src));
17254   effect(TEMP tmp);
17255   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17256   ins_encode %{
17257     int mask_len = Matcher::vector_length(this);
17258     __ movslq($tmp$$Register, $src$$Register);
17259     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17260   %}
17261   ins_pipe( pipe_slow );
17262 %}
17263 
17264 // ============================================================================
17265 // Procedure Call/Return Instructions
17266 // Call Java Static Instruction
17267 // Note: If this code changes, the corresponding ret_addr_offset() and
17268 //       compute_padding() functions will have to be adjusted.
17269 instruct CallStaticJavaDirect(method meth) %{
17270   match(CallStaticJava);
17271   effect(USE meth);
17272 
17273   ins_cost(300);
17274   format %{ "call,static " %}
17275   opcode(0xE8); /* E8 cd */
17276   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17277   ins_pipe(pipe_slow);
17278   ins_alignment(4);
17279 %}
17280 
17281 // Call Java Dynamic Instruction
17282 // Note: If this code changes, the corresponding ret_addr_offset() and
17283 //       compute_padding() functions will have to be adjusted.
17284 instruct CallDynamicJavaDirect(method meth)
17285 %{
17286   match(CallDynamicJava);
17287   effect(USE meth);
17288 
17289   ins_cost(300);
17290   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17291             "call,dynamic " %}
17292   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17293   ins_pipe(pipe_slow);
17294   ins_alignment(4);
17295 %}
17296 
17297 // Call Runtime Instruction
17298 instruct CallRuntimeDirect(method meth)
17299 %{
17300   match(CallRuntime);
17301   effect(USE meth);
17302 
17303   ins_cost(300);
17304   format %{ "call,runtime " %}
17305   ins_encode(clear_avx, Java_To_Runtime(meth));
17306   ins_pipe(pipe_slow);
17307 %}
17308 
17309 // Call runtime without safepoint
17310 instruct CallLeafDirect(method meth)
17311 %{
17312   match(CallLeaf);
17313   effect(USE meth);
17314 
17315   ins_cost(300);
17316   format %{ "call_leaf,runtime " %}
17317   ins_encode(clear_avx, Java_To_Runtime(meth));
17318   ins_pipe(pipe_slow);
17319 %}
17320 
17321 // Call runtime without safepoint and with vector arguments
17322 instruct CallLeafDirectVector(method meth)
17323 %{
17324   match(CallLeafVector);
17325   effect(USE meth);
17326 
17327   ins_cost(300);
17328   format %{ "call_leaf,vector " %}
17329   ins_encode(Java_To_Runtime(meth));
17330   ins_pipe(pipe_slow);
17331 %}
17332 
17333 // Call runtime without safepoint
17334 instruct CallLeafNoFPDirect(method meth)
17335 %{
17336   match(CallLeafNoFP);
17337   effect(USE meth);
17338 
17339   ins_cost(300);
17340   format %{ "call_leaf_nofp,runtime " %}
17341   ins_encode(clear_avx, Java_To_Runtime(meth));
17342   ins_pipe(pipe_slow);
17343 %}
17344 
17345 // Return Instruction
17346 // Remove the return address & jump to it.
17347 // Notice: We always emit a nop after a ret to make sure there is room
// for safepoint patching.
17349 instruct Ret()
17350 %{
17351   match(Return);
17352 
17353   format %{ "ret" %}
17354   ins_encode %{
17355     __ ret(0);
17356   %}
17357   ins_pipe(pipe_jmp);
17358 %}
17359 
17360 // Tail Call; Jump from runtime stub to Java code.
17361 // Also known as an 'interprocedural jump'.
17362 // Target of jump will eventually return to caller.
17363 // TailJump below removes the return address.
17364 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall, and it has reset rbp to the caller's state.
17366 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17367 %{
17368   match(TailCall jump_target method_ptr);
17369 
17370   ins_cost(300);
17371   format %{ "jmp     $jump_target\t# rbx holds method" %}
17372   ins_encode %{
17373     __ jmp($jump_target$$Register);
17374   %}
17375   ins_pipe(pipe_jmp);
17376 %}
17377 
17378 // Tail Jump; remove the return address; jump to target.
17379 // TailCall above leaves the return address around.
17380 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17381 %{
17382   match(TailJump jump_target ex_oop);
17383 
17384   ins_cost(300);
17385   format %{ "popq    rdx\t# pop return address\n\t"
17386             "jmp     $jump_target" %}
17387   ins_encode %{
17388     __ popq(as_Register(RDX_enc));
17389     __ jmp($jump_target$$Register);
17390   %}
17391   ins_pipe(pipe_jmp);
17392 %}
17393 
17394 // Forward exception.
17395 instruct ForwardExceptionjmp()
17396 %{
17397   match(ForwardException);
17398 
17399   format %{ "jmp     forward_exception_stub" %}
17400   ins_encode %{
17401     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17402   %}
17403   ins_pipe(pipe_jmp);
17404 %}
17405 
17406 // Create exception oop: created by stack-crawling runtime code.
// The created exception is now available to this handler, and is set up
// just prior to jumping to this handler.  No code is emitted.
17409 instruct CreateException(rax_RegP ex_oop)
17410 %{
17411   match(Set ex_oop (CreateEx));
17412 
17413   size(0);
17414   // use the following format syntax
17415   format %{ "# exception oop is in rax; no code emitted" %}
17416   ins_encode();
17417   ins_pipe(empty);
17418 %}
17419 
17420 // Rethrow exception:
17421 // The exception oop will come in the first argument position.
17422 // Then JUMP (not call) to the rethrow stub code.
17423 instruct RethrowException()
17424 %{
17425   match(Rethrow);
17426 
17427   // use the following format syntax
17428   format %{ "jmp     rethrow_stub" %}
17429   ins_encode %{
17430     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17431   %}
17432   ins_pipe(pipe_jmp);
17433 %}
17434 
17435 // ============================================================================
17436 // This name is KNOWN by the ADLC and cannot be changed.
17437 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17438 // for this guy.
17439 instruct tlsLoadP(r15_RegP dst) %{
17440   match(Set dst (ThreadLocal));
17441   effect(DEF dst);
17442 
17443   size(0);
17444   format %{ "# TLS is in R15" %}
17445   ins_encode( /*empty encoding*/ );
17446   ins_pipe(ialu_reg_reg);
17447 %}
17448 
17449 instruct addF_reg(regF dst, regF src) %{
17450   predicate(UseAVX == 0);
17451   match(Set dst (AddF dst src));
17452 
17453   format %{ "addss   $dst, $src" %}
17454   ins_cost(150);
17455   ins_encode %{
17456     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17457   %}
17458   ins_pipe(pipe_slow);
17459 %}
17460 
17461 instruct addF_mem(regF dst, memory src) %{
17462   predicate(UseAVX == 0);
17463   match(Set dst (AddF dst (LoadF src)));
17464 
17465   format %{ "addss   $dst, $src" %}
17466   ins_cost(150);
17467   ins_encode %{
17468     __ addss($dst$$XMMRegister, $src$$Address);
17469   %}
17470   ins_pipe(pipe_slow);
17471 %}
17472 
17473 instruct addF_imm(regF dst, immF con) %{
17474   predicate(UseAVX == 0);
17475   match(Set dst (AddF dst con));
17476   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17477   ins_cost(150);
17478   ins_encode %{
17479     __ addss($dst$$XMMRegister, $constantaddress($con));
17480   %}
17481   ins_pipe(pipe_slow);
17482 %}
17483 
17484 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17485   predicate(UseAVX > 0);
17486   match(Set dst (AddF src1 src2));
17487 
17488   format %{ "vaddss  $dst, $src1, $src2" %}
17489   ins_cost(150);
17490   ins_encode %{
17491     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17492   %}
17493   ins_pipe(pipe_slow);
17494 %}
17495 
17496 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17497   predicate(UseAVX > 0);
17498   match(Set dst (AddF src1 (LoadF src2)));
17499 
17500   format %{ "vaddss  $dst, $src1, $src2" %}
17501   ins_cost(150);
17502   ins_encode %{
17503     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17504   %}
17505   ins_pipe(pipe_slow);
17506 %}
17507 
17508 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17509   predicate(UseAVX > 0);
17510   match(Set dst (AddF src con));
17511 
17512   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17513   ins_cost(150);
17514   ins_encode %{
17515     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17516   %}
17517   ins_pipe(pipe_slow);
17518 %}
17519 
17520 instruct addD_reg(regD dst, regD src) %{
17521   predicate(UseAVX == 0);
17522   match(Set dst (AddD dst src));
17523 
17524   format %{ "addsd   $dst, $src" %}
17525   ins_cost(150);
17526   ins_encode %{
17527     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17528   %}
17529   ins_pipe(pipe_slow);
17530 %}
17531 
17532 instruct addD_mem(regD dst, memory src) %{
17533   predicate(UseAVX == 0);
17534   match(Set dst (AddD dst (LoadD src)));
17535 
17536   format %{ "addsd   $dst, $src" %}
17537   ins_cost(150);
17538   ins_encode %{
17539     __ addsd($dst$$XMMRegister, $src$$Address);
17540   %}
17541   ins_pipe(pipe_slow);
17542 %}
17543 
17544 instruct addD_imm(regD dst, immD con) %{
17545   predicate(UseAVX == 0);
17546   match(Set dst (AddD dst con));
17547   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17548   ins_cost(150);
17549   ins_encode %{
17550     __ addsd($dst$$XMMRegister, $constantaddress($con));
17551   %}
17552   ins_pipe(pipe_slow);
17553 %}
17554 
17555 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17556   predicate(UseAVX > 0);
17557   match(Set dst (AddD src1 src2));
17558 
17559   format %{ "vaddsd  $dst, $src1, $src2" %}
17560   ins_cost(150);
17561   ins_encode %{
17562     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17563   %}
17564   ins_pipe(pipe_slow);
17565 %}
17566 
17567 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17568   predicate(UseAVX > 0);
17569   match(Set dst (AddD src1 (LoadD src2)));
17570 
17571   format %{ "vaddsd  $dst, $src1, $src2" %}
17572   ins_cost(150);
17573   ins_encode %{
17574     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17575   %}
17576   ins_pipe(pipe_slow);
17577 %}
17578 
17579 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17580   predicate(UseAVX > 0);
17581   match(Set dst (AddD src con));
17582 
17583   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17584   ins_cost(150);
17585   ins_encode %{
17586     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17587   %}
17588   ins_pipe(pipe_slow);
17589 %}
17590 
17591 instruct subF_reg(regF dst, regF src) %{
17592   predicate(UseAVX == 0);
17593   match(Set dst (SubF dst src));
17594 
17595   format %{ "subss   $dst, $src" %}
17596   ins_cost(150);
17597   ins_encode %{
17598     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17599   %}
17600   ins_pipe(pipe_slow);
17601 %}
17602 
17603 instruct subF_mem(regF dst, memory src) %{
17604   predicate(UseAVX == 0);
17605   match(Set dst (SubF dst (LoadF src)));
17606 
17607   format %{ "subss   $dst, $src" %}
17608   ins_cost(150);
17609   ins_encode %{
17610     __ subss($dst$$XMMRegister, $src$$Address);
17611   %}
17612   ins_pipe(pipe_slow);
17613 %}
17614 
17615 instruct subF_imm(regF dst, immF con) %{
17616   predicate(UseAVX == 0);
17617   match(Set dst (SubF dst con));
17618   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17619   ins_cost(150);
17620   ins_encode %{
17621     __ subss($dst$$XMMRegister, $constantaddress($con));
17622   %}
17623   ins_pipe(pipe_slow);
17624 %}
17625 
17626 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17627   predicate(UseAVX > 0);
17628   match(Set dst (SubF src1 src2));
17629 
17630   format %{ "vsubss  $dst, $src1, $src2" %}
17631   ins_cost(150);
17632   ins_encode %{
17633     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17634   %}
17635   ins_pipe(pipe_slow);
17636 %}
17637 
17638 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17639   predicate(UseAVX > 0);
17640   match(Set dst (SubF src1 (LoadF src2)));
17641 
17642   format %{ "vsubss  $dst, $src1, $src2" %}
17643   ins_cost(150);
17644   ins_encode %{
17645     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17646   %}
17647   ins_pipe(pipe_slow);
17648 %}
17649 
17650 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17651   predicate(UseAVX > 0);
17652   match(Set dst (SubF src con));
17653 
17654   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17655   ins_cost(150);
17656   ins_encode %{
17657     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17658   %}
17659   ins_pipe(pipe_slow);
17660 %}
17661 
17662 instruct subD_reg(regD dst, regD src) %{
17663   predicate(UseAVX == 0);
17664   match(Set dst (SubD dst src));
17665 
17666   format %{ "subsd   $dst, $src" %}
17667   ins_cost(150);
17668   ins_encode %{
17669     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17670   %}
17671   ins_pipe(pipe_slow);
17672 %}
17673 
17674 instruct subD_mem(regD dst, memory src) %{
17675   predicate(UseAVX == 0);
17676   match(Set dst (SubD dst (LoadD src)));
17677 
17678   format %{ "subsd   $dst, $src" %}
17679   ins_cost(150);
17680   ins_encode %{
17681     __ subsd($dst$$XMMRegister, $src$$Address);
17682   %}
17683   ins_pipe(pipe_slow);
17684 %}
17685 
17686 instruct subD_imm(regD dst, immD con) %{
17687   predicate(UseAVX == 0);
17688   match(Set dst (SubD dst con));
17689   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17690   ins_cost(150);
17691   ins_encode %{
17692     __ subsd($dst$$XMMRegister, $constantaddress($con));
17693   %}
17694   ins_pipe(pipe_slow);
17695 %}
17696 
17697 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17698   predicate(UseAVX > 0);
17699   match(Set dst (SubD src1 src2));
17700 
17701   format %{ "vsubsd  $dst, $src1, $src2" %}
17702   ins_cost(150);
17703   ins_encode %{
17704     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17705   %}
17706   ins_pipe(pipe_slow);
17707 %}
17708 
17709 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17710   predicate(UseAVX > 0);
17711   match(Set dst (SubD src1 (LoadD src2)));
17712 
17713   format %{ "vsubsd  $dst, $src1, $src2" %}
17714   ins_cost(150);
17715   ins_encode %{
17716     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17717   %}
17718   ins_pipe(pipe_slow);
17719 %}
17720 
17721 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17722   predicate(UseAVX > 0);
17723   match(Set dst (SubD src con));
17724 
17725   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17726   ins_cost(150);
17727   ins_encode %{
17728     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17729   %}
17730   ins_pipe(pipe_slow);
17731 %}
17732 
17733 instruct mulF_reg(regF dst, regF src) %{
17734   predicate(UseAVX == 0);
17735   match(Set dst (MulF dst src));
17736 
17737   format %{ "mulss   $dst, $src" %}
17738   ins_cost(150);
17739   ins_encode %{
17740     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17741   %}
17742   ins_pipe(pipe_slow);
17743 %}
17744 
17745 instruct mulF_mem(regF dst, memory src) %{
17746   predicate(UseAVX == 0);
17747   match(Set dst (MulF dst (LoadF src)));
17748 
17749   format %{ "mulss   $dst, $src" %}
17750   ins_cost(150);
17751   ins_encode %{
17752     __ mulss($dst$$XMMRegister, $src$$Address);
17753   %}
17754   ins_pipe(pipe_slow);
17755 %}
17756 
17757 instruct mulF_imm(regF dst, immF con) %{
17758   predicate(UseAVX == 0);
17759   match(Set dst (MulF dst con));
17760   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17761   ins_cost(150);
17762   ins_encode %{
17763     __ mulss($dst$$XMMRegister, $constantaddress($con));
17764   %}
17765   ins_pipe(pipe_slow);
17766 %}
17767 
17768 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17769   predicate(UseAVX > 0);
17770   match(Set dst (MulF src1 src2));
17771 
17772   format %{ "vmulss  $dst, $src1, $src2" %}
17773   ins_cost(150);
17774   ins_encode %{
17775     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17776   %}
17777   ins_pipe(pipe_slow);
17778 %}
17779 
17780 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17781   predicate(UseAVX > 0);
17782   match(Set dst (MulF src1 (LoadF src2)));
17783 
17784   format %{ "vmulss  $dst, $src1, $src2" %}
17785   ins_cost(150);
17786   ins_encode %{
17787     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17788   %}
17789   ins_pipe(pipe_slow);
17790 %}
17791 
17792 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17793   predicate(UseAVX > 0);
17794   match(Set dst (MulF src con));
17795 
17796   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17797   ins_cost(150);
17798   ins_encode %{
17799     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17800   %}
17801   ins_pipe(pipe_slow);
17802 %}
17803 
17804 instruct mulD_reg(regD dst, regD src) %{
17805   predicate(UseAVX == 0);
17806   match(Set dst (MulD dst src));
17807 
17808   format %{ "mulsd   $dst, $src" %}
17809   ins_cost(150);
17810   ins_encode %{
17811     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17812   %}
17813   ins_pipe(pipe_slow);
17814 %}
17815 
17816 instruct mulD_mem(regD dst, memory src) %{
17817   predicate(UseAVX == 0);
17818   match(Set dst (MulD dst (LoadD src)));
17819 
17820   format %{ "mulsd   $dst, $src" %}
17821   ins_cost(150);
17822   ins_encode %{
17823     __ mulsd($dst$$XMMRegister, $src$$Address);
17824   %}
17825   ins_pipe(pipe_slow);
17826 %}
17827 
17828 instruct mulD_imm(regD dst, immD con) %{
17829   predicate(UseAVX == 0);
17830   match(Set dst (MulD dst con));
17831   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17832   ins_cost(150);
17833   ins_encode %{
17834     __ mulsd($dst$$XMMRegister, $constantaddress($con));
17835   %}
17836   ins_pipe(pipe_slow);
17837 %}
17838 
17839 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17840   predicate(UseAVX > 0);
17841   match(Set dst (MulD src1 src2));
17842 
17843   format %{ "vmulsd  $dst, $src1, $src2" %}
17844   ins_cost(150);
17845   ins_encode %{
17846     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17847   %}
17848   ins_pipe(pipe_slow);
17849 %}
17850 
17851 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17852   predicate(UseAVX > 0);
17853   match(Set dst (MulD src1 (LoadD src2)));
17854 
17855   format %{ "vmulsd  $dst, $src1, $src2" %}
17856   ins_cost(150);
17857   ins_encode %{
17858     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17859   %}
17860   ins_pipe(pipe_slow);
17861 %}
17862 
17863 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17864   predicate(UseAVX > 0);
17865   match(Set dst (MulD src con));
17866 
17867   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17868   ins_cost(150);
17869   ins_encode %{
17870     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17871   %}
17872   ins_pipe(pipe_slow);
17873 %}
17874 
17875 instruct divF_reg(regF dst, regF src) %{
17876   predicate(UseAVX == 0);
17877   match(Set dst (DivF dst src));
17878 
17879   format %{ "divss   $dst, $src" %}
17880   ins_cost(150);
17881   ins_encode %{
17882     __ divss($dst$$XMMRegister, $src$$XMMRegister);
17883   %}
17884   ins_pipe(pipe_slow);
17885 %}
17886 
17887 instruct divF_mem(regF dst, memory src) %{
17888   predicate(UseAVX == 0);
17889   match(Set dst (DivF dst (LoadF src)));
17890 
17891   format %{ "divss   $dst, $src" %}
17892   ins_cost(150);
17893   ins_encode %{
17894     __ divss($dst$$XMMRegister, $src$$Address);
17895   %}
17896   ins_pipe(pipe_slow);
17897 %}
17898 
17899 instruct divF_imm(regF dst, immF con) %{
17900   predicate(UseAVX == 0);
17901   match(Set dst (DivF dst con));
17902   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17903   ins_cost(150);
17904   ins_encode %{
17905     __ divss($dst$$XMMRegister, $constantaddress($con));
17906   %}
17907   ins_pipe(pipe_slow);
17908 %}
17909 
17910 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17911   predicate(UseAVX > 0);
17912   match(Set dst (DivF src1 src2));
17913 
17914   format %{ "vdivss  $dst, $src1, $src2" %}
17915   ins_cost(150);
17916   ins_encode %{
17917     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17918   %}
17919   ins_pipe(pipe_slow);
17920 %}
17921 
17922 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17923   predicate(UseAVX > 0);
17924   match(Set dst (DivF src1 (LoadF src2)));
17925 
17926   format %{ "vdivss  $dst, $src1, $src2" %}
17927   ins_cost(150);
17928   ins_encode %{
17929     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17930   %}
17931   ins_pipe(pipe_slow);
17932 %}
17933 
17934 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17935   predicate(UseAVX > 0);
17936   match(Set dst (DivF src con));
17937 
17938   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17939   ins_cost(150);
17940   ins_encode %{
17941     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17942   %}
17943   ins_pipe(pipe_slow);
17944 %}
17945 
17946 instruct divD_reg(regD dst, regD src) %{
17947   predicate(UseAVX == 0);
17948   match(Set dst (DivD dst src));
17949 
17950   format %{ "divsd   $dst, $src" %}
17951   ins_cost(150);
17952   ins_encode %{
17953     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17954   %}
17955   ins_pipe(pipe_slow);
17956 %}
17957 
17958 instruct divD_mem(regD dst, memory src) %{
17959   predicate(UseAVX == 0);
17960   match(Set dst (DivD dst (LoadD src)));
17961 
17962   format %{ "divsd   $dst, $src" %}
17963   ins_cost(150);
17964   ins_encode %{
17965     __ divsd($dst$$XMMRegister, $src$$Address);
17966   %}
17967   ins_pipe(pipe_slow);
17968 %}
17969 
17970 instruct divD_imm(regD dst, immD con) %{
17971   predicate(UseAVX == 0);
17972   match(Set dst (DivD dst con));
17973   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17974   ins_cost(150);
17975   ins_encode %{
17976     __ divsd($dst$$XMMRegister, $constantaddress($con));
17977   %}
17978   ins_pipe(pipe_slow);
17979 %}
17980 
17981 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17982   predicate(UseAVX > 0);
17983   match(Set dst (DivD src1 src2));
17984 
17985   format %{ "vdivsd  $dst, $src1, $src2" %}
17986   ins_cost(150);
17987   ins_encode %{
17988     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17989   %}
17990   ins_pipe(pipe_slow);
17991 %}
17992 
17993 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17994   predicate(UseAVX > 0);
17995   match(Set dst (DivD src1 (LoadD src2)));
17996 
17997   format %{ "vdivsd  $dst, $src1, $src2" %}
17998   ins_cost(150);
17999   ins_encode %{
18000     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18001   %}
18002   ins_pipe(pipe_slow);
18003 %}
18004 
18005 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18006   predicate(UseAVX > 0);
18007   match(Set dst (DivD src con));
18008 
18009   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18010   ins_cost(150);
18011   ins_encode %{
18012     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18013   %}
18014   ins_pipe(pipe_slow);
18015 %}
18016 
18017 instruct absF_reg(regF dst) %{
18018   predicate(UseAVX == 0);
18019   match(Set dst (AbsF dst));
18020   ins_cost(150);
18021   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
18022   ins_encode %{
18023     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18024   %}
18025   ins_pipe(pipe_slow);
18026 %}
18027 
18028 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18029   predicate(UseAVX > 0);
18030   match(Set dst (AbsF src));
18031   ins_cost(150);
18032   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18033   ins_encode %{
18034     int vlen_enc = Assembler::AVX_128bit;
18035     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18036               ExternalAddress(float_signmask()), vlen_enc);
18037   %}
18038   ins_pipe(pipe_slow);
18039 %}
18040 
18041 instruct absD_reg(regD dst) %{
18042   predicate(UseAVX == 0);
18043   match(Set dst (AbsD dst));
18044   ins_cost(150);
18045   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
18046             "# abs double by sign masking" %}
18047   ins_encode %{
18048     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18049   %}
18050   ins_pipe(pipe_slow);
18051 %}
18052 
18053 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18054   predicate(UseAVX > 0);
18055   match(Set dst (AbsD src));
18056   ins_cost(150);
18057   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
18058             "# abs double by sign masking" %}
18059   ins_encode %{
18060     int vlen_enc = Assembler::AVX_128bit;
18061     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18062               ExternalAddress(double_signmask()), vlen_enc);
18063   %}
18064   ins_pipe(pipe_slow);
18065 %}
18066 
18067 instruct negF_reg(regF dst) %{
18068   predicate(UseAVX == 0);
18069   match(Set dst (NegF dst));
18070   ins_cost(150);
18071   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
18072   ins_encode %{
18073     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18074   %}
18075   ins_pipe(pipe_slow);
18076 %}
18077 
18078 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18079   predicate(UseAVX > 0);
18080   match(Set dst (NegF src));
18081   ins_cost(150);
18082   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18083   ins_encode %{
18084     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18085                  ExternalAddress(float_signflip()));
18086   %}
18087   ins_pipe(pipe_slow);
18088 %}
18089 
18090 instruct negD_reg(regD dst) %{
18091   predicate(UseAVX == 0);
18092   match(Set dst (NegD dst));
18093   ins_cost(150);
18094   format %{ "xorpd   $dst, [0x8000000000000000]\t"
18095             "# neg double by sign flipping" %}
18096   ins_encode %{
18097     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18098   %}
18099   ins_pipe(pipe_slow);
18100 %}
18101 
18102 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18103   predicate(UseAVX > 0);
18104   match(Set dst (NegD src));
18105   ins_cost(150);
18106   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
18107             "# neg double by sign flipping" %}
18108   ins_encode %{
18109     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18110                  ExternalAddress(double_signflip()));
18111   %}
18112   ins_pipe(pipe_slow);
18113 %}
18114 
// The sqrtss instruction needs its destination register pre-initialized for best
// performance: it writes only the low lanes, so a cold dst would add a false
// dependency on the register's previous contents. Therefore only the instruct
// rule where the input is pre-loaded into the dst register is defined below.
18117 instruct sqrtF_reg(regF dst) %{
18118   match(Set dst (SqrtF dst));
18119   format %{ "sqrtss  $dst, $dst" %}
18120   ins_encode %{
18121     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18122   %}
18123   ins_pipe(pipe_slow);
18124 %}
18125 
// The sqrtsd instruction likewise needs its destination register pre-initialized
// for best performance. Therefore only the instruct rule where the input is
// pre-loaded into the dst register is defined below.
18128 instruct sqrtD_reg(regD dst) %{
18129   match(Set dst (SqrtD dst));
18130   format %{ "sqrtsd  $dst, $dst" %}
18131   ins_encode %{
18132     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18133   %}
18134   ins_pipe(pipe_slow);
18135 %}
18136 
18137 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18138   effect(TEMP tmp);
18139   match(Set dst (ConvF2HF src));
18140   ins_cost(125);
18141   format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18142   ins_encode %{
18143     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18144   %}
18145   ins_pipe( pipe_slow );
18146 %}
18147 
18148 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18149   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18150   effect(TEMP ktmp, TEMP rtmp);
18151   match(Set mem (StoreC mem (ConvF2HF src)));
18152   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18153   ins_encode %{
18154     __ movl($rtmp$$Register, 0x1);
18155     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18156     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18157   %}
18158   ins_pipe( pipe_slow );
18159 %}
18160 
18161 instruct vconvF2HF(vec dst, vec src) %{
18162   match(Set dst (VectorCastF2HF src));
18163   format %{ "vector_conv_F2HF $dst $src" %}
18164   ins_encode %{
18165     int vlen_enc = vector_length_encoding(this, $src);
18166     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18167   %}
18168   ins_pipe( pipe_slow );
18169 %}
18170 
18171 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18172   predicate(n->as_StoreVector()->memory_size() >= 16);
18173   match(Set mem (StoreVector mem (VectorCastF2HF src)));
18174   format %{ "vcvtps2ph $mem,$src" %}
18175   ins_encode %{
18176     int vlen_enc = vector_length_encoding(this, $src);
18177     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18178   %}
18179   ins_pipe( pipe_slow );
18180 %}
18181 
18182 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18183   match(Set dst (ConvHF2F src));
18184   format %{ "vcvtph2ps $dst,$src" %}
18185   ins_encode %{
18186     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18187   %}
18188   ins_pipe( pipe_slow );
18189 %}
18190 
18191 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18192   match(Set dst (VectorCastHF2F (LoadVector mem)));
18193   format %{ "vcvtph2ps $dst,$mem" %}
18194   ins_encode %{
18195     int vlen_enc = vector_length_encoding(this);
18196     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18197   %}
18198   ins_pipe( pipe_slow );
18199 %}
18200 
18201 instruct vconvHF2F(vec dst, vec src) %{
18202   match(Set dst (VectorCastHF2F src));
18203   ins_cost(125);
18204   format %{ "vector_conv_HF2F $dst,$src" %}
18205   ins_encode %{
18206     int vlen_enc = vector_length_encoding(this);
18207     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18208   %}
18209   ins_pipe( pipe_slow );
18210 %}
18211 
18212 // ---------------------------------------- VectorReinterpret ------------------------------------
18213 instruct reinterpret_mask(kReg dst) %{
18214   predicate(n->bottom_type()->isa_vectmask() &&
18215             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18216   match(Set dst (VectorReinterpret dst));
18217   ins_cost(125);
18218   format %{ "vector_reinterpret $dst\t!" %}
18219   ins_encode %{
18220     // empty
18221   %}
18222   ins_pipe( pipe_slow );
18223 %}
18224 
18225 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18226   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18227             n->bottom_type()->isa_vectmask() &&
18228             n->in(1)->bottom_type()->isa_vectmask() &&
18229             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
18230             n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18231   match(Set dst (VectorReinterpret src));
18232   effect(TEMP xtmp);
18233   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18234   ins_encode %{
18235      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18236      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18237      assert(src_sz == dst_sz , "src and dst size mismatch");
18238      int vlen_enc = vector_length_encoding(src_sz);
18239      __  evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18240      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18241   %}
18242   ins_pipe( pipe_slow );
18243 %}
18244 
18245 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18246   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18247             n->bottom_type()->isa_vectmask() &&
18248             n->in(1)->bottom_type()->isa_vectmask() &&
18249             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18250              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
18251             n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18252   match(Set dst (VectorReinterpret src));
18253   effect(TEMP xtmp);
18254   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18255   ins_encode %{
18256      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18257      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18258      assert(src_sz == dst_sz , "src and dst size mismatch");
18259      int vlen_enc = vector_length_encoding(src_sz);
18260      __  evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18261      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18262   %}
18263   ins_pipe( pipe_slow );
18264 %}
18265 
18266 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18267   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18268             n->bottom_type()->isa_vectmask() &&
18269             n->in(1)->bottom_type()->isa_vectmask() &&
18270             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18271              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
18272             n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18273   match(Set dst (VectorReinterpret src));
18274   effect(TEMP xtmp);
18275   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18276   ins_encode %{
18277      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18278      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18279      assert(src_sz == dst_sz , "src and dst size mismatch");
18280      int vlen_enc = vector_length_encoding(src_sz);
18281      __  evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18282      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18283   %}
18284   ins_pipe( pipe_slow );
18285 %}
18286 
18287 instruct reinterpret(vec dst) %{
18288   predicate(!n->bottom_type()->isa_vectmask() &&
18289             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18290   match(Set dst (VectorReinterpret dst));
18291   ins_cost(125);
18292   format %{ "vector_reinterpret $dst\t!" %}
18293   ins_encode %{
18294     // empty
18295   %}
18296   ins_pipe( pipe_slow );
18297 %}
18298 
18299 instruct reinterpret_expand(vec dst, vec src) %{
18300   predicate(UseAVX == 0 &&
18301             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18302   match(Set dst (VectorReinterpret src));
18303   ins_cost(125);
18304   effect(TEMP dst);
18305   format %{ "vector_reinterpret_expand $dst,$src" %}
18306   ins_encode %{
18307     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18308     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18309 
18310     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18311     if (src_vlen_in_bytes == 4) {
18312       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18313     } else {
18314       assert(src_vlen_in_bytes == 8, "");
18315       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18316     }
18317     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18318   %}
18319   ins_pipe( pipe_slow );
18320 %}
18321 
18322 instruct vreinterpret_expand4(legVec dst, vec src) %{
18323   predicate(UseAVX > 0 &&
18324             !n->bottom_type()->isa_vectmask() &&
18325             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18326             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18327   match(Set dst (VectorReinterpret src));
18328   ins_cost(125);
18329   format %{ "vector_reinterpret_expand $dst,$src" %}
18330   ins_encode %{
18331     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18332   %}
18333   ins_pipe( pipe_slow );
18334 %}
18335 
18336 
18337 instruct vreinterpret_expand(legVec dst, vec src) %{
18338   predicate(UseAVX > 0 &&
18339             !n->bottom_type()->isa_vectmask() &&
18340             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18341             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18342   match(Set dst (VectorReinterpret src));
18343   ins_cost(125);
18344   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18345   ins_encode %{
18346     switch (Matcher::vector_length_in_bytes(this, $src)) {
18347       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18348       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18349       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18350       default: ShouldNotReachHere();
18351     }
18352   %}
18353   ins_pipe( pipe_slow );
18354 %}
18355 
18356 instruct reinterpret_shrink(vec dst, legVec src) %{
18357   predicate(!n->bottom_type()->isa_vectmask() &&
18358             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18359   match(Set dst (VectorReinterpret src));
18360   ins_cost(125);
18361   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18362   ins_encode %{
18363     switch (Matcher::vector_length_in_bytes(this)) {
18364       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18365       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18366       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18367       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18368       default: ShouldNotReachHere();
18369     }
18370   %}
18371   ins_pipe( pipe_slow );
18372 %}
18373 
18374 // ----------------------------------------------------------------------------------------------------
18375 
18376 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18377   match(Set dst (RoundDoubleMode src rmode));
18378   format %{ "roundsd $dst,$src" %}
18379   ins_cost(150);
18380   ins_encode %{
18381     assert(UseSSE >= 4, "required");
18382     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18383       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18384     }
18385     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18386   %}
18387   ins_pipe(pipe_slow);
18388 %}
18389 
18390 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18391   match(Set dst (RoundDoubleMode con rmode));
18392   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18393   ins_cost(150);
18394   ins_encode %{
18395     assert(UseSSE >= 4, "required");
18396     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18397   %}
18398   ins_pipe(pipe_slow);
18399 %}
18400 
18401 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18402   predicate(Matcher::vector_length(n) < 8);
18403   match(Set dst (RoundDoubleModeV src rmode));
18404   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18405   ins_encode %{
18406     assert(UseAVX > 0, "required");
18407     int vlen_enc = vector_length_encoding(this);
18408     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18409   %}
18410   ins_pipe( pipe_slow );
18411 %}
18412 
18413 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18414   predicate(Matcher::vector_length(n) == 8);
18415   match(Set dst (RoundDoubleModeV src rmode));
18416   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18417   ins_encode %{
18418     assert(UseAVX > 2, "required");
18419     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18420   %}
18421   ins_pipe( pipe_slow );
18422 %}
18423 
18424 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18425   predicate(Matcher::vector_length(n) < 8);
18426   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18427   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18428   ins_encode %{
18429     assert(UseAVX > 0, "required");
18430     int vlen_enc = vector_length_encoding(this);
18431     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18432   %}
18433   ins_pipe( pipe_slow );
18434 %}
18435 
18436 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18437   predicate(Matcher::vector_length(n) == 8);
18438   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18439   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18440   ins_encode %{
18441     assert(UseAVX > 2, "required");
18442     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18443   %}
18444   ins_pipe( pipe_slow );
18445 %}
18446 
18447 instruct onspinwait() %{
18448   match(OnSpinWait);
18449   ins_cost(200);
18450 
18451   format %{
18452     $$template
18453     $$emit$$"pause\t! membar_onspinwait"
18454   %}
18455   ins_encode %{
18456     __ pause();
18457   %}
18458   ins_pipe(pipe_slow);
18459 %}
18460 
18461 // a * b + c
18462 instruct fmaD_reg(regD a, regD b, regD c) %{
18463   match(Set c (FmaD  c (Binary a b)));
18464   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18465   ins_cost(150);
18466   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
18468     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18469   %}
18470   ins_pipe( pipe_slow );
18471 %}
18472 
18473 // a * b + c
18474 instruct fmaF_reg(regF a, regF b, regF c) %{
18475   match(Set c (FmaF  c (Binary a b)));
18476   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18477   ins_cost(150);
18478   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
18480     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18481   %}
18482   ins_pipe( pipe_slow );
18483 %}
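
// Note: both FMA rules above perform a single fused operation with one
// rounding step, c = round(a * b + c), whereas a separate mul and add would
// round twice: round(round(a * b) + c).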
18484 
18485 // ====================VECTOR INSTRUCTIONS=====================================
18486 
18487 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18488 instruct MoveVec2Leg(legVec dst, vec src) %{
18489   match(Set dst src);
18490   format %{ "" %}
18491   ins_encode %{
18492     ShouldNotReachHere();
18493   %}
18494   ins_pipe( fpu_reg_reg );
18495 %}
18496 
18497 instruct MoveLeg2Vec(vec dst, legVec src) %{
18498   match(Set dst src);
18499   format %{ "" %}
18500   ins_encode %{
18501     ShouldNotReachHere();
18502   %}
18503   ins_pipe( fpu_reg_reg );
18504 %}
18505 
18506 // ============================================================================
18507 
18508 // Load vectors generic operand pattern
18509 instruct loadV(vec dst, memory mem) %{
18510   match(Set dst (LoadVector mem));
18511   ins_cost(125);
18512   format %{ "load_vector $dst,$mem" %}
18513   ins_encode %{
18514     BasicType bt = Matcher::vector_element_basic_type(this);
18515     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18516   %}
18517   ins_pipe( pipe_slow );
18518 %}
18519 
18520 // Store vectors generic operand pattern.
18521 instruct storeV(memory mem, vec src) %{
18522   match(Set mem (StoreVector mem src));
18523   ins_cost(145);
  format %{ "store_vector $mem,$src" %}
18525   ins_encode %{
18526     switch (Matcher::vector_length_in_bytes(this, $src)) {
18527       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18528       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18529       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18530       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18531       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18532       default: ShouldNotReachHere();
18533     }
18534   %}
18535   ins_pipe( pipe_slow );
18536 %}
18537 
18538 // ---------------------------------------- Gather ------------------------------------
18539 
18540 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18541 
18542 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18543   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18544             Matcher::vector_length_in_bytes(n) <= 32);
18545   match(Set dst (LoadVectorGather mem idx));
18546   effect(TEMP dst, TEMP tmp, TEMP mask);
18547   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18548   ins_encode %{
18549     int vlen_enc = vector_length_encoding(this);
18550     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18551     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18552     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18553     __ lea($tmp$$Register, $mem$$Address);
18554     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18555   %}
18556   ins_pipe( pipe_slow );
%}

instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18561   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18562             !is_subword_type(Matcher::vector_element_basic_type(n)));
18563   match(Set dst (LoadVectorGather mem idx));
18564   effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18566   ins_encode %{
18567     int vlen_enc = vector_length_encoding(this);
18568     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18569     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18570     __ lea($tmp$$Register, $mem$$Address);
18571     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18572   %}
18573   ins_pipe( pipe_slow );
18574 %}
18575 
18576 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18577   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18578             !is_subword_type(Matcher::vector_element_basic_type(n)));
18579   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18580   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18582   ins_encode %{
18583     assert(UseAVX > 2, "sanity");
18584     int vlen_enc = vector_length_encoding(this);
18585     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18586     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: the gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is first copied to a temporary.
18589     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18590     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18591     __ lea($tmp$$Register, $mem$$Address);
18592     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18593   %}
18594   ins_pipe( pipe_slow );
18595 %}
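
// Masked gather, per lane i (illustrative semantics, not generated code):
//   if (mask[i]) { dst[i] = *(base + idx[i] * scale); mask[i] = 0; }
// Masked-off lanes keep dst's prior contents, which is why dst is zeroed
// with vpxor first, and completed lanes clear their opmask bit, which is
// why the incoming mask is copied into $ktmp rather than used directly.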
18596 
18597 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18598   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18599   match(Set dst (LoadVectorGather mem idx_base));
18600   effect(TEMP tmp, TEMP rtmp);
18601   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18602   ins_encode %{
18603     int vlen_enc = vector_length_encoding(this);
18604     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18605     __ lea($tmp$$Register, $mem$$Address);
18606     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18607   %}
18608   ins_pipe( pipe_slow );
18609 %}
18610 
18611 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18612                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18613   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18614   match(Set dst (LoadVectorGather mem idx_base));
18615   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18616   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18617   ins_encode %{
18618     int vlen_enc = vector_length_encoding(this);
18619     int vector_len = Matcher::vector_length(this);
18620     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18621     __ lea($tmp$$Register, $mem$$Address);
18622     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18623     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18624                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18625   %}
18626   ins_pipe( pipe_slow );
18627 %}
18628 
18629 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18630   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18631   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18632   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18633   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18634   ins_encode %{
18635     int vlen_enc = vector_length_encoding(this);
18636     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18637     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18638     __ lea($tmp$$Register, $mem$$Address);
18639     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18640     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18641   %}
18642   ins_pipe( pipe_slow );
18643 %}
18644 
18645 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18646                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18647   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18648   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18649   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18650   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18651   ins_encode %{
18652     int vlen_enc = vector_length_encoding(this);
18653     int vector_len = Matcher::vector_length(this);
18654     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18655     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18656     __ lea($tmp$$Register, $mem$$Address);
18657     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18658     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18659     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18660                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18661   %}
18662   ins_pipe( pipe_slow );
18663 %}
18664 
18665 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18666   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18667   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18668   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18669   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18670   ins_encode %{
18671     int vlen_enc = vector_length_encoding(this);
18672     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18673     __ lea($tmp$$Register, $mem$$Address);
18674     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18675     if (elem_bt == T_SHORT) {
18676       __ movl($mask_idx$$Register, 0x55555555);
18677       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18678     }
18679     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18680     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18681   %}
18682   ins_pipe( pipe_slow );
18683 %}
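
// For T_SHORT, vpmovmskb produces one bit per byte, i.e. two identical bits
// per 16-bit element; pext with selector 0x55555555 keeps every even bit and
// so compresses the result to one bit per element. Worked example
// (illustrative): byte mask 0b11001100 becomes 0b1010 after the pext.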
18684 
18685 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18686                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18687   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18688   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18689   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18690   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18691   ins_encode %{
18692     int vlen_enc = vector_length_encoding(this);
18693     int vector_len = Matcher::vector_length(this);
18694     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18695     __ lea($tmp$$Register, $mem$$Address);
18696     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18697     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18698     if (elem_bt == T_SHORT) {
18699       __ movl($mask_idx$$Register, 0x55555555);
18700       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18701     }
18702     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18703     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18704                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18705   %}
18706   ins_pipe( pipe_slow );
18707 %}
18708 
18709 // ====================Scatter=======================================
18710 
18711 // Scatter INT, LONG, FLOAT, DOUBLE
18712 
18713 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18714   predicate(UseAVX > 2);
18715   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18716   effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18718   ins_encode %{
18719     int vlen_enc = vector_length_encoding(this, $src);
18720     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18721 
18722     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18723     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18724 
18725     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18726     __ lea($tmp$$Register, $mem$$Address);
18727     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18728   %}
18729   ins_pipe( pipe_slow );
18730 %}
18731 
18732 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18733   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18734   effect(TEMP tmp, TEMP ktmp);
18735   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18736   ins_encode %{
18737     int vlen_enc = vector_length_encoding(this, $src);
18738     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18739     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18740     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: the scatter instruction partially updates the opmask register used
    // for predication, hence the mask operand is first copied to a temporary.
18743     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18744     __ lea($tmp$$Register, $mem$$Address);
18745     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18746   %}
18747   ins_pipe( pipe_slow );
18748 %}
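
// Per-lane scatter sketch (illustrative): if (mask[i]) then
//   *(base + idx[i] * scale) = src[i];
// As with gather, completed lanes clear their opmask bit, hence the copy of
// the mask into $ktmp above.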
18749 
18750 // ====================REPLICATE=======================================
18751 
18752 // Replicate byte scalar to be vector
18753 instruct vReplB_reg(vec dst, rRegI src) %{
18754   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18755   match(Set dst (Replicate src));
18756   format %{ "replicateB $dst,$src" %}
18757   ins_encode %{
18758     uint vlen = Matcher::vector_length(this);
18759     if (UseAVX >= 2) {
18760       int vlen_enc = vector_length_encoding(this);
18761       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18762         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18763         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18764       } else {
18765         __ movdl($dst$$XMMRegister, $src$$Register);
18766         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18767       }
18768     } else {
      assert(UseAVX < 2, "");
18770       __ movdl($dst$$XMMRegister, $src$$Register);
18771       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18772       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18773       if (vlen >= 16) {
18774         assert(vlen == 16, "");
18775         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18776       }
18777     }
18778   %}
18779   ins_pipe( pipe_slow );
18780 %}
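
// Illustrative trace of the SSE fallback above for vlen == 16, src = 0x41:
//   movdl      dst, src    ; bytes 0..3  = 41 00 00 00
//   punpcklbw  dst, dst    ; bytes 0..1  = 41 41
//   pshuflw    dst, dst, 0 ; bytes 0..7  = 41 repeated
//   punpcklqdq dst, dst    ; bytes 0..15 = 41 repeated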
18781 
18782 instruct ReplB_mem(vec dst, memory mem) %{
18783   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18784   match(Set dst (Replicate (LoadB mem)));
18785   format %{ "replicateB $dst,$mem" %}
18786   ins_encode %{
18787     int vlen_enc = vector_length_encoding(this);
18788     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18789   %}
18790   ins_pipe( pipe_slow );
18791 %}
18792 
18793 // ====================ReplicateS=======================================
18794 
18795 instruct vReplS_reg(vec dst, rRegI src) %{
18796   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18797   match(Set dst (Replicate src));
18798   format %{ "replicateS $dst,$src" %}
18799   ins_encode %{
18800     uint vlen = Matcher::vector_length(this);
18801     int vlen_enc = vector_length_encoding(this);
18802     if (UseAVX >= 2) {
18803       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18804         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18805         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18806       } else {
18807         __ movdl($dst$$XMMRegister, $src$$Register);
18808         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18809       }
18810     } else {
18811       assert(UseAVX < 2, "");
18812       __ movdl($dst$$XMMRegister, $src$$Register);
18813       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18814       if (vlen >= 8) {
18815         assert(vlen == 8, "");
18816         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18817       }
18818     }
18819   %}
18820   ins_pipe( pipe_slow );
18821 %}
18822 
18823 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18824   match(Set dst (Replicate con));
18825   effect(TEMP rtmp);
  format %{ "replicateHF $dst, $con\t! using $rtmp as TEMP" %}
18827   ins_encode %{
18828     int vlen_enc = vector_length_encoding(this);
18829     BasicType bt = Matcher::vector_element_basic_type(this);
18830     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18831     __ movl($rtmp$$Register, $con$$constant);
18832     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18833   %}
18834   ins_pipe( pipe_slow );
18835 %}
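
// $con$$constant holds the raw 16-bit FP16 bit pattern (e.g. 1.0 encodes as
// 0x3C00); routing it through a GPR and evpbroadcastw avoids a constant
// table load.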
18836 
18837 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18838   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18839   match(Set dst (Replicate src));
18840   effect(TEMP rtmp);
  format %{ "replicateHF $dst, $src\t! using $rtmp as TEMP" %}
18842   ins_encode %{
18843     int vlen_enc = vector_length_encoding(this);
18844     __ evmovw($rtmp$$Register, $src$$XMMRegister);
18845     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18846   %}
18847   ins_pipe( pipe_slow );
18848 %}
18849 
18850 instruct ReplS_mem(vec dst, memory mem) %{
18851   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18852   match(Set dst (Replicate (LoadS mem)));
18853   format %{ "replicateS $dst,$mem" %}
18854   ins_encode %{
18855     int vlen_enc = vector_length_encoding(this);
18856     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18857   %}
18858   ins_pipe( pipe_slow );
18859 %}
18860 
18861 // ====================ReplicateI=======================================
18862 
18863 instruct ReplI_reg(vec dst, rRegI src) %{
18864   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18865   match(Set dst (Replicate src));
18866   format %{ "replicateI $dst,$src" %}
18867   ins_encode %{
18868     uint vlen = Matcher::vector_length(this);
18869     int vlen_enc = vector_length_encoding(this);
18870     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18871       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18872     } else if (VM_Version::supports_avx2()) {
18873       __ movdl($dst$$XMMRegister, $src$$Register);
18874       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18875     } else {
18876       __ movdl($dst$$XMMRegister, $src$$Register);
18877       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18878     }
18879   %}
18880   ins_pipe( pipe_slow );
18881 %}
18882 
18883 instruct ReplI_mem(vec dst, memory mem) %{
18884   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18885   match(Set dst (Replicate (LoadI mem)));
18886   format %{ "replicateI $dst,$mem" %}
18887   ins_encode %{
18888     int vlen_enc = vector_length_encoding(this);
18889     if (VM_Version::supports_avx2()) {
18890       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18891     } else if (VM_Version::supports_avx()) {
18892       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18893     } else {
18894       __ movdl($dst$$XMMRegister, $mem$$Address);
18895       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18896     }
18897   %}
18898   ins_pipe( pipe_slow );
18899 %}
18900 
18901 instruct ReplI_imm(vec dst, immI con) %{
18902   predicate(Matcher::is_non_long_integral_vector(n));
18903   match(Set dst (Replicate con));
18904   format %{ "replicateI $dst,$con" %}
18905   ins_encode %{
18906     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18907                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18908                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
18909     BasicType bt = Matcher::vector_element_basic_type(this);
18910     int vlen = Matcher::vector_length_in_bytes(this);
18911     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18912   %}
18913   ins_pipe( pipe_slow );
18914 %}
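
// Worked example of the replication count above for T_INT (4-byte elements):
// with AVX the constant table holds 4/4 = 1 copy, with SSE3 but no AVX
// 8/4 = 2 copies, and without SSE3 16/4 = 4 copies, so load_constant_vector
// always sees a constant it can broadcast or load directly.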
18915 
18916 // Replicate scalar zero to be vector
18917 instruct ReplI_zero(vec dst, immI_0 zero) %{
18918   predicate(Matcher::is_non_long_integral_vector(n));
18919   match(Set dst (Replicate zero));
18920   format %{ "replicateI $dst,$zero" %}
18921   ins_encode %{
18922     int vlen_enc = vector_length_encoding(this);
18923     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18924       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18925     } else {
18926       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18927     }
18928   %}
18929   ins_pipe( fpu_reg_reg );
18930 %}
18931 
18932 instruct ReplI_M1(vec dst, immI_M1 con) %{
18933   predicate(Matcher::is_non_long_integral_vector(n));
18934   match(Set dst (Replicate con));
18935   format %{ "vallones $dst" %}
18936   ins_encode %{
18937     int vector_len = vector_length_encoding(this);
18938     __ vallones($dst$$XMMRegister, vector_len);
18939   %}
18940   ins_pipe( pipe_slow );
18941 %}
18942 
18943 // ====================ReplicateL=======================================
18944 
18945 // Replicate long (8 byte) scalar to be vector
18946 instruct ReplL_reg(vec dst, rRegL src) %{
18947   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18948   match(Set dst (Replicate src));
18949   format %{ "replicateL $dst,$src" %}
18950   ins_encode %{
18951     int vlen = Matcher::vector_length(this);
18952     int vlen_enc = vector_length_encoding(this);
18953     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18954       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18955     } else if (VM_Version::supports_avx2()) {
18956       __ movdq($dst$$XMMRegister, $src$$Register);
18957       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18958     } else {
18959       __ movdq($dst$$XMMRegister, $src$$Register);
18960       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18961     }
18962   %}
18963   ins_pipe( pipe_slow );
18964 %}
18965 
18966 instruct ReplL_mem(vec dst, memory mem) %{
18967   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18968   match(Set dst (Replicate (LoadL mem)));
18969   format %{ "replicateL $dst,$mem" %}
18970   ins_encode %{
18971     int vlen_enc = vector_length_encoding(this);
18972     if (VM_Version::supports_avx2()) {
18973       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18974     } else if (VM_Version::supports_sse3()) {
18975       __ movddup($dst$$XMMRegister, $mem$$Address);
18976     } else {
18977       __ movq($dst$$XMMRegister, $mem$$Address);
18978       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18979     }
18980   %}
18981   ins_pipe( pipe_slow );
18982 %}
18983 
18984 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
18985 instruct ReplL_imm(vec dst, immL con) %{
18986   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18987   match(Set dst (Replicate con));
18988   format %{ "replicateL $dst,$con" %}
18989   ins_encode %{
18990     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18991     int vlen = Matcher::vector_length_in_bytes(this);
18992     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18993   %}
18994   ins_pipe( pipe_slow );
18995 %}
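
// With SSE3 a single copy suffices (movddup duplicates it on load); without
// SSE3 two copies are stored so a plain 128-bit load is already replicated.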
18996 
18997 instruct ReplL_zero(vec dst, immL0 zero) %{
18998   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18999   match(Set dst (Replicate zero));
19000   format %{ "replicateL $dst,$zero" %}
19001   ins_encode %{
19002     int vlen_enc = vector_length_encoding(this);
19003     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19004       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19005     } else {
19006       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19007     }
19008   %}
19009   ins_pipe( fpu_reg_reg );
19010 %}
19011 
19012 instruct ReplL_M1(vec dst, immL_M1 con) %{
19013   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19014   match(Set dst (Replicate con));
19015   format %{ "vallones $dst" %}
19016   ins_encode %{
19017     int vector_len = vector_length_encoding(this);
19018     __ vallones($dst$$XMMRegister, vector_len);
19019   %}
19020   ins_pipe( pipe_slow );
19021 %}
19022 
19023 // ====================ReplicateF=======================================
19024 
19025 instruct vReplF_reg(vec dst, vlRegF src) %{
19026   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19027   match(Set dst (Replicate src));
19028   format %{ "replicateF $dst,$src" %}
19029   ins_encode %{
19030     uint vlen = Matcher::vector_length(this);
19031     int vlen_enc = vector_length_encoding(this);
19032     if (vlen <= 4) {
19033       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19034     } else if (VM_Version::supports_avx2()) {
19035       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19036     } else {
19037       assert(vlen == 8, "sanity");
19038       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19039       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19040     }
19041   %}
19042   ins_pipe( pipe_slow );
19043 %}
19044 
19045 instruct ReplF_reg(vec dst, vlRegF src) %{
19046   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19047   match(Set dst (Replicate src));
19048   format %{ "replicateF $dst,$src" %}
19049   ins_encode %{
19050     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19051   %}
19052   ins_pipe( pipe_slow );
19053 %}
19054 
19055 instruct ReplF_mem(vec dst, memory mem) %{
19056   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19057   match(Set dst (Replicate (LoadF mem)));
19058   format %{ "replicateF $dst,$mem" %}
19059   ins_encode %{
19060     int vlen_enc = vector_length_encoding(this);
19061     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19062   %}
19063   ins_pipe( pipe_slow );
19064 %}
19065 
19066 // Replicate float scalar immediate to be vector by loading from const table.
19067 instruct ReplF_imm(vec dst, immF con) %{
19068   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19069   match(Set dst (Replicate con));
19070   format %{ "replicateF $dst,$con" %}
19071   ins_encode %{
19072     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19073                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19074     int vlen = Matcher::vector_length_in_bytes(this);
19075     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19076   %}
19077   ins_pipe( pipe_slow );
19078 %}
19079 
19080 instruct ReplF_zero(vec dst, immF0 zero) %{
19081   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19082   match(Set dst (Replicate zero));
19083   format %{ "replicateF $dst,$zero" %}
19084   ins_encode %{
19085     int vlen_enc = vector_length_encoding(this);
19086     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19087       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19088     } else {
19089       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19090     }
19091   %}
19092   ins_pipe( fpu_reg_reg );
19093 %}
19094 
19095 // ====================ReplicateD=======================================
19096 
19097 // Replicate double (8 bytes) scalar to be vector
19098 instruct vReplD_reg(vec dst, vlRegD src) %{
19099   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19100   match(Set dst (Replicate src));
19101   format %{ "replicateD $dst,$src" %}
19102   ins_encode %{
19103     uint vlen = Matcher::vector_length(this);
19104     int vlen_enc = vector_length_encoding(this);
19105     if (vlen <= 2) {
19106       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19107     } else if (VM_Version::supports_avx2()) {
19108       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19109     } else {
19110       assert(vlen == 4, "sanity");
19111       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19112       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19113     }
19114   %}
19115   ins_pipe( pipe_slow );
19116 %}
19117 
19118 instruct ReplD_reg(vec dst, vlRegD src) %{
19119   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19120   match(Set dst (Replicate src));
19121   format %{ "replicateD $dst,$src" %}
19122   ins_encode %{
19123     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19124   %}
19125   ins_pipe( pipe_slow );
19126 %}
19127 
19128 instruct ReplD_mem(vec dst, memory mem) %{
19129   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19130   match(Set dst (Replicate (LoadD mem)));
19131   format %{ "replicateD $dst,$mem" %}
19132   ins_encode %{
19133     if (Matcher::vector_length(this) >= 4) {
19134       int vlen_enc = vector_length_encoding(this);
19135       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19136     } else {
19137       __ movddup($dst$$XMMRegister, $mem$$Address);
19138     }
19139   %}
19140   ins_pipe( pipe_slow );
19141 %}
19142 
19143 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19144 instruct ReplD_imm(vec dst, immD con) %{
19145   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19146   match(Set dst (Replicate con));
19147   format %{ "replicateD $dst,$con" %}
19148   ins_encode %{
19149     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19150     int vlen = Matcher::vector_length_in_bytes(this);
19151     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19152   %}
19153   ins_pipe( pipe_slow );
19154 %}
19155 
19156 instruct ReplD_zero(vec dst, immD0 zero) %{
19157   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19158   match(Set dst (Replicate zero));
19159   format %{ "replicateD $dst,$zero" %}
19160   ins_encode %{
19161     int vlen_enc = vector_length_encoding(this);
19162     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19163       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19164     } else {
19165       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19166     }
19167   %}
19168   ins_pipe( fpu_reg_reg );
19169 %}
19170 
19171 // ====================VECTOR INSERT=======================================
19172 
19173 instruct insert(vec dst, rRegI val, immU8 idx) %{
19174   predicate(Matcher::vector_length_in_bytes(n) < 32);
19175   match(Set dst (VectorInsert (Binary dst val) idx));
19176   format %{ "vector_insert $dst,$val,$idx" %}
19177   ins_encode %{
19178     assert(UseSSE >= 4, "required");
19179     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19180 
19181     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19182 
19183     assert(is_integral_type(elem_bt), "");
19184     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19185 
19186     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19187   %}
19188   ins_pipe( pipe_slow );
19189 %}
19190 
19191 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19192   predicate(Matcher::vector_length_in_bytes(n) == 32);
19193   match(Set dst (VectorInsert (Binary src val) idx));
19194   effect(TEMP vtmp);
19195   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19196   ins_encode %{
19197     int vlen_enc = Assembler::AVX_256bit;
19198     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19199     int elem_per_lane = 16/type2aelembytes(elem_bt);
19200     int log2epr = log2(elem_per_lane);
19201 
19202     assert(is_integral_type(elem_bt), "sanity");
19203     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19204 
19205     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19206     uint y_idx = ($idx$$constant >> log2epr) & 1;
19207     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19208     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19209     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19210   %}
19211   ins_pipe( pipe_slow );
19212 %}
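
// Worked example (T_INT, 256-bit vector, idx = 5): elem_per_lane = 4 and
// log2epr = 2, so x_idx = 5 & 3 = 1 and y_idx = (5 >> 2) & 1 = 1; element 1
// of the upper 128-bit lane is rewritten and the lane is reinserted, while
// the other lane is copied through unchanged.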
19213 
19214 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19215   predicate(Matcher::vector_length_in_bytes(n) == 64);
19216   match(Set dst (VectorInsert (Binary src val) idx));
19217   effect(TEMP vtmp);
19218   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19219   ins_encode %{
19220     assert(UseAVX > 2, "sanity");
19221 
19222     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19223     int elem_per_lane = 16/type2aelembytes(elem_bt);
19224     int log2epr = log2(elem_per_lane);
19225 
19226     assert(is_integral_type(elem_bt), "");
19227     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19228 
19229     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19230     uint y_idx = ($idx$$constant >> log2epr) & 3;
19231     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19232     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19233     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19234   %}
19235   ins_pipe( pipe_slow );
19236 %}
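
// Same lane arithmetic over four 128-bit lanes, e.g. T_INT with idx = 13:
// x_idx = 13 & 3 = 1 and y_idx = (13 >> 2) & 3 = 3, i.e. element 1 of the
// topmost lane.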
19237 
19238 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19239   predicate(Matcher::vector_length(n) == 2);
19240   match(Set dst (VectorInsert (Binary dst val) idx));
19241   format %{ "vector_insert $dst,$val,$idx" %}
19242   ins_encode %{
19243     assert(UseSSE >= 4, "required");
19244     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19245     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19246 
19247     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19248   %}
19249   ins_pipe( pipe_slow );
19250 %}
19251 
19252 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19253   predicate(Matcher::vector_length(n) == 4);
19254   match(Set dst (VectorInsert (Binary src val) idx));
19255   effect(TEMP vtmp);
19256   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19257   ins_encode %{
19258     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19259     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19260 
19261     uint x_idx = $idx$$constant & right_n_bits(1);
19262     uint y_idx = ($idx$$constant >> 1) & 1;
19263     int vlen_enc = Assembler::AVX_256bit;
19264     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19265     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19266     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19267   %}
19268   ins_pipe( pipe_slow );
19269 %}
19270 
19271 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19272   predicate(Matcher::vector_length(n) == 8);
19273   match(Set dst (VectorInsert (Binary src val) idx));
19274   effect(TEMP vtmp);
19275   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19276   ins_encode %{
19277     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19278     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19279 
19280     uint x_idx = $idx$$constant & right_n_bits(1);
19281     uint y_idx = ($idx$$constant >> 1) & 3;
19282     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19283     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19284     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19285   %}
19286   ins_pipe( pipe_slow );
19287 %}
19288 
19289 instruct insertF(vec dst, regF val, immU8 idx) %{
19290   predicate(Matcher::vector_length(n) < 8);
19291   match(Set dst (VectorInsert (Binary dst val) idx));
19292   format %{ "vector_insert $dst,$val,$idx" %}
19293   ins_encode %{
19294     assert(UseSSE >= 4, "sanity");
19295 
19296     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19297     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19298 
19299     uint x_idx = $idx$$constant & right_n_bits(2);
19300     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19301   %}
19302   ins_pipe( pipe_slow );
19303 %}
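
// insertps imm8 layout: bits 7:6 select the source element (0 here), bits
// 5:4 select the destination element, and bits 3:0 are a zero mask; so
// x_idx << 4 writes element 0 of $val into element x_idx of $dst with no
// lanes zeroed.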
19304 
19305 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19306   predicate(Matcher::vector_length(n) >= 8);
19307   match(Set dst (VectorInsert (Binary src val) idx));
19308   effect(TEMP vtmp);
19309   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19310   ins_encode %{
19311     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19312     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19313 
19314     int vlen = Matcher::vector_length(this);
19315     uint x_idx = $idx$$constant & right_n_bits(2);
19316     if (vlen == 8) {
19317       uint y_idx = ($idx$$constant >> 2) & 1;
19318       int vlen_enc = Assembler::AVX_256bit;
19319       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19320       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19321       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19322     } else {
19323       assert(vlen == 16, "sanity");
19324       uint y_idx = ($idx$$constant >> 2) & 3;
19325       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19326       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19327       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19328     }
19329   %}
19330   ins_pipe( pipe_slow );
19331 %}
19332 
19333 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19334   predicate(Matcher::vector_length(n) == 2);
19335   match(Set dst (VectorInsert (Binary dst val) idx));
19336   effect(TEMP tmp);
19337   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19338   ins_encode %{
19339     assert(UseSSE >= 4, "sanity");
19340     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19341     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19342 
19343     __ movq($tmp$$Register, $val$$XMMRegister);
19344     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19345   %}
19346   ins_pipe( pipe_slow );
19347 %}
19348 
19349 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19350   predicate(Matcher::vector_length(n) == 4);
19351   match(Set dst (VectorInsert (Binary src val) idx));
19352   effect(TEMP vtmp, TEMP tmp);
19353   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19354   ins_encode %{
19355     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19356     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19357 
19358     uint x_idx = $idx$$constant & right_n_bits(1);
19359     uint y_idx = ($idx$$constant >> 1) & 1;
19360     int vlen_enc = Assembler::AVX_256bit;
19361     __ movq($tmp$$Register, $val$$XMMRegister);
19362     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19363     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19364     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19365   %}
19366   ins_pipe( pipe_slow );
19367 %}
19368 
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19370   predicate(Matcher::vector_length(n) == 8);
19371   match(Set dst (VectorInsert (Binary src val) idx));
19372   effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19374   ins_encode %{
19375     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19376     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19377 
19378     uint x_idx = $idx$$constant & right_n_bits(1);
19379     uint y_idx = ($idx$$constant >> 1) & 3;
19380     __ movq($tmp$$Register, $val$$XMMRegister);
19381     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19382     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19383     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19384   %}
19385   ins_pipe( pipe_slow );
19386 %}
19387 
19388 // ====================REDUCTION ARITHMETIC=======================================
19389 
19390 // =======================Int Reduction==========================================
19391 
19392 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19393   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19394   match(Set dst (AddReductionVI src1 src2));
19395   match(Set dst (MulReductionVI src1 src2));
19396   match(Set dst (AndReductionV  src1 src2));
19397   match(Set dst ( OrReductionV  src1 src2));
19398   match(Set dst (XorReductionV  src1 src2));
19399   match(Set dst (MinReductionV  src1 src2));
19400   match(Set dst (MaxReductionV  src1 src2));
19401   match(Set dst (UMinReductionV  src1 src2));
19402   match(Set dst (UMaxReductionV  src1 src2));
19403   effect(TEMP vtmp1, TEMP vtmp2);
19404   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19405   ins_encode %{
19406     int opcode = this->ideal_Opcode();
19407     int vlen = Matcher::vector_length(this, $src2);
19408     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19409   %}
19410   ins_pipe( pipe_slow );
19411 %}
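
// reduceI lowers to a lane-halving tree; illustrative sketch for an ADD
// reduction of [a b c d]:
//   [a b c d] -> [a+c b+d . .] -> [a+b+c+d . . .]
// after which the scalar lane is combined with $src1 to produce $dst.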
19412 
19413 // =======================Long Reduction==========================================
19414 
19415 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19416   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19417   match(Set dst (AddReductionVL src1 src2));
19418   match(Set dst (MulReductionVL src1 src2));
19419   match(Set dst (AndReductionV  src1 src2));
19420   match(Set dst ( OrReductionV  src1 src2));
19421   match(Set dst (XorReductionV  src1 src2));
19422   match(Set dst (MinReductionV  src1 src2));
19423   match(Set dst (MaxReductionV  src1 src2));
19424   match(Set dst (UMinReductionV  src1 src2));
19425   match(Set dst (UMaxReductionV  src1 src2));
19426   effect(TEMP vtmp1, TEMP vtmp2);
19427   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19428   ins_encode %{
19429     int opcode = this->ideal_Opcode();
19430     int vlen = Matcher::vector_length(this, $src2);
19431     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19432   %}
19433   ins_pipe( pipe_slow );
19434 %}
19435 
19436 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19437   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19438   match(Set dst (AddReductionVL src1 src2));
19439   match(Set dst (MulReductionVL src1 src2));
19440   match(Set dst (AndReductionV  src1 src2));
19441   match(Set dst ( OrReductionV  src1 src2));
19442   match(Set dst (XorReductionV  src1 src2));
19443   match(Set dst (MinReductionV  src1 src2));
19444   match(Set dst (MaxReductionV  src1 src2));
19445   match(Set dst (UMinReductionV  src1 src2));
19446   match(Set dst (UMaxReductionV  src1 src2));
19447   effect(TEMP vtmp1, TEMP vtmp2);
19448   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19449   ins_encode %{
19450     int opcode = this->ideal_Opcode();
19451     int vlen = Matcher::vector_length(this, $src2);
19452     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19453   %}
19454   ins_pipe( pipe_slow );
19455 %}
19456 
19457 // =======================Float Reduction==========================================
19458 
19459 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19460   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19461   match(Set dst (AddReductionVF dst src));
19462   match(Set dst (MulReductionVF dst src));
19463   effect(TEMP dst, TEMP vtmp);
19464   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19465   ins_encode %{
19466     int opcode = this->ideal_Opcode();
19467     int vlen = Matcher::vector_length(this, $src);
19468     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19469   %}
19470   ins_pipe( pipe_slow );
19471 %}
19472 
19473 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19474   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19475   match(Set dst (AddReductionVF dst src));
19476   match(Set dst (MulReductionVF dst src));
19477   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19478   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19479   ins_encode %{
19480     int opcode = this->ideal_Opcode();
19481     int vlen = Matcher::vector_length(this, $src);
19482     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19483   %}
19484   ins_pipe( pipe_slow );
19485 %}
19486 
19487 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19488   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19489   match(Set dst (AddReductionVF dst src));
19490   match(Set dst (MulReductionVF dst src));
19491   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19492   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19493   ins_encode %{
19494     int opcode = this->ideal_Opcode();
19495     int vlen = Matcher::vector_length(this, $src);
19496     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19497   %}
19498   ins_pipe( pipe_slow );
%}

instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19503   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19504   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19505   // src1 contains reduction identity
19506   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19507   match(Set dst (AddReductionVF src1 src2));
19508   match(Set dst (MulReductionVF src1 src2));
19509   effect(TEMP dst);
19510   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19511   ins_encode %{
19512     int opcode = this->ideal_Opcode();
19513     int vlen = Matcher::vector_length(this, $src2);
19514     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19515   %}
19516   ins_pipe( pipe_slow );
19517 %}
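
// Ordering example: a strict reduction must compute ((acc + v0) + v1) + ...
// lane by lane, while the unordered rules may combine lanes pairwise first,
// e.g. ((v0 + v1) + (v2 + v3)) + src1, which the VectorAPI permits but
// auto-vectorized reductions must not do.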
19518 
19519 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19520   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19521   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19522   // src1 contains reduction identity
19523   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19524   match(Set dst (AddReductionVF src1 src2));
19525   match(Set dst (MulReductionVF src1 src2));
19526   effect(TEMP dst, TEMP vtmp);
19527   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19528   ins_encode %{
19529     int opcode = this->ideal_Opcode();
19530     int vlen = Matcher::vector_length(this, $src2);
19531     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19532   %}
19533   ins_pipe( pipe_slow );
19534 %}
19535 
19536 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19537   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19538   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19539   // src1 contains reduction identity
19540   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19541   match(Set dst (AddReductionVF src1 src2));
19542   match(Set dst (MulReductionVF src1 src2));
19543   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19544   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19545   ins_encode %{
19546     int opcode = this->ideal_Opcode();
19547     int vlen = Matcher::vector_length(this, $src2);
19548     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19549   %}
19550   ins_pipe( pipe_slow );
19551 %}
19552 
19553 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19554   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19555   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19556   // src1 contains reduction identity
19557   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19558   match(Set dst (AddReductionVF src1 src2));
19559   match(Set dst (MulReductionVF src1 src2));
19560   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19561   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19562   ins_encode %{
19563     int opcode = this->ideal_Opcode();
19564     int vlen = Matcher::vector_length(this, $src2);
19565     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19566   %}
19567   ins_pipe( pipe_slow );
19568 %}
19569 
19570 // =======================Double Reduction==========================================
19571 
19572 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19573   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19574   match(Set dst (AddReductionVD dst src));
19575   match(Set dst (MulReductionVD dst src));
19576   effect(TEMP dst, TEMP vtmp);
19577   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19578   ins_encode %{
19579     int opcode = this->ideal_Opcode();
19580     int vlen = Matcher::vector_length(this, $src);
19581     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19582   %}
19583   ins_pipe( pipe_slow );
19584 %}
19585 
19586 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19587   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19588   match(Set dst (AddReductionVD dst src));
19589   match(Set dst (MulReductionVD dst src));
19590   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19591   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19592   ins_encode %{
19593     int opcode = this->ideal_Opcode();
19594     int vlen = Matcher::vector_length(this, $src);
19595     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19596   %}
19597   ins_pipe( pipe_slow );
19598 %}
19599 
19600 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19601   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19602   match(Set dst (AddReductionVD dst src));
19603   match(Set dst (MulReductionVD dst src));
19604   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19605   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19606   ins_encode %{
19607     int opcode = this->ideal_Opcode();
19608     int vlen = Matcher::vector_length(this, $src);
19609     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19610   %}
19611   ins_pipe( pipe_slow );
19612 %}
19613 
19614 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19615   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19616   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19617   // src1 contains reduction identity
19618   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19619   match(Set dst (AddReductionVD src1 src2));
19620   match(Set dst (MulReductionVD src1 src2));
19621   effect(TEMP dst);
19622   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19623   ins_encode %{
19624     int opcode = this->ideal_Opcode();
19625     int vlen = Matcher::vector_length(this, $src2);
19626     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19627   %}
19628   ins_pipe( pipe_slow );
19629 %}
19630 
19631 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19632   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19633   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19634   // src1 contains reduction identity
19635   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19636   match(Set dst (AddReductionVD src1 src2));
19637   match(Set dst (MulReductionVD src1 src2));
19638   effect(TEMP dst, TEMP vtmp);
19639   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19640   ins_encode %{
19641     int opcode = this->ideal_Opcode();
19642     int vlen = Matcher::vector_length(this, $src2);
19643     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19644   %}
19645   ins_pipe( pipe_slow );
19646 %}
19647 
19648 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19649   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19650   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19651   // src1 contains reduction identity
19652   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19653   match(Set dst (AddReductionVD src1 src2));
19654   match(Set dst (MulReductionVD src1 src2));
19655   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19656   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19657   ins_encode %{
19658     int opcode = this->ideal_Opcode();
19659     int vlen = Matcher::vector_length(this, $src2);
19660     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19661   %}
19662   ins_pipe( pipe_slow );
19663 %}
19664 
19665 // =======================Byte Reduction==========================================
19666 
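// AVX512BW is required for 64-byte (512-bit) byte/short operations, so the
// rule below is limited to legVec operands (registers encodable without
// EVEX); the _avx512bw variant may use the full EVEX register file and
// vector width.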
19667 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19668   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19669   match(Set dst (AddReductionVI src1 src2));
19670   match(Set dst (AndReductionV  src1 src2));
19671   match(Set dst ( OrReductionV  src1 src2));
19672   match(Set dst (XorReductionV  src1 src2));
19673   match(Set dst (MinReductionV  src1 src2));
19674   match(Set dst (MaxReductionV  src1 src2));
19675   match(Set dst (UMinReductionV  src1 src2));
19676   match(Set dst (UMaxReductionV  src1 src2));
19677   effect(TEMP vtmp1, TEMP vtmp2);
19678   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19679   ins_encode %{
19680     int opcode = this->ideal_Opcode();
19681     int vlen = Matcher::vector_length(this, $src2);
19682     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19683   %}
19684   ins_pipe( pipe_slow );
19685 %}
19686 
19687 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19688   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19689   match(Set dst (AddReductionVI src1 src2));
19690   match(Set dst (AndReductionV  src1 src2));
19691   match(Set dst ( OrReductionV  src1 src2));
19692   match(Set dst (XorReductionV  src1 src2));
19693   match(Set dst (MinReductionV  src1 src2));
19694   match(Set dst (MaxReductionV  src1 src2));
19695   match(Set dst (UMinReductionV  src1 src2));
19696   match(Set dst (UMaxReductionV  src1 src2));
19697   effect(TEMP vtmp1, TEMP vtmp2);
19698   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19699   ins_encode %{
19700     int opcode = this->ideal_Opcode();
19701     int vlen = Matcher::vector_length(this, $src2);
19702     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19703   %}
19704   ins_pipe( pipe_slow );
19705 %}
19706 
19707 // =======================Short Reduction==========================================
19708 
19709 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19710   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19711   match(Set dst (AddReductionVI src1 src2));
19712   match(Set dst (MulReductionVI src1 src2));
19713   match(Set dst (AndReductionV  src1 src2));
19714   match(Set dst ( OrReductionV  src1 src2));
19715   match(Set dst (XorReductionV  src1 src2));
19716   match(Set dst (MinReductionV  src1 src2));
19717   match(Set dst (MaxReductionV  src1 src2));
19718   match(Set dst (UMinReductionV  src1 src2));
19719   match(Set dst (UMaxReductionV  src1 src2));
19720   effect(TEMP vtmp1, TEMP vtmp2);
19721   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19722   ins_encode %{
19723     int opcode = this->ideal_Opcode();
19724     int vlen = Matcher::vector_length(this, $src2);
19725     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19726   %}
19727   ins_pipe( pipe_slow );
19728 %}
19729 
19730 // =======================Mul Reduction==========================================
19731 
19732 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19733   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19734             Matcher::vector_length(n->in(2)) <= 32); // src2
19735   match(Set dst (MulReductionVI src1 src2));
19736   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19737   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19738   ins_encode %{
19739     int opcode = this->ideal_Opcode();
19740     int vlen = Matcher::vector_length(this, $src2);
19741     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19742   %}
19743   ins_pipe( pipe_slow );
19744 %}
19745 
19746 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19747   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19748             Matcher::vector_length(n->in(2)) == 64); // src2
19749   match(Set dst (MulReductionVI src1 src2));
19750   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19751   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19752   ins_encode %{
19753     int opcode = this->ideal_Opcode();
19754     int vlen = Matcher::vector_length(this, $src2);
19755     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19756   %}
19757   ins_pipe( pipe_slow );
19758 %}
19759 
19760 //--------------------Min/Max Float Reduction --------------------
19761 // Float Min Reduction
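// Float min/max must follow Java semantics: NaN propagates, and -0.0f is
// smaller than +0.0f. A bare [v]minps/[v]maxps provides neither (on equal
// inputs, including +/-0.0, minps simply returns its second operand, and
// NaN handling is operand-order dependent), so the pre-AVX10.2 rules below
// spend several temporaries, and EFLAGS (KILL cr), on compare/blend
// fix-ups. For example, in Java:
//
//   Math.min(-0.0f, +0.0f)        // == -0.0f
//   Math.min(1.0f, Float.NaN)     // NaN, not 1.0f
//
// The immF src1 operand is accepted only when it is the reduction identity
// (+Infinity for min, -Infinity for max), as enforced by the predicates.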
19762 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19763                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19764   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19765             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19766              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19767             Matcher::vector_length(n->in(2)) == 2);
19768   match(Set dst (MinReductionV src1 src2));
19769   match(Set dst (MaxReductionV src1 src2));
19770   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19771   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19772   ins_encode %{
19773     assert(UseAVX > 0, "sanity");
19774 
19775     int opcode = this->ideal_Opcode();
19776     int vlen = Matcher::vector_length(this, $src2);
19777     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19778                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19779   %}
19780   ins_pipe( pipe_slow );
19781 %}
19782 
19783 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19784                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19785   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19786             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19787              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19788             Matcher::vector_length(n->in(2)) >= 4);
19789   match(Set dst (MinReductionV src1 src2));
19790   match(Set dst (MaxReductionV src1 src2));
19791   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19792   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19793   ins_encode %{
19794     assert(UseAVX > 0, "sanity");
19795 
19796     int opcode = this->ideal_Opcode();
19797     int vlen = Matcher::vector_length(this, $src2);
19798     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19799                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19800   %}
19801   ins_pipe( pipe_slow );
19802 %}
19803 
19804 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19805                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19806   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19807             Matcher::vector_length(n->in(2)) == 2);
19808   match(Set dst (MinReductionV dst src));
19809   match(Set dst (MaxReductionV dst src));
19810   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19811   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19812   ins_encode %{
19813     assert(UseAVX > 0, "sanity");
19814 
19815     int opcode = this->ideal_Opcode();
19816     int vlen = Matcher::vector_length(this, $src);
19817     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19818                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19819   %}
19820   ins_pipe( pipe_slow );
19821 %}
19822 
19823 
19824 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19825                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19826   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19827             Matcher::vector_length(n->in(2)) >= 4);
19828   match(Set dst (MinReductionV dst src));
19829   match(Set dst (MaxReductionV dst src));
19830   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19831   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19832   ins_encode %{
19833     assert(UseAVX > 0, "sanity");
19834 
19835     int opcode = this->ideal_Opcode();
19836     int vlen = Matcher::vector_length(this, $src);
19837     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19838                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19839   %}
19840   ins_pipe( pipe_slow );
19841 %}
19842 
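// The *_avx10_2 variants below use the AVX10.2 minmax instructions, which
// implement IEEE 754-2019 minimum/maximum semantics (NaN propagation and
// signed-zero ordering handled in hardware). This removes the compare/blend
// scratch registers and the EFLAGS kill; unused temp slots are passed as
// xnoreg.
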
19843 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19844   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19845             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19846              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19847             Matcher::vector_length(n->in(2)) == 2);
19848   match(Set dst (MinReductionV src1 src2));
19849   match(Set dst (MaxReductionV src1 src2));
19850   effect(TEMP dst, TEMP xtmp1);
19851   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19852   ins_encode %{
19853     int opcode = this->ideal_Opcode();
19854     int vlen = Matcher::vector_length(this, $src2);
19855     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19856                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19857   %}
19858   ins_pipe( pipe_slow );
19859 %}
19860 
19861 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19862   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19863             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19864              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19865             Matcher::vector_length(n->in(2)) >= 4);
19866   match(Set dst (MinReductionV src1 src2));
19867   match(Set dst (MaxReductionV src1 src2));
19868   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19869   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19870   ins_encode %{
19871     int opcode = this->ideal_Opcode();
19872     int vlen = Matcher::vector_length(this, $src2);
19873     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19874                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19875   %}
19876   ins_pipe( pipe_slow );
19877 %}
19878 
19879 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
19880   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19881             Matcher::vector_length(n->in(2)) == 2);
19882   match(Set dst (MinReductionV dst src));
19883   match(Set dst (MaxReductionV dst src));
19884   effect(TEMP dst, TEMP xtmp1);
19885   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19886   ins_encode %{
19887     int opcode = this->ideal_Opcode();
19888     int vlen = Matcher::vector_length(this, $src);
19889     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19890                          $xtmp1$$XMMRegister);
19891   %}
19892   ins_pipe( pipe_slow );
19893 %}
19894 
19895 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19896   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19897             Matcher::vector_length(n->in(2)) >= 4);
19898   match(Set dst (MinReductionV dst src));
19899   match(Set dst (MaxReductionV dst src));
19900   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19901   format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19902   ins_encode %{
19903     int opcode = this->ideal_Opcode();
19904     int vlen = Matcher::vector_length(this, $src);
19905     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19906                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19907   %}
19908   ins_pipe( pipe_slow );
19909 %}
19910 
19911 //--------------------Min Double Reduction --------------------
19912 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19913                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19914   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19915             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19916              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19917             Matcher::vector_length(n->in(2)) == 2);
19918   match(Set dst (MinReductionV src1 src2));
19919   match(Set dst (MaxReductionV src1 src2));
19920   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19921   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19922   ins_encode %{
19923     assert(UseAVX > 0, "sanity");
19924 
19925     int opcode = this->ideal_Opcode();
19926     int vlen = Matcher::vector_length(this, $src2);
19927     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19928                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19929   %}
19930   ins_pipe( pipe_slow );
19931 %}
19932 
19933 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19934                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19935   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19936             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19937              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19938             Matcher::vector_length(n->in(2)) >= 4);
19939   match(Set dst (MinReductionV src1 src2));
19940   match(Set dst (MaxReductionV src1 src2));
19941   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19942   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19943   ins_encode %{
19944     assert(UseAVX > 0, "sanity");
19945 
19946     int opcode = this->ideal_Opcode();
19947     int vlen = Matcher::vector_length(this, $src2);
19948     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19949                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19950   %}
19951   ins_pipe( pipe_slow );
19952 %}
19953 
19954 
19955 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19956                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19957   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19958             Matcher::vector_length(n->in(2)) == 2);
19959   match(Set dst (MinReductionV dst src));
19960   match(Set dst (MaxReductionV dst src));
19961   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19962   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19963   ins_encode %{
19964     assert(UseAVX > 0, "sanity");
19965 
19966     int opcode = this->ideal_Opcode();
19967     int vlen = Matcher::vector_length(this, $src);
19968     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19969                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19970   %}
19971   ins_pipe( pipe_slow );
19972 %}
19973 
19974 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19975                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19976   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19977             Matcher::vector_length(n->in(2)) >= 4);
19978   match(Set dst (MinReductionV dst src));
19979   match(Set dst (MaxReductionV dst src));
19980   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19981   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19982   ins_encode %{
19983     assert(UseAVX > 0, "sanity");
19984 
19985     int opcode = this->ideal_Opcode();
19986     int vlen = Matcher::vector_length(this, $src);
19987     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19988                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19989   %}
19990   ins_pipe( pipe_slow );
19991 %}
19992 
19993 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
19994   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19995             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19996              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19997             Matcher::vector_length(n->in(2)) == 2);
19998   match(Set dst (MinReductionV src1 src2));
19999   match(Set dst (MaxReductionV src1 src2));
20000   effect(TEMP dst, TEMP xtmp1);
20001   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20002   ins_encode %{
20003     int opcode = this->ideal_Opcode();
20004     int vlen = Matcher::vector_length(this, $src2);
20005     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20006                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
20007   %}
20008   ins_pipe( pipe_slow );
20009 %}
20010 
20011 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20012   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20013             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20014              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20015             Matcher::vector_length(n->in(2)) >= 4);
20016   match(Set dst (MinReductionV src1 src2));
20017   match(Set dst (MaxReductionV src1 src2));
20018   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20019   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20020   ins_encode %{
20021     int opcode = this->ideal_Opcode();
20022     int vlen = Matcher::vector_length(this, $src2);
20023     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20024                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20025   %}
20026   ins_pipe( pipe_slow );
20027 %}
20028 
20029 
20030 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20031   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20032             Matcher::vector_length(n->in(2)) == 2);
20033   match(Set dst (MinReductionV dst src));
20034   match(Set dst (MaxReductionV dst src));
20035   effect(TEMP dst, TEMP xtmp1);
20036   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20037   ins_encode %{
20038     int opcode = this->ideal_Opcode();
20039     int vlen = Matcher::vector_length(this, $src);
20040     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20041                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20042   %}
20043   ins_pipe( pipe_slow );
20044 %}
20045 
20046 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20047   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20048             Matcher::vector_length(n->in(2)) >= 4);
20049   match(Set dst (MinReductionV dst src));
20050   match(Set dst (MaxReductionV dst src));
20051   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20052   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20053   ins_encode %{
20054     int opcode = this->ideal_Opcode();
20055     int vlen = Matcher::vector_length(this, $src);
20056     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20057                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20058   %}
20059   ins_pipe( pipe_slow );
20060 %}
20061 
20062 // ====================VECTOR ARITHMETIC=======================================
20063 
20064 // --------------------------------- ADD --------------------------------------
20065 
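// Most rules in this section come in threes: a two-operand SSE form
// (UseAVX == 0, where dst doubles as the first source), a non-destructive
// three-operand AVX form, and an AVX form with a folded memory operand.
// The memory forms require a vector wider than 8 bytes because the folded
// load always fetches a full 16/32/64-byte operand and could read past the
// end of an 8-byte vector's backing storage.
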
20066 // Bytes vector add
20067 instruct vaddB(vec dst, vec src) %{
20068   predicate(UseAVX == 0);
20069   match(Set dst (AddVB dst src));
20070   format %{ "paddb   $dst,$src\t! add packedB" %}
20071   ins_encode %{
20072     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20073   %}
20074   ins_pipe( pipe_slow );
20075 %}
20076 
20077 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20078   predicate(UseAVX > 0);
20079   match(Set dst (AddVB src1 src2));
20080   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
20081   ins_encode %{
20082     int vlen_enc = vector_length_encoding(this);
20083     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20084   %}
20085   ins_pipe( pipe_slow );
20086 %}
20087 
20088 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20089   predicate((UseAVX > 0) &&
20090             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20091   match(Set dst (AddVB src (LoadVector mem)));
20092   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
20093   ins_encode %{
20094     int vlen_enc = vector_length_encoding(this);
20095     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20096   %}
20097   ins_pipe( pipe_slow );
20098 %}
20099 
20100 // Shorts/Chars vector add
20101 instruct vaddS(vec dst, vec src) %{
20102   predicate(UseAVX == 0);
20103   match(Set dst (AddVS dst src));
20104   format %{ "paddw   $dst,$src\t! add packedS" %}
20105   ins_encode %{
20106     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20107   %}
20108   ins_pipe( pipe_slow );
20109 %}
20110 
20111 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20112   predicate(UseAVX > 0);
20113   match(Set dst (AddVS src1 src2));
20114   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
20115   ins_encode %{
20116     int vlen_enc = vector_length_encoding(this);
20117     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20118   %}
20119   ins_pipe( pipe_slow );
20120 %}
20121 
20122 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20123   predicate((UseAVX > 0) &&
20124             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20125   match(Set dst (AddVS src (LoadVector mem)));
20126   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
20127   ins_encode %{
20128     int vlen_enc = vector_length_encoding(this);
20129     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20130   %}
20131   ins_pipe( pipe_slow );
20132 %}
20133 
20134 // Integers vector add
20135 instruct vaddI(vec dst, vec src) %{
20136   predicate(UseAVX == 0);
20137   match(Set dst (AddVI dst src));
20138   format %{ "paddd   $dst,$src\t! add packedI" %}
20139   ins_encode %{
20140     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20141   %}
20142   ins_pipe( pipe_slow );
20143 %}
20144 
20145 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20146   predicate(UseAVX > 0);
20147   match(Set dst (AddVI src1 src2));
20148   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
20149   ins_encode %{
20150     int vlen_enc = vector_length_encoding(this);
20151     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20152   %}
20153   ins_pipe( pipe_slow );
20154 %}
20155 
20156 
20157 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20158   predicate((UseAVX > 0) &&
20159             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20160   match(Set dst (AddVI src (LoadVector mem)));
20161   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
20162   ins_encode %{
20163     int vlen_enc = vector_length_encoding(this);
20164     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20165   %}
20166   ins_pipe( pipe_slow );
20167 %}
20168 
20169 // Longs vector add
20170 instruct vaddL(vec dst, vec src) %{
20171   predicate(UseAVX == 0);
20172   match(Set dst (AddVL dst src));
20173   format %{ "paddq   $dst,$src\t! add packedL" %}
20174   ins_encode %{
20175     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20176   %}
20177   ins_pipe( pipe_slow );
20178 %}
20179 
20180 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20181   predicate(UseAVX > 0);
20182   match(Set dst (AddVL src1 src2));
20183   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
20184   ins_encode %{
20185     int vlen_enc = vector_length_encoding(this);
20186     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20187   %}
20188   ins_pipe( pipe_slow );
20189 %}
20190 
20191 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20192   predicate((UseAVX > 0) &&
20193             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20194   match(Set dst (AddVL src (LoadVector mem)));
20195   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
20196   ins_encode %{
20197     int vlen_enc = vector_length_encoding(this);
20198     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20199   %}
20200   ins_pipe( pipe_slow );
20201 %}
20202 
20203 // Floats vector add
20204 instruct vaddF(vec dst, vec src) %{
20205   predicate(UseAVX == 0);
20206   match(Set dst (AddVF dst src));
20207   format %{ "addps   $dst,$src\t! add packedF" %}
20208   ins_encode %{
20209     __ addps($dst$$XMMRegister, $src$$XMMRegister);
20210   %}
20211   ins_pipe( pipe_slow );
20212 %}
20213 
20214 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20215   predicate(UseAVX > 0);
20216   match(Set dst (AddVF src1 src2));
20217   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
20218   ins_encode %{
20219     int vlen_enc = vector_length_encoding(this);
20220     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20221   %}
20222   ins_pipe( pipe_slow );
20223 %}
20224 
20225 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20226   predicate((UseAVX > 0) &&
20227             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20228   match(Set dst (AddVF src (LoadVector mem)));
20229   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
20230   ins_encode %{
20231     int vlen_enc = vector_length_encoding(this);
20232     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20233   %}
20234   ins_pipe( pipe_slow );
20235 %}
20236 
20237 // Doubles vector add
20238 instruct vaddD(vec dst, vec src) %{
20239   predicate(UseAVX == 0);
20240   match(Set dst (AddVD dst src));
20241   format %{ "addpd   $dst,$src\t! add packedD" %}
20242   ins_encode %{
20243     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20244   %}
20245   ins_pipe( pipe_slow );
20246 %}
20247 
20248 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20249   predicate(UseAVX > 0);
20250   match(Set dst (AddVD src1 src2));
20251   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
20252   ins_encode %{
20253     int vlen_enc = vector_length_encoding(this);
20254     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20255   %}
20256   ins_pipe( pipe_slow );
20257 %}
20258 
20259 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20260   predicate((UseAVX > 0) &&
20261             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20262   match(Set dst (AddVD src (LoadVector mem)));
20263   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
20264   ins_encode %{
20265     int vlen_enc = vector_length_encoding(this);
20266     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20267   %}
20268   ins_pipe( pipe_slow );
20269 %}
20270 
20271 // --------------------------------- SUB --------------------------------------
20272 
20273 // Bytes vector sub
20274 instruct vsubB(vec dst, vec src) %{
20275   predicate(UseAVX == 0);
20276   match(Set dst (SubVB dst src));
20277   format %{ "psubb   $dst,$src\t! sub packedB" %}
20278   ins_encode %{
20279     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20280   %}
20281   ins_pipe( pipe_slow );
20282 %}
20283 
20284 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20285   predicate(UseAVX > 0);
20286   match(Set dst (SubVB src1 src2));
20287   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20288   ins_encode %{
20289     int vlen_enc = vector_length_encoding(this);
20290     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20291   %}
20292   ins_pipe( pipe_slow );
20293 %}
20294 
20295 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20296   predicate((UseAVX > 0) &&
20297             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20298   match(Set dst (SubVB src (LoadVector mem)));
20299   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20300   ins_encode %{
20301     int vlen_enc = vector_length_encoding(this);
20302     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20303   %}
20304   ins_pipe( pipe_slow );
20305 %}
20306 
20307 // Shorts/Chars vector sub
20308 instruct vsubS(vec dst, vec src) %{
20309   predicate(UseAVX == 0);
20310   match(Set dst (SubVS dst src));
20311   format %{ "psubw   $dst,$src\t! sub packedS" %}
20312   ins_encode %{
20313     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20314   %}
20315   ins_pipe( pipe_slow );
20316 %}
20317 
20318 
20319 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20320   predicate(UseAVX > 0);
20321   match(Set dst (SubVS src1 src2));
20322   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20323   ins_encode %{
20324     int vlen_enc = vector_length_encoding(this);
20325     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20326   %}
20327   ins_pipe( pipe_slow );
20328 %}
20329 
20330 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20331   predicate((UseAVX > 0) &&
20332             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20333   match(Set dst (SubVS src (LoadVector mem)));
20334   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20335   ins_encode %{
20336     int vlen_enc = vector_length_encoding(this);
20337     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20338   %}
20339   ins_pipe( pipe_slow );
20340 %}
20341 
20342 // Integers vector sub
20343 instruct vsubI(vec dst, vec src) %{
20344   predicate(UseAVX == 0);
20345   match(Set dst (SubVI dst src));
20346   format %{ "psubd   $dst,$src\t! sub packedI" %}
20347   ins_encode %{
20348     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20349   %}
20350   ins_pipe( pipe_slow );
20351 %}
20352 
20353 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20354   predicate(UseAVX > 0);
20355   match(Set dst (SubVI src1 src2));
20356   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20357   ins_encode %{
20358     int vlen_enc = vector_length_encoding(this);
20359     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20360   %}
20361   ins_pipe( pipe_slow );
20362 %}
20363 
20364 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20365   predicate((UseAVX > 0) &&
20366             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20367   match(Set dst (SubVI src (LoadVector mem)));
20368   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20369   ins_encode %{
20370     int vlen_enc = vector_length_encoding(this);
20371     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20372   %}
20373   ins_pipe( pipe_slow );
20374 %}
20375 
20376 // Longs vector sub
20377 instruct vsubL(vec dst, vec src) %{
20378   predicate(UseAVX == 0);
20379   match(Set dst (SubVL dst src));
20380   format %{ "psubq   $dst,$src\t! sub packedL" %}
20381   ins_encode %{
20382     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20383   %}
20384   ins_pipe( pipe_slow );
20385 %}
20386 
20387 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20388   predicate(UseAVX > 0);
20389   match(Set dst (SubVL src1 src2));
20390   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20391   ins_encode %{
20392     int vlen_enc = vector_length_encoding(this);
20393     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20394   %}
20395   ins_pipe( pipe_slow );
20396 %}
20397 
20398 
20399 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20400   predicate((UseAVX > 0) &&
20401             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20402   match(Set dst (SubVL src (LoadVector mem)));
20403   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20404   ins_encode %{
20405     int vlen_enc = vector_length_encoding(this);
20406     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20407   %}
20408   ins_pipe( pipe_slow );
20409 %}
20410 
20411 // Floats vector sub
20412 instruct vsubF(vec dst, vec src) %{
20413   predicate(UseAVX == 0);
20414   match(Set dst (SubVF dst src));
20415   format %{ "subps   $dst,$src\t! sub packedF" %}
20416   ins_encode %{
20417     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20418   %}
20419   ins_pipe( pipe_slow );
20420 %}
20421 
20422 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20423   predicate(UseAVX > 0);
20424   match(Set dst (SubVF src1 src2));
20425   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20426   ins_encode %{
20427     int vlen_enc = vector_length_encoding(this);
20428     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20429   %}
20430   ins_pipe( pipe_slow );
20431 %}
20432 
20433 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20434   predicate((UseAVX > 0) &&
20435             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20436   match(Set dst (SubVF src (LoadVector mem)));
20437   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20438   ins_encode %{
20439     int vlen_enc = vector_length_encoding(this);
20440     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20441   %}
20442   ins_pipe( pipe_slow );
20443 %}
20444 
20445 // Doubles vector sub
20446 instruct vsubD(vec dst, vec src) %{
20447   predicate(UseAVX == 0);
20448   match(Set dst (SubVD dst src));
20449   format %{ "subpd   $dst,$src\t! sub packedD" %}
20450   ins_encode %{
20451     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20452   %}
20453   ins_pipe( pipe_slow );
20454 %}
20455 
20456 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20457   predicate(UseAVX > 0);
20458   match(Set dst (SubVD src1 src2));
20459   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20460   ins_encode %{
20461     int vlen_enc = vector_length_encoding(this);
20462     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20463   %}
20464   ins_pipe( pipe_slow );
20465 %}
20466 
20467 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20468   predicate((UseAVX > 0) &&
20469             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20470   match(Set dst (SubVD src (LoadVector mem)));
20471   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20472   ins_encode %{
20473     int vlen_enc = vector_length_encoding(this);
20474     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20475   %}
20476   ins_pipe( pipe_slow );
20477 %}
20478 
20479 // --------------------------------- MUL --------------------------------------
20480 
20481 // Byte vector mul
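// x86 has no packed byte multiply, so byte lanes are widened to 16-bit
// words, multiplied with pmullw/vpmullw, truncated back to bytes and
// recombined. For vectors of at most 8 bytes a single pmovsxbw widening
// suffices; wider vectors process odd and even byte lanes separately and
// merge the two halves with por/vpor.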
20482 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20483   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20484   match(Set dst (MulVB src1 src2));
20485   effect(TEMP dst, TEMP xtmp);
20486   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20487   ins_encode %{
20488     assert(UseSSE > 3, "required");
20489     __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20490     __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20491     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20492     __ psllw($dst$$XMMRegister, 8);
20493     __ psrlw($dst$$XMMRegister, 8);
20494     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20495   %}
20496   ins_pipe( pipe_slow );
20497 %}
20498 
20499 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20500   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20501   match(Set dst (MulVB src1 src2));
20502   effect(TEMP dst, TEMP xtmp);
20503   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20504   ins_encode %{
20505     assert(UseSSE > 3, "required");
20506     // Odd-index elements
20507     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20508     __ psrlw($dst$$XMMRegister, 8);
20509     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20510     __ psrlw($xtmp$$XMMRegister, 8);
20511     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20512     __ psllw($dst$$XMMRegister, 8);
20513     // Even-index elements
20514     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20515     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20516     __ psllw($xtmp$$XMMRegister, 8);
20517     __ psrlw($xtmp$$XMMRegister, 8);
20518     // Combine
20519     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20520   %}
20521   ins_pipe( pipe_slow );
20522 %}
20523 
20524 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20525   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20526   match(Set dst (MulVB src1 src2));
20527   effect(TEMP xtmp1, TEMP xtmp2);
20528   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20529   ins_encode %{
20530     int vlen_enc = vector_length_encoding(this);
20531     // Odd-index elements
20532     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20533     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20534     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20535     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20536     // Even-index elements
20537     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20538     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20539     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20540     // Combine
20541     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20542   %}
20543   ins_pipe( pipe_slow );
20544 %}
20545 
20546 // Shorts/Chars vector mul
20547 instruct vmulS(vec dst, vec src) %{
20548   predicate(UseAVX == 0);
20549   match(Set dst (MulVS dst src));
20550   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20551   ins_encode %{
20552     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20553   %}
20554   ins_pipe( pipe_slow );
20555 %}
20556 
20557 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20558   predicate(UseAVX > 0);
20559   match(Set dst (MulVS src1 src2));
20560   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20561   ins_encode %{
20562     int vlen_enc = vector_length_encoding(this);
20563     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20564   %}
20565   ins_pipe( pipe_slow );
20566 %}
20567 
20568 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20569   predicate((UseAVX > 0) &&
20570             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20571   match(Set dst (MulVS src (LoadVector mem)));
20572   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20573   ins_encode %{
20574     int vlen_enc = vector_length_encoding(this);
20575     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20576   %}
20577   ins_pipe( pipe_slow );
20578 %}
20579 
20580 // Integers vector mul
20581 instruct vmulI(vec dst, vec src) %{
20582   predicate(UseAVX == 0);
20583   match(Set dst (MulVI dst src));
20584   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20585   ins_encode %{
20586     assert(UseSSE > 3, "required");
20587     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20588   %}
20589   ins_pipe( pipe_slow );
20590 %}
20591 
20592 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20593   predicate(UseAVX > 0);
20594   match(Set dst (MulVI src1 src2));
20595   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20596   ins_encode %{
20597     int vlen_enc = vector_length_encoding(this);
20598     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20599   %}
20600   ins_pipe( pipe_slow );
20601 %}
20602 
20603 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20604   predicate((UseAVX > 0) &&
20605             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20606   match(Set dst (MulVI src (LoadVector mem)));
20607   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20608   ins_encode %{
20609     int vlen_enc = vector_length_encoding(this);
20610     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20611   %}
20612   ins_pipe( pipe_slow );
20613 %}
20614 
20615 // Longs vector mul
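// A packed 64x64->64 multiply (evpmullq) requires AVX512DQ: the 64-byte
// form needs DQ alone, while narrower EVEX forms also need AVX512VL (the
// avx512vldq checks). Without it, the product is synthesized from 32-bit
// multiplies further below.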
20616 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20617   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20618              VM_Version::supports_avx512dq()) ||
20619             VM_Version::supports_avx512vldq());
20620   match(Set dst (MulVL src1 src2));
20621   ins_cost(500);
20622   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20623   ins_encode %{
20624     assert(UseAVX > 2, "required");
20625     int vlen_enc = vector_length_encoding(this);
20626     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20627   %}
20628   ins_pipe( pipe_slow );
20629 %}
20630 
20631 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20632   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20633              VM_Version::supports_avx512dq()) ||
20634             (Matcher::vector_length_in_bytes(n) > 8 &&
20635              VM_Version::supports_avx512vldq()));
20636   match(Set dst (MulVL src (LoadVector mem)));
20637   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20638   ins_cost(500);
20639   ins_encode %{
20640     assert(UseAVX > 2, "required");
20641     int vlen_enc = vector_length_encoding(this);
20642     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20643   %}
20644   ins_pipe( pipe_slow );
20645 %}
20646 
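// Fallback: assemble a*b mod 2^64 from 32-bit pieces. With a = aH*2^32 + aL
// and b = bH*2^32 + bL (unsigned 32-bit halves):
//
//   a*b = aL*bL + (aH*bL + aL*bH)*2^32   (mod 2^64; the aH*bH term vanishes)
//
// The rules below form the two cross products with a 32-bit multiply after
// swapping dword halves (pshufd/vpshufd 0xB1), add them, shift the sum up
// 32 bits, and finally add the unsigned aL*bL product from pmuludq.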
20647 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20648   predicate(UseAVX == 0);
20649   match(Set dst (MulVL src1 src2));
20650   ins_cost(500);
20651   effect(TEMP dst, TEMP xtmp);
20652   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20653   ins_encode %{
20654     assert(VM_Version::supports_sse4_1(), "required");
20655     // Get the lo-hi cross products; only their lower 32 bits are of concern
20656     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20657     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20658     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20659     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20660     __ psllq($dst$$XMMRegister, 32);
20661     // Get the lo-lo products
20662     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20663     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20664     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20665   %}
20666   ins_pipe( pipe_slow );
20667 %}
20668 
20669 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20670   predicate(UseAVX > 0 &&
20671             ((Matcher::vector_length_in_bytes(n) == 64 &&
20672               !VM_Version::supports_avx512dq()) ||
20673              (Matcher::vector_length_in_bytes(n) < 64 &&
20674               !VM_Version::supports_avx512vldq())));
20675   match(Set dst (MulVL src1 src2));
20676   effect(TEMP xtmp1, TEMP xtmp2);
20677   ins_cost(500);
20678   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20679   ins_encode %{
20680     int vlen_enc = vector_length_encoding(this);
20681     // Get the lo-hi cross products; only their lower 32 bits are of concern
20682     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20683     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20684     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20685     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20686     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20687     // Get the lo-lo products
20688     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20689     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20690   %}
20691   ins_pipe( pipe_slow );
20692 %}
20693 
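// Fast paths: when C2 can prove that both MulVL inputs are zero-extended
// (has_uint_inputs) or sign-extended (has_int_inputs) 32-bit values, a
// single vpmuludq / vpmuldq already produces the full 64-bit product,
// hence ins_cost(100) versus 500 for the generic expansions above.
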
20694 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20695   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20696   match(Set dst (MulVL src1 src2));
20697   ins_cost(100);
20698   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20699   ins_encode %{
20700     int vlen_enc = vector_length_encoding(this);
20701     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20702   %}
20703   ins_pipe( pipe_slow );
20704 %}
20705 
20706 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20707   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20708   match(Set dst (MulVL src1 src2));
20709   ins_cost(100);
20710   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20711   ins_encode %{
20712     int vlen_enc = vector_length_encoding(this);
20713     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20714   %}
20715   ins_pipe( pipe_slow );
20716 %}
20717 
20718 // Floats vector mul
20719 instruct vmulF(vec dst, vec src) %{
20720   predicate(UseAVX == 0);
20721   match(Set dst (MulVF dst src));
20722   format %{ "mulps   $dst,$src\t! mul packedF" %}
20723   ins_encode %{
20724     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20725   %}
20726   ins_pipe( pipe_slow );
20727 %}
20728 
20729 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20730   predicate(UseAVX > 0);
20731   match(Set dst (MulVF src1 src2));
20732   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
20733   ins_encode %{
20734     int vlen_enc = vector_length_encoding(this);
20735     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20736   %}
20737   ins_pipe( pipe_slow );
20738 %}
20739 
20740 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20741   predicate((UseAVX > 0) &&
20742             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20743   match(Set dst (MulVF src (LoadVector mem)));
20744   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
20745   ins_encode %{
20746     int vlen_enc = vector_length_encoding(this);
20747     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20748   %}
20749   ins_pipe( pipe_slow );
20750 %}
20751 
20752 // Doubles vector mul
20753 instruct vmulD(vec dst, vec src) %{
20754   predicate(UseAVX == 0);
20755   match(Set dst (MulVD dst src));
20756   format %{ "mulpd   $dst,$src\t! mul packedD" %}
20757   ins_encode %{
20758     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20759   %}
20760   ins_pipe( pipe_slow );
20761 %}
20762 
20763 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20764   predicate(UseAVX > 0);
20765   match(Set dst (MulVD src1 src2));
20766   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
20767   ins_encode %{
20768     int vlen_enc = vector_length_encoding(this);
20769     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20770   %}
20771   ins_pipe( pipe_slow );
20772 %}
20773 
20774 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20775   predicate((UseAVX > 0) &&
20776             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20777   match(Set dst (MulVD src (LoadVector mem)));
20778   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
20779   ins_encode %{
20780     int vlen_enc = vector_length_encoding(this);
20781     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20782   %}
20783   ins_pipe( pipe_slow );
20784 %}
20785 
20786 // --------------------------------- DIV --------------------------------------
20787 
20788 // Floats vector div
20789 instruct vdivF(vec dst, vec src) %{
20790   predicate(UseAVX == 0);
20791   match(Set dst (DivVF dst src));
20792   format %{ "divps   $dst,$src\t! div packedF" %}
20793   ins_encode %{
20794     __ divps($dst$$XMMRegister, $src$$XMMRegister);
20795   %}
20796   ins_pipe( pipe_slow );
20797 %}
20798 
20799 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20800   predicate(UseAVX > 0);
20801   match(Set dst (DivVF src1 src2));
20802   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
20803   ins_encode %{
20804     int vlen_enc = vector_length_encoding(this);
20805     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20806   %}
20807   ins_pipe( pipe_slow );
20808 %}
20809 
20810 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20811   predicate((UseAVX > 0) &&
20812             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20813   match(Set dst (DivVF src (LoadVector mem)));
20814   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
20815   ins_encode %{
20816     int vlen_enc = vector_length_encoding(this);
20817     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20818   %}
20819   ins_pipe( pipe_slow );
20820 %}
20821 
20822 // Doubles vector div
20823 instruct vdivD(vec dst, vec src) %{
20824   predicate(UseAVX == 0);
20825   match(Set dst (DivVD dst src));
20826   format %{ "divpd   $dst,$src\t! div packedD" %}
20827   ins_encode %{
20828     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20829   %}
20830   ins_pipe( pipe_slow );
20831 %}
20832 
20833 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20834   predicate(UseAVX > 0);
20835   match(Set dst (DivVD src1 src2));
20836   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
20837   ins_encode %{
20838     int vlen_enc = vector_length_encoding(this);
20839     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20840   %}
20841   ins_pipe( pipe_slow );
20842 %}
20843 
20844 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20845   predicate((UseAVX > 0) &&
20846             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20847   match(Set dst (DivVD src (LoadVector mem)));
20848   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
20849   ins_encode %{
20850     int vlen_enc = vector_length_encoding(this);
20851     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20852   %}
20853   ins_pipe( pipe_slow );
20854 %}
20855 
20856 // ------------------------------ MinMax ---------------------------------------
20857 
20858 // Byte, Short, Int vector Min/Max
20859 instruct minmax_reg_sse(vec dst, vec src) %{
20860   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20861             UseAVX == 0);
20862   match(Set dst (MinV dst src));
20863   match(Set dst (MaxV dst src));
20864   format %{ "vector_minmax  $dst,$src\t!  " %}
20865   ins_encode %{
20866     assert(UseSSE >= 4, "required");
20867 
20868     int opcode = this->ideal_Opcode();
20869     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20870     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20871   %}
20872   ins_pipe( pipe_slow );
20873 %}
20874 
20875 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20876   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20877             UseAVX > 0);
20878   match(Set dst (MinV src1 src2));
20879   match(Set dst (MaxV src1 src2));
20880   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
20881   ins_encode %{
20882     int opcode = this->ideal_Opcode();
20883     int vlen_enc = vector_length_encoding(this);
20884     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20885 
20886     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20887   %}
20888   ins_pipe( pipe_slow );
20889 %}
20890 
20891 // Long vector Min/Max
20892 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20893   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20894             UseAVX == 0);
20895   match(Set dst (MinV dst src));
20896   match(Set dst (MaxV src dst));
20897   effect(TEMP dst, TEMP tmp);
20898   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
20899   ins_encode %{
20900     assert(UseSSE >= 4, "required");
20901 
20902     int opcode = this->ideal_Opcode();
20903     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20904     assert(elem_bt == T_LONG, "sanity");
20905 
20906     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20907   %}
20908   ins_pipe( pipe_slow );
20909 %}
20910 
20911 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20912   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20913             UseAVX > 0 && !VM_Version::supports_avx512vl());
20914   match(Set dst (MinV src1 src2));
20915   match(Set dst (MaxV src1 src2));
20916   effect(TEMP dst);
20917   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20918   ins_encode %{
20919     int vlen_enc = vector_length_encoding(this);
20920     int opcode = this->ideal_Opcode();
20921     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20922     assert(elem_bt == T_LONG, "sanity");
20923 
20924     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20925   %}
20926   ins_pipe( pipe_slow );
20927 %}
20928 
20929 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20930   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20931             Matcher::vector_element_basic_type(n) == T_LONG);
20932   match(Set dst (MinV src1 src2));
20933   match(Set dst (MaxV src1 src2));
20934   format %{ "vector_minmaxL  $dst,$src1,src2\t! " %}
20935   ins_encode %{
20936     assert(UseAVX > 2, "required");
20937 
20938     int vlen_enc = vector_length_encoding(this);
20939     int opcode = this->ideal_Opcode();
20940     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20941     assert(elem_bt == T_LONG, "sanity");
20942 
20943     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20944   %}
20945   ins_pipe( pipe_slow );
20946 %}
20947 
20948 // Float/Double vector Min/Max
20949 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20950   predicate(VM_Version::supports_avx10_2() &&
20951             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20952   match(Set dst (MinV a b));
20953   match(Set dst (MaxV a b));
20954   format %{ "vector_minmaxFP  $dst, $a, $b" %}
20955   ins_encode %{
20956     int vlen_enc = vector_length_encoding(this);
20957     int opcode = this->ideal_Opcode();
20958     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20959     __ vminmax_fp_avx10_2(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20960   %}
20961   ins_pipe( pipe_slow );
20962 %}
20963 
20964 // Float/Double vector Min/Max
20965 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20966   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20967             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20968             UseAVX > 0);
20969   match(Set dst (MinV a b));
20970   match(Set dst (MaxV a b));
20971   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20972   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20973   ins_encode %{
20974     assert(UseAVX > 0, "required");
20975 
20976     int opcode = this->ideal_Opcode();
20977     int vlen_enc = vector_length_encoding(this);
20978     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20979 
20980     __ vminmax_fp(opcode, elem_bt,
20981                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20983   %}
20984   ins_pipe( pipe_slow );
20985 %}
20986 
20987 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20988   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20989             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20990   match(Set dst (MinV a b));
20991   match(Set dst (MaxV a b));
20992   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20993   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20994   ins_encode %{
20995     assert(UseAVX > 2, "required");
20996 
20997     int opcode = this->ideal_Opcode();
20998     int vlen_enc = vector_length_encoding(this);
20999     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21000 
21001     __ evminmax_fp(opcode, elem_bt,
21002                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
21004   %}
21005   ins_pipe( pipe_slow );
21006 %}
21007 
21008 // ------------------------------ Unsigned vector Min/Max ----------------------
21009 
21010 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21011   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21012   match(Set dst (UMinV a b));
21013   match(Set dst (UMaxV a b));
21014   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21015   ins_encode %{
21016     int opcode = this->ideal_Opcode();
21017     int vlen_enc = vector_length_encoding(this);
21018     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21019     assert(is_integral_type(elem_bt), "");
21020     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21021   %}
21022   ins_pipe( pipe_slow );
21023 %}
21024 
21025 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21026   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21027   match(Set dst (UMinV a (LoadVector b)));
21028   match(Set dst (UMaxV a (LoadVector b)));
21029   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21030   ins_encode %{
21031     int opcode = this->ideal_Opcode();
21032     int vlen_enc = vector_length_encoding(this);
21033     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21034     assert(is_integral_type(elem_bt), "");
21035     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21036   %}
21037   ins_pipe( pipe_slow );
21038 %}
21039 
21040 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21041   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21042   match(Set dst (UMinV a b));
21043   match(Set dst (UMaxV a b));
21044   effect(TEMP xtmp1, TEMP xtmp2);
21045   format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
21046   ins_encode %{
21047     int opcode = this->ideal_Opcode();
21048     int vlen_enc = vector_length_encoding(this);
21049     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21050   %}
21051   ins_pipe( pipe_slow );
21052 %}
21053 
21054 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21055   match(Set dst (UMinV (Binary dst src2) mask));
21056   match(Set dst (UMaxV (Binary dst src2) mask));
21057   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21058   ins_encode %{
21059     int vlen_enc = vector_length_encoding(this);
21060     BasicType bt = Matcher::vector_element_basic_type(this);
21061     int opc = this->ideal_Opcode();
21062     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21063                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21064   %}
21065   ins_pipe( pipe_slow );
21066 %}
21067 
21068 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21069   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21070   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21071   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21072   ins_encode %{
21073     int vlen_enc = vector_length_encoding(this);
21074     BasicType bt = Matcher::vector_element_basic_type(this);
21075     int opc = this->ideal_Opcode();
21076     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21077                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21078   %}
21079   ins_pipe( pipe_slow );
21080 %}
21081 
21082 // --------------------------------- Signum/CopySign ---------------------------
21083 
21084 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21085   match(Set dst (SignumF dst (Binary zero one)));
21086   effect(KILL cr);
21087   format %{ "signumF $dst, $dst" %}
21088   ins_encode %{
21089     int opcode = this->ideal_Opcode();
21090     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21091   %}
21092   ins_pipe( pipe_slow );
21093 %}
21094 
21095 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21096   match(Set dst (SignumD dst (Binary zero one)));
21097   effect(KILL cr);
21098   format %{ "signumD $dst, $dst" %}
21099   ins_encode %{
21100     int opcode = this->ideal_Opcode();
21101     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21102   %}
21103   ins_pipe( pipe_slow );
21104 %}
21105 
21106 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21107   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21108   match(Set dst (SignumVF src (Binary zero one)));
21109   match(Set dst (SignumVD src (Binary zero one)));
21110   effect(TEMP dst, TEMP xtmp1);
21111   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21112   ins_encode %{
21113     int opcode = this->ideal_Opcode();
21114     int vec_enc = vector_length_encoding(this);
21115     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21116                          $xtmp1$$XMMRegister, vec_enc);
21117   %}
21118   ins_pipe( pipe_slow );
21119 %}
21120 
21121 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21122   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21123   match(Set dst (SignumVF src (Binary zero one)));
21124   match(Set dst (SignumVD src (Binary zero one)));
21125   effect(TEMP dst, TEMP ktmp1);
21126   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21127   ins_encode %{
21128     int opcode = this->ideal_Opcode();
21129     int vec_enc = vector_length_encoding(this);
21130     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21131                           $ktmp1$$KRegister, vec_enc);
21132   %}
21133   ins_pipe( pipe_slow );
21134 %}
21135 
21136 // ---------------------------------------
// For copySign, use 0xE4 as the immediate truth-table selector for vpternlog
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
// C (xmm2) is set to the magnitude mask 0x7FFFFFFF
// Wherever C is 0 (the sign bit), we want to pick from B (the sign operand)
// Wherever C is 1 (magnitude bits), we want to pick from A (the magnitude operand)
21142 //
21143 // A B C Result
21144 // 0 0 0 0
21145 // 0 0 1 0
21146 // 0 1 0 1
21147 // 0 1 1 0
21148 // 1 0 0 0
21149 // 1 0 1 1
21150 // 1 1 0 1
21151 // 1 1 1 1
21152 //
// Reading the result column from high bit to low bit gives 0b11100100 = 0xE4
21154 // ---------------------------------------
21155 
21156 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21157   match(Set dst (CopySignF dst src));
21158   effect(TEMP tmp1, TEMP tmp2);
21159   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21160   ins_encode %{
21161     __ movl($tmp2$$Register, 0x7FFFFFFF);
21162     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21163     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21164   %}
21165   ins_pipe( pipe_slow );
21166 %}
21167 
21168 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21169   match(Set dst (CopySignD dst (Binary src zero)));
21170   ins_cost(100);
21171   effect(TEMP tmp1, TEMP tmp2);
21172   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21173   ins_encode %{
21174     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21175     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21176     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21177   %}
21178   ins_pipe( pipe_slow );
21179 %}
21180 
21181 //----------------------------- CompressBits/ExpandBits ------------------------
21182 
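// Illustrative note on the BMI2 semantics assumed below: pext gathers the
// src bits selected by mask into the contiguous low bits of dst, and pdep
// scatters the low bits of src out to the bit positions set in mask, e.g.
//   pext(src = 0b101101, mask = 0b001100) == 0b11
//   pdep(src = 0b000011, mask = 0b001100) == 0b001100
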
21183 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21184   predicate(n->bottom_type()->isa_int());
21185   match(Set dst (CompressBits src mask));
21186   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21187   ins_encode %{
21188     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21189   %}
21190   ins_pipe( pipe_slow );
21191 %}
21192 
21193 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21194   predicate(n->bottom_type()->isa_int());
21195   match(Set dst (ExpandBits src mask));
21196   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21197   ins_encode %{
21198     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21199   %}
21200   ins_pipe( pipe_slow );
21201 %}
21202 
21203 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21204   predicate(n->bottom_type()->isa_int());
21205   match(Set dst (CompressBits src (LoadI mask)));
21206   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21207   ins_encode %{
21208     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21209   %}
21210   ins_pipe( pipe_slow );
21211 %}
21212 
21213 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21214   predicate(n->bottom_type()->isa_int());
21215   match(Set dst (ExpandBits src (LoadI mask)));
21216   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21217   ins_encode %{
21218     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21219   %}
21220   ins_pipe( pipe_slow );
21221 %}
21222 
21223 // --------------------------------- Sqrt --------------------------------------
21224 
21225 instruct vsqrtF_reg(vec dst, vec src) %{
21226   match(Set dst (SqrtVF src));
21227   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
21228   ins_encode %{
21229     assert(UseAVX > 0, "required");
21230     int vlen_enc = vector_length_encoding(this);
21231     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21232   %}
21233   ins_pipe( pipe_slow );
21234 %}
21235 
21236 instruct vsqrtF_mem(vec dst, memory mem) %{
21237   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21238   match(Set dst (SqrtVF (LoadVector mem)));
21239   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
21240   ins_encode %{
21241     assert(UseAVX > 0, "required");
21242     int vlen_enc = vector_length_encoding(this);
21243     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21244   %}
21245   ins_pipe( pipe_slow );
21246 %}
21247 
21248 // Floating point vector sqrt
21249 instruct vsqrtD_reg(vec dst, vec src) %{
21250   match(Set dst (SqrtVD src));
21251   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
21252   ins_encode %{
21253     assert(UseAVX > 0, "required");
21254     int vlen_enc = vector_length_encoding(this);
21255     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21256   %}
21257   ins_pipe( pipe_slow );
21258 %}
21259 
21260 instruct vsqrtD_mem(vec dst, memory mem) %{
21261   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21262   match(Set dst (SqrtVD (LoadVector mem)));
21263   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
21264   ins_encode %{
21265     assert(UseAVX > 0, "required");
21266     int vlen_enc = vector_length_encoding(this);
21267     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21268   %}
21269   ins_pipe( pipe_slow );
21270 %}
21271 
21272 // ------------------------------ Shift ---------------------------------------
21273 
21274 // Left and right shift count vectors are the same on x86
21275 // (only lowest bits of xmm reg are used for count).
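// (Illustrative detail: the packed shift instructions, e.g. psllw/psrlw,
// take the count from bits 63:0 of their xmm operand, which is why a single
// movdl of the scalar count serves both shift directions.)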
21276 instruct vshiftcnt(vec dst, rRegI cnt) %{
21277   match(Set dst (LShiftCntV cnt));
21278   match(Set dst (RShiftCntV cnt));
21279   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21280   ins_encode %{
21281     __ movdl($dst$$XMMRegister, $cnt$$Register);
21282   %}
21283   ins_pipe( pipe_slow );
21284 %}
21285 
21286 // Byte vector shift
21287 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21288   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21289   match(Set dst ( LShiftVB src shift));
21290   match(Set dst ( RShiftVB src shift));
21291   match(Set dst (URShiftVB src shift));
21292   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21293   format %{"vector_byte_shift $dst,$src,$shift" %}
21294   ins_encode %{
21295     assert(UseSSE > 3, "required");
21296     int opcode = this->ideal_Opcode();
21297     bool sign = (opcode != Op_URShiftVB);
21298     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21299     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21300     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21301     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21302     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21303   %}
21304   ins_pipe( pipe_slow );
21305 %}
21306 
21307 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21308   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21309             UseAVX <= 1);
21310   match(Set dst ( LShiftVB src shift));
21311   match(Set dst ( RShiftVB src shift));
21312   match(Set dst (URShiftVB src shift));
21313   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21314   format %{"vector_byte_shift $dst,$src,$shift" %}
21315   ins_encode %{
21316     assert(UseSSE > 3, "required");
21317     int opcode = this->ideal_Opcode();
21318     bool sign = (opcode != Op_URShiftVB);
21319     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21320     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21321     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21322     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21323     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21324     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21325     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21326     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21327     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21328   %}
21329   ins_pipe( pipe_slow );
21330 %}
21331 
21332 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21333   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21334             UseAVX > 1);
21335   match(Set dst ( LShiftVB src shift));
21336   match(Set dst ( RShiftVB src shift));
21337   match(Set dst (URShiftVB src shift));
21338   effect(TEMP dst, TEMP tmp);
21339   format %{"vector_byte_shift $dst,$src,$shift" %}
21340   ins_encode %{
21341     int opcode = this->ideal_Opcode();
21342     bool sign = (opcode != Op_URShiftVB);
21343     int vlen_enc = Assembler::AVX_256bit;
21344     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21345     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21346     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21347     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21348     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21349   %}
21350   ins_pipe( pipe_slow );
21351 %}
21352 
21353 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21354   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21355   match(Set dst ( LShiftVB src shift));
21356   match(Set dst ( RShiftVB src shift));
21357   match(Set dst (URShiftVB src shift));
21358   effect(TEMP dst, TEMP tmp);
21359   format %{"vector_byte_shift $dst,$src,$shift" %}
21360   ins_encode %{
21361     assert(UseAVX > 1, "required");
21362     int opcode = this->ideal_Opcode();
21363     bool sign = (opcode != Op_URShiftVB);
21364     int vlen_enc = Assembler::AVX_256bit;
21365     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21366     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21367     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21368     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21369     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21370     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21371     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21372     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21373     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21374   %}
21375   ins_pipe( pipe_slow );
21376 %}
21377 
21378 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21379   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21380   match(Set dst ( LShiftVB src shift));
21381   match(Set dst  (RShiftVB src shift));
21382   match(Set dst (URShiftVB src shift));
21383   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21384   format %{"vector_byte_shift $dst,$src,$shift" %}
21385   ins_encode %{
21386     assert(UseAVX > 2, "required");
21387     int opcode = this->ideal_Opcode();
21388     bool sign = (opcode != Op_URShiftVB);
21389     int vlen_enc = Assembler::AVX_512bit;
21390     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21391     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21392     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21393     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21394     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21395     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21396     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21397     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21398     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21399     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21400     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21401     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21402   %}
21403   ins_pipe( pipe_slow );
21404 %}
21405 
// A logical right shift on a short vector produces an incorrect Java result
// for negative data, because Java code converts a short value into an int
// with sign extension before shifting. Char vectors are fine, since chars
// are unsigned values.
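// Illustrative example: for the Java expression (short)0xFFFC >>> 1, the
// operand is widened to the int 0xFFFFFFFC, shifted to 0x7FFFFFFE, and only
// then narrowed back to a short, giving 0xFFFE; a 16-bit psrlw on 0xFFFC
// would yield 0x7FFE instead.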
// Shorts/Chars vector shift
21411 instruct vshiftS(vec dst, vec src, vec shift) %{
21412   predicate(!n->as_ShiftV()->is_var_shift());
21413   match(Set dst ( LShiftVS src shift));
21414   match(Set dst ( RShiftVS src shift));
21415   match(Set dst (URShiftVS src shift));
21416   effect(TEMP dst, USE src, USE shift);
21417   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21418   ins_encode %{
21419     int opcode = this->ideal_Opcode();
21420     if (UseAVX > 0) {
21421       int vlen_enc = vector_length_encoding(this);
21422       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21423     } else {
21424       int vlen = Matcher::vector_length(this);
21425       if (vlen == 2) {
21426         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21427         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21428       } else if (vlen == 4) {
21429         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21430         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21431       } else {
        assert(vlen == 8, "sanity");
21433         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21434         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21435       }
21436     }
21437   %}
21438   ins_pipe( pipe_slow );
21439 %}
21440 
// Integers vector shift
21442 instruct vshiftI(vec dst, vec src, vec shift) %{
21443   predicate(!n->as_ShiftV()->is_var_shift());
21444   match(Set dst ( LShiftVI src shift));
21445   match(Set dst ( RShiftVI src shift));
21446   match(Set dst (URShiftVI src shift));
21447   effect(TEMP dst, USE src, USE shift);
21448   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21449   ins_encode %{
21450     int opcode = this->ideal_Opcode();
21451     if (UseAVX > 0) {
21452       int vlen_enc = vector_length_encoding(this);
21453       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21454     } else {
21455       int vlen = Matcher::vector_length(this);
21456       if (vlen == 2) {
21457         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21458         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21459       } else {
21460         assert(vlen == 4, "sanity");
21461         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21462         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21463       }
21464     }
21465   %}
21466   ins_pipe( pipe_slow );
21467 %}
21468 
// Integers vector constant shift
21470 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21471   match(Set dst (LShiftVI src (LShiftCntV shift)));
21472   match(Set dst (RShiftVI src (RShiftCntV shift)));
21473   match(Set dst (URShiftVI src (RShiftCntV shift)));
21474   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21475   ins_encode %{
21476     int opcode = this->ideal_Opcode();
21477     if (UseAVX > 0) {
21478       int vector_len = vector_length_encoding(this);
21479       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21480     } else {
21481       int vlen = Matcher::vector_length(this);
21482       if (vlen == 2) {
21483         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21484         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21485       } else {
21486         assert(vlen == 4, "sanity");
21487         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21488         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21489       }
21490     }
21491   %}
21492   ins_pipe( pipe_slow );
21493 %}
21494 
21495 // Longs vector shift
21496 instruct vshiftL(vec dst, vec src, vec shift) %{
21497   predicate(!n->as_ShiftV()->is_var_shift());
21498   match(Set dst ( LShiftVL src shift));
21499   match(Set dst (URShiftVL src shift));
21500   effect(TEMP dst, USE src, USE shift);
21501   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21502   ins_encode %{
21503     int opcode = this->ideal_Opcode();
21504     if (UseAVX > 0) {
21505       int vlen_enc = vector_length_encoding(this);
21506       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21507     } else {
21508       assert(Matcher::vector_length(this) == 2, "");
21509       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21510       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21511     }
21512   %}
21513   ins_pipe( pipe_slow );
21514 %}
21515 
21516 // Longs vector constant shift
21517 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21518   match(Set dst (LShiftVL src (LShiftCntV shift)));
21519   match(Set dst (URShiftVL src (RShiftCntV shift)));
21520   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21521   ins_encode %{
21522     int opcode = this->ideal_Opcode();
21523     if (UseAVX > 0) {
21524       int vector_len = vector_length_encoding(this);
21525       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21526     } else {
21527       assert(Matcher::vector_length(this) == 2, "");
21528       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21529       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21530     }
21531   %}
21532   ins_pipe( pipe_slow );
21533 %}
21534 
21535 // -------------------ArithmeticRightShift -----------------------------------
21536 // Long vector arithmetic right shift
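// Pre-AVX-512 x86 has no packed 64-bit arithmetic right shift, so the
// SSE/AVX2 paths below emulate it with the usual sign-extension identity
// (illustrative sketch, for shift count n and m = 0x8000000000000000 >>> n):
//   sra(x, n) == (srl(x, n) ^ m) - m
// e.g. x = -16, n = 2: srl gives 0x3FFF...FFFC, m = 0x2000...0000, and the
// xor/subtract pair restores the high sign bits, yielding -4.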
21537 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21538   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21539   match(Set dst (RShiftVL src shift));
21540   effect(TEMP dst, TEMP tmp);
21541   format %{ "vshiftq $dst,$src,$shift" %}
21542   ins_encode %{
21543     uint vlen = Matcher::vector_length(this);
21544     if (vlen == 2) {
21545       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21546       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21547       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21548       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21549       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21550       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21551     } else {
21552       assert(vlen == 4, "sanity");
21553       assert(UseAVX > 1, "required");
21554       int vlen_enc = Assembler::AVX_256bit;
21555       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21556       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21557       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21558       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21559       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21560     }
21561   %}
21562   ins_pipe( pipe_slow );
21563 %}
21564 
21565 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21566   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21567   match(Set dst (RShiftVL src shift));
21568   format %{ "vshiftq $dst,$src,$shift" %}
21569   ins_encode %{
21570     int vlen_enc = vector_length_encoding(this);
21571     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21572   %}
21573   ins_pipe( pipe_slow );
21574 %}
21575 
21576 // ------------------- Variable Shift -----------------------------
21577 // Byte variable shift
21578 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21579   predicate(Matcher::vector_length(n) <= 8 &&
21580             n->as_ShiftV()->is_var_shift() &&
21581             !VM_Version::supports_avx512bw());
21582   match(Set dst ( LShiftVB src shift));
21583   match(Set dst ( RShiftVB src shift));
21584   match(Set dst (URShiftVB src shift));
21585   effect(TEMP dst, TEMP vtmp);
21586   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21587   ins_encode %{
21588     assert(UseAVX >= 2, "required");
21589 
21590     int opcode = this->ideal_Opcode();
21591     int vlen_enc = Assembler::AVX_128bit;
21592     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21593     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21594   %}
21595   ins_pipe( pipe_slow );
21596 %}
21597 
21598 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21599   predicate(Matcher::vector_length(n) == 16 &&
21600             n->as_ShiftV()->is_var_shift() &&
21601             !VM_Version::supports_avx512bw());
21602   match(Set dst ( LShiftVB src shift));
21603   match(Set dst ( RShiftVB src shift));
21604   match(Set dst (URShiftVB src shift));
21605   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21606   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21607   ins_encode %{
21608     assert(UseAVX >= 2, "required");
21609 
21610     int opcode = this->ideal_Opcode();
21611     int vlen_enc = Assembler::AVX_128bit;
21612     // Shift lower half and get word result in dst
21613     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21614 
21615     // Shift upper half and get word result in vtmp1
21616     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21617     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21618     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21619 
21620     // Merge and down convert the two word results to byte in dst
21621     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21622   %}
21623   ins_pipe( pipe_slow );
21624 %}
21625 
21626 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21627   predicate(Matcher::vector_length(n) == 32 &&
21628             n->as_ShiftV()->is_var_shift() &&
21629             !VM_Version::supports_avx512bw());
21630   match(Set dst ( LShiftVB src shift));
21631   match(Set dst ( RShiftVB src shift));
21632   match(Set dst (URShiftVB src shift));
21633   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21634   format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21635   ins_encode %{
21636     assert(UseAVX >= 2, "required");
21637 
21638     int opcode = this->ideal_Opcode();
21639     int vlen_enc = Assembler::AVX_128bit;
21640     // Process lower 128 bits and get result in dst
21641     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21642     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21643     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21644     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21645     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21646 
21647     // Process higher 128 bits and get result in vtmp3
21648     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21649     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21650     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21651     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21652     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21653     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21654     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21655 
21656     // Merge the two results in dst
21657     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21658   %}
21659   ins_pipe( pipe_slow );
21660 %}
21661 
21662 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21663   predicate(Matcher::vector_length(n) <= 32 &&
21664             n->as_ShiftV()->is_var_shift() &&
21665             VM_Version::supports_avx512bw());
21666   match(Set dst ( LShiftVB src shift));
21667   match(Set dst ( RShiftVB src shift));
21668   match(Set dst (URShiftVB src shift));
21669   effect(TEMP dst, TEMP vtmp);
21670   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21671   ins_encode %{
21672     assert(UseAVX > 2, "required");
21673 
21674     int opcode = this->ideal_Opcode();
21675     int vlen_enc = vector_length_encoding(this);
21676     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21677   %}
21678   ins_pipe( pipe_slow );
21679 %}
21680 
21681 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21682   predicate(Matcher::vector_length(n) == 64 &&
21683             n->as_ShiftV()->is_var_shift() &&
21684             VM_Version::supports_avx512bw());
21685   match(Set dst ( LShiftVB src shift));
21686   match(Set dst ( RShiftVB src shift));
21687   match(Set dst (URShiftVB src shift));
21688   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21689   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21690   ins_encode %{
21691     assert(UseAVX > 2, "required");
21692 
21693     int opcode = this->ideal_Opcode();
21694     int vlen_enc = Assembler::AVX_256bit;
21695     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21696     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21697     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21698     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21699     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21700   %}
21701   ins_pipe( pipe_slow );
21702 %}
21703 
21704 // Short variable shift
21705 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21706   predicate(Matcher::vector_length(n) <= 8 &&
21707             n->as_ShiftV()->is_var_shift() &&
21708             !VM_Version::supports_avx512bw());
21709   match(Set dst ( LShiftVS src shift));
21710   match(Set dst ( RShiftVS src shift));
21711   match(Set dst (URShiftVS src shift));
21712   effect(TEMP dst, TEMP vtmp);
21713   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21714   ins_encode %{
21715     assert(UseAVX >= 2, "required");
21716 
21717     int opcode = this->ideal_Opcode();
21718     bool sign = (opcode != Op_URShiftVS);
21719     int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21722     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21723     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21724     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21725     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21726   %}
21727   ins_pipe( pipe_slow );
21728 %}
21729 
21730 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21731   predicate(Matcher::vector_length(n) == 16 &&
21732             n->as_ShiftV()->is_var_shift() &&
21733             !VM_Version::supports_avx512bw());
21734   match(Set dst ( LShiftVS src shift));
21735   match(Set dst ( RShiftVS src shift));
21736   match(Set dst (URShiftVS src shift));
21737   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21738   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21739   ins_encode %{
21740     assert(UseAVX >= 2, "required");
21741 
21742     int opcode = this->ideal_Opcode();
21743     bool sign = (opcode != Op_URShiftVS);
21744     int vlen_enc = Assembler::AVX_256bit;
21745     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21746     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21747     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21748     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21749     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21750 
21751     // Shift upper half, with result in dst using vtmp1 as TEMP
21752     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21753     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21754     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21755     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21756     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21757     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21758 
21759     // Merge lower and upper half result into dst
21760     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21761     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21762   %}
21763   ins_pipe( pipe_slow );
21764 %}
21765 
21766 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21767   predicate(n->as_ShiftV()->is_var_shift() &&
21768             VM_Version::supports_avx512bw());
21769   match(Set dst ( LShiftVS src shift));
21770   match(Set dst ( RShiftVS src shift));
21771   match(Set dst (URShiftVS src shift));
21772   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21773   ins_encode %{
21774     assert(UseAVX > 2, "required");
21775 
21776     int opcode = this->ideal_Opcode();
21777     int vlen_enc = vector_length_encoding(this);
21778     if (!VM_Version::supports_avx512vl()) {
21779       vlen_enc = Assembler::AVX_512bit;
21780     }
21781     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21782   %}
21783   ins_pipe( pipe_slow );
21784 %}
21785 
// Integer variable shift
21787 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21788   predicate(n->as_ShiftV()->is_var_shift());
21789   match(Set dst ( LShiftVI src shift));
21790   match(Set dst ( RShiftVI src shift));
21791   match(Set dst (URShiftVI src shift));
21792   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21793   ins_encode %{
21794     assert(UseAVX >= 2, "required");
21795 
21796     int opcode = this->ideal_Opcode();
21797     int vlen_enc = vector_length_encoding(this);
21798     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21799   %}
21800   ins_pipe( pipe_slow );
21801 %}
21802 
// Long variable shift
21804 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21805   predicate(n->as_ShiftV()->is_var_shift());
21806   match(Set dst ( LShiftVL src shift));
21807   match(Set dst (URShiftVL src shift));
21808   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21809   ins_encode %{
21810     assert(UseAVX >= 2, "required");
21811 
21812     int opcode = this->ideal_Opcode();
21813     int vlen_enc = vector_length_encoding(this);
21814     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21815   %}
21816   ins_pipe( pipe_slow );
21817 %}
21818 
// Long variable arithmetic right shift
21820 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21821   predicate(Matcher::vector_length(n) <= 4 &&
21822             n->as_ShiftV()->is_var_shift() &&
21823             UseAVX == 2);
21824   match(Set dst (RShiftVL src shift));
21825   effect(TEMP dst, TEMP vtmp);
21826   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21827   ins_encode %{
21828     int opcode = this->ideal_Opcode();
21829     int vlen_enc = vector_length_encoding(this);
21830     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21831                  $vtmp$$XMMRegister);
21832   %}
21833   ins_pipe( pipe_slow );
21834 %}
21835 
21836 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21837   predicate(n->as_ShiftV()->is_var_shift() &&
21838             UseAVX > 2);
21839   match(Set dst (RShiftVL src shift));
21840   format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
21841   ins_encode %{
21842     int opcode = this->ideal_Opcode();
21843     int vlen_enc = vector_length_encoding(this);
21844     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21845   %}
21846   ins_pipe( pipe_slow );
21847 %}
21848 
21849 // --------------------------------- AND --------------------------------------
21850 
21851 instruct vand(vec dst, vec src) %{
21852   predicate(UseAVX == 0);
21853   match(Set dst (AndV dst src));
21854   format %{ "pand    $dst,$src\t! and vectors" %}
21855   ins_encode %{
21856     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21857   %}
21858   ins_pipe( pipe_slow );
21859 %}
21860 
21861 instruct vand_reg(vec dst, vec src1, vec src2) %{
21862   predicate(UseAVX > 0);
21863   match(Set dst (AndV src1 src2));
21864   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
21865   ins_encode %{
21866     int vlen_enc = vector_length_encoding(this);
21867     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21868   %}
21869   ins_pipe( pipe_slow );
21870 %}
21871 
21872 instruct vand_mem(vec dst, vec src, memory mem) %{
21873   predicate((UseAVX > 0) &&
21874             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21875   match(Set dst (AndV src (LoadVector mem)));
21876   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
21877   ins_encode %{
21878     int vlen_enc = vector_length_encoding(this);
21879     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21880   %}
21881   ins_pipe( pipe_slow );
21882 %}
21883 
21884 // --------------------------------- OR ---------------------------------------
21885 
21886 instruct vor(vec dst, vec src) %{
21887   predicate(UseAVX == 0);
21888   match(Set dst (OrV dst src));
21889   format %{ "por     $dst,$src\t! or vectors" %}
21890   ins_encode %{
21891     __ por($dst$$XMMRegister, $src$$XMMRegister);
21892   %}
21893   ins_pipe( pipe_slow );
21894 %}
21895 
21896 instruct vor_reg(vec dst, vec src1, vec src2) %{
21897   predicate(UseAVX > 0);
21898   match(Set dst (OrV src1 src2));
21899   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
21900   ins_encode %{
21901     int vlen_enc = vector_length_encoding(this);
21902     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21903   %}
21904   ins_pipe( pipe_slow );
21905 %}
21906 
21907 instruct vor_mem(vec dst, vec src, memory mem) %{
21908   predicate((UseAVX > 0) &&
21909             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21910   match(Set dst (OrV src (LoadVector mem)));
21911   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
21912   ins_encode %{
21913     int vlen_enc = vector_length_encoding(this);
21914     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21915   %}
21916   ins_pipe( pipe_slow );
21917 %}
21918 
21919 // --------------------------------- XOR --------------------------------------
21920 
21921 instruct vxor(vec dst, vec src) %{
21922   predicate(UseAVX == 0);
21923   match(Set dst (XorV dst src));
21924   format %{ "pxor    $dst,$src\t! xor vectors" %}
21925   ins_encode %{
21926     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21927   %}
21928   ins_pipe( pipe_slow );
21929 %}
21930 
21931 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21932   predicate(UseAVX > 0);
21933   match(Set dst (XorV src1 src2));
21934   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
21935   ins_encode %{
21936     int vlen_enc = vector_length_encoding(this);
21937     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21938   %}
21939   ins_pipe( pipe_slow );
21940 %}
21941 
21942 instruct vxor_mem(vec dst, vec src, memory mem) %{
21943   predicate((UseAVX > 0) &&
21944             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21945   match(Set dst (XorV src (LoadVector mem)));
21946   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
21947   ins_encode %{
21948     int vlen_enc = vector_length_encoding(this);
21949     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21950   %}
21951   ins_pipe( pipe_slow );
21952 %}
21953 
21954 // --------------------------------- VectorCast --------------------------------------
21955 
21956 instruct vcastBtoX(vec dst, vec src) %{
21957   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21958   match(Set dst (VectorCastB2X src));
21959   format %{ "vector_cast_b2x $dst,$src\t!" %}
21960   ins_encode %{
21961     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21962     int vlen_enc = vector_length_encoding(this);
21963     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21964   %}
21965   ins_pipe( pipe_slow );
21966 %}
21967 
21968 instruct vcastBtoD(legVec dst, legVec src) %{
21969   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21970   match(Set dst (VectorCastB2X src));
21971   format %{ "vector_cast_b2x $dst,$src\t!" %}
21972   ins_encode %{
21973     int vlen_enc = vector_length_encoding(this);
21974     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21975   %}
21976   ins_pipe( pipe_slow );
21977 %}
21978 
21979 instruct castStoX(vec dst, vec src) %{
21980   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21981             Matcher::vector_length(n->in(1)) <= 8 && // src
21982             Matcher::vector_element_basic_type(n) == T_BYTE);
21983   match(Set dst (VectorCastS2X src));
21984   format %{ "vector_cast_s2x $dst,$src" %}
21985   ins_encode %{
21986     assert(UseAVX > 0, "required");
21987 
21988     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21989     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21990   %}
21991   ins_pipe( pipe_slow );
21992 %}
21993 
21994 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21995   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21996             Matcher::vector_length(n->in(1)) == 16 && // src
21997             Matcher::vector_element_basic_type(n) == T_BYTE);
21998   effect(TEMP dst, TEMP vtmp);
21999   match(Set dst (VectorCastS2X src));
22000   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22001   ins_encode %{
22002     assert(UseAVX > 0, "required");
22003 
22004     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22005     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22006     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22007     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22008   %}
22009   ins_pipe( pipe_slow );
22010 %}
22011 
22012 instruct vcastStoX_evex(vec dst, vec src) %{
22013   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22014             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22015   match(Set dst (VectorCastS2X src));
22016   format %{ "vector_cast_s2x $dst,$src\t!" %}
22017   ins_encode %{
22018     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22019     int src_vlen_enc = vector_length_encoding(this, $src);
22020     int vlen_enc = vector_length_encoding(this);
22021     switch (to_elem_bt) {
22022       case T_BYTE:
22023         if (!VM_Version::supports_avx512vl()) {
22024           vlen_enc = Assembler::AVX_512bit;
22025         }
22026         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22027         break;
22028       case T_INT:
22029         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22030         break;
22031       case T_FLOAT:
22032         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22033         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22034         break;
22035       case T_LONG:
22036         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22037         break;
22038       case T_DOUBLE: {
22039         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22040         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22041         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22042         break;
22043       }
22044       default:
22045         ShouldNotReachHere();
22046     }
22047   %}
22048   ins_pipe( pipe_slow );
22049 %}
22050 
22051 instruct castItoX(vec dst, vec src) %{
22052   predicate(UseAVX <= 2 &&
22053             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22054             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22055   match(Set dst (VectorCastI2X src));
22056   format %{ "vector_cast_i2x $dst,$src" %}
22057   ins_encode %{
22058     assert(UseAVX > 0, "required");
22059 
22060     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22061     int vlen_enc = vector_length_encoding(this, $src);
22062 
22063     if (to_elem_bt == T_BYTE) {
22064       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22065       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22066       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22067     } else {
22068       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22069       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22070       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22071     }
22072   %}
22073   ins_pipe( pipe_slow );
22074 %}
22075 
22076 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22077   predicate(UseAVX <= 2 &&
22078             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22079             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22080   match(Set dst (VectorCastI2X src));
22081   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22082   effect(TEMP dst, TEMP vtmp);
22083   ins_encode %{
22084     assert(UseAVX > 0, "required");
22085 
22086     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22087     int vlen_enc = vector_length_encoding(this, $src);
22088 
22089     if (to_elem_bt == T_BYTE) {
22090       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22091       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22092       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22093       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22094     } else {
22095       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22096       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22097       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22098       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22099     }
22100   %}
22101   ins_pipe( pipe_slow );
22102 %}
22103 
22104 instruct vcastItoX_evex(vec dst, vec src) %{
22105   predicate(UseAVX > 2 ||
22106             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22107   match(Set dst (VectorCastI2X src));
22108   format %{ "vector_cast_i2x $dst,$src\t!" %}
22109   ins_encode %{
22110     assert(UseAVX > 0, "required");
22111 
22112     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22113     int src_vlen_enc = vector_length_encoding(this, $src);
22114     int dst_vlen_enc = vector_length_encoding(this);
22115     switch (dst_elem_bt) {
22116       case T_BYTE:
22117         if (!VM_Version::supports_avx512vl()) {
22118           src_vlen_enc = Assembler::AVX_512bit;
22119         }
22120         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22121         break;
22122       case T_SHORT:
22123         if (!VM_Version::supports_avx512vl()) {
22124           src_vlen_enc = Assembler::AVX_512bit;
22125         }
22126         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22127         break;
22128       case T_FLOAT:
22129         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22130         break;
22131       case T_LONG:
22132         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22133         break;
22134       case T_DOUBLE:
22135         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22136         break;
22137       default:
22138         ShouldNotReachHere();
22139     }
22140   %}
22141   ins_pipe( pipe_slow );
22142 %}
22143 
22144 instruct vcastLtoBS(vec dst, vec src) %{
22145   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22146             UseAVX <= 2);
22147   match(Set dst (VectorCastL2X src));
22148   format %{ "vector_cast_l2x  $dst,$src" %}
22149   ins_encode %{
22150     assert(UseAVX > 0, "required");
22151 
22152     int vlen = Matcher::vector_length_in_bytes(this, $src);
22153     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
22154     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22155                                                       : ExternalAddress(vector_int_to_short_mask());
22156     if (vlen <= 16) {
22157       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22158       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22159       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22160     } else {
22161       assert(vlen <= 32, "required");
22162       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22163       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22164       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22165       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22166     }
22167     if (to_elem_bt == T_BYTE) {
22168       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22169     }
22170   %}
22171   ins_pipe( pipe_slow );
22172 %}
22173 
22174 instruct vcastLtoX_evex(vec dst, vec src) %{
22175   predicate(UseAVX > 2 ||
22176             (Matcher::vector_element_basic_type(n) == T_INT ||
22177              Matcher::vector_element_basic_type(n) == T_FLOAT ||
22178              Matcher::vector_element_basic_type(n) == T_DOUBLE));
22179   match(Set dst (VectorCastL2X src));
22180   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22181   ins_encode %{
22182     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22183     int vlen = Matcher::vector_length_in_bytes(this, $src);
22184     int vlen_enc = vector_length_encoding(this, $src);
22185     switch (to_elem_bt) {
22186       case T_BYTE:
22187         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22188           vlen_enc = Assembler::AVX_512bit;
22189         }
22190         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22191         break;
22192       case T_SHORT:
22193         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22194           vlen_enc = Assembler::AVX_512bit;
22195         }
22196         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22197         break;
22198       case T_INT:
22199         if (vlen == 8) {
22200           if ($dst$$XMMRegister != $src$$XMMRegister) {
22201             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22202           }
22203         } else if (vlen == 16) {
22204           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22205         } else if (vlen == 32) {
22206           if (UseAVX > 2) {
22207             if (!VM_Version::supports_avx512vl()) {
22208               vlen_enc = Assembler::AVX_512bit;
22209             }
22210             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22211           } else {
22212             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22213             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22214           }
22215         } else { // vlen == 64
22216           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22217         }
22218         break;
22219       case T_FLOAT:
22220         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22221         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22222         break;
22223       case T_DOUBLE:
22224         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22225         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      default: assert(false, "%s", type2name(to_elem_bt));
22229     }
22230   %}
22231   ins_pipe( pipe_slow );
22232 %}
22233 
22234 instruct vcastFtoD_reg(vec dst, vec src) %{
22235   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22236   match(Set dst (VectorCastF2X src));
22237   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22238   ins_encode %{
22239     int vlen_enc = vector_length_encoding(this);
22240     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22241   %}
22242   ins_pipe( pipe_slow );
22243 %}
22244 
22245 
22246 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22247   predicate(!VM_Version::supports_avx10_2() &&
22248             !VM_Version::supports_avx512vl() &&
22249             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22250             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22251             is_integral_type(Matcher::vector_element_basic_type(n)));
22252   match(Set dst (VectorCastF2X src));
22253   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22254   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22255   ins_encode %{
22256     int vlen_enc = vector_length_encoding(this, $src);
22257     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the explicit scratch register that was needed to load
    // addresses wider than 32 bits for register-indirect addressing: stub
    // constants live in the code cache, and ReservedCodeCacheSize is currently
    // capped at 2G. Targets are free to raise that cap, but a code cache larger
    // than 2G is unrealistic in practice, and keeping the cap saves a temporary
    // register allocation, which in the limiting case avoids spills in blocks
    // with high register pressure.
22265     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22266                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22267                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22268   %}
22269   ins_pipe( pipe_slow );
22270 %}
22271 
22272 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22273   predicate(!VM_Version::supports_avx10_2() &&
22274             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22275             is_integral_type(Matcher::vector_element_basic_type(n)));
22276   match(Set dst (VectorCastF2X src));
22277   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22278   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22279   ins_encode %{
22280     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22281     if (to_elem_bt == T_LONG) {
22282       int vlen_enc = vector_length_encoding(this);
22283       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22284                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22285                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22286     } else {
22287       int vlen_enc = vector_length_encoding(this, $src);
22288       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22289                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22290                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22291     }
22292   %}
22293   ins_pipe( pipe_slow );
22294 %}
22295 
22296 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22297   predicate(VM_Version::supports_avx10_2() &&
22298             is_integral_type(Matcher::vector_element_basic_type(n)));
22299   match(Set dst (VectorCastF2X src));
22300   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22301   ins_encode %{
22302     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22303     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22304     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22305   %}
22306   ins_pipe( pipe_slow );
22307 %}
22308 
22309 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22310   predicate(VM_Version::supports_avx10_2() &&
22311             is_integral_type(Matcher::vector_element_basic_type(n)));
22312   match(Set dst (VectorCastF2X (LoadVector src)));
22313   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22314   ins_encode %{
22315     int vlen = Matcher::vector_length(this);
22316     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22317     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22318     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22319   %}
22320   ins_pipe( pipe_slow );
22321 %}
22322 
22323 instruct vcastDtoF_reg(vec dst, vec src) %{
22324   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22325   match(Set dst (VectorCastD2X src));
22326   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22327   ins_encode %{
22328     int vlen_enc = vector_length_encoding(this, $src);
22329     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22330   %}
22331   ins_pipe( pipe_slow );
22332 %}
22333 
22334 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22335   predicate(!VM_Version::supports_avx10_2() &&
22336             !VM_Version::supports_avx512vl() &&
22337             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22338             is_integral_type(Matcher::vector_element_basic_type(n)));
22339   match(Set dst (VectorCastD2X src));
22340   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22341   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22342   ins_encode %{
22343     int vlen_enc = vector_length_encoding(this, $src);
22344     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22345     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22346                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22347                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22348   %}
22349   ins_pipe( pipe_slow );
22350 %}
22351 
22352 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22353   predicate(!VM_Version::supports_avx10_2() &&
22354             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22355             is_integral_type(Matcher::vector_element_basic_type(n)));
22356   match(Set dst (VectorCastD2X src));
22357   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22358   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22359   ins_encode %{
22360     int vlen_enc = vector_length_encoding(this, $src);
22361     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22362     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22363                               ExternalAddress(vector_float_signflip());
22364     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22365                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22366   %}
22367   ins_pipe( pipe_slow );
22368 %}
22369 
22370 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22371   predicate(VM_Version::supports_avx10_2() &&
22372             is_integral_type(Matcher::vector_element_basic_type(n)));
22373   match(Set dst (VectorCastD2X src));
22374   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22375   ins_encode %{
22376     int vlen_enc = vector_length_encoding(this, $src);
22377     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22378     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22379   %}
22380   ins_pipe( pipe_slow );
22381 %}
22382 
22383 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22384   predicate(VM_Version::supports_avx10_2() &&
22385             is_integral_type(Matcher::vector_element_basic_type(n)));
22386   match(Set dst (VectorCastD2X (LoadVector src)));
22387   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22388   ins_encode %{
22389     int vlen = Matcher::vector_length(this);
22390     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22391     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22392     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22393   %}
22394   ins_pipe( pipe_slow );
22395 %}
22396 
22397 instruct vucast(vec dst, vec src) %{
22398   match(Set dst (VectorUCastB2X src));
22399   match(Set dst (VectorUCastS2X src));
22400   match(Set dst (VectorUCastI2X src));
22401   format %{ "vector_ucast $dst,$src\t!" %}
22402   ins_encode %{
22403     assert(UseAVX > 0, "required");
22404 
22405     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22406     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22407     int vlen_enc = vector_length_encoding(this);
22408     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22409   %}
22410   ins_pipe( pipe_slow );
22411 %}
22412 
22413 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22414   predicate(!VM_Version::supports_avx512vl() &&
22415             Matcher::vector_length_in_bytes(n) < 64 &&
22416             Matcher::vector_element_basic_type(n) == T_INT);
22417   match(Set dst (RoundVF src));
22418   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22419   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22420   ins_encode %{
22421     int vlen_enc = vector_length_encoding(this);
22422     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22423     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22424                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22425                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22426   %}
22427   ins_pipe( pipe_slow );
22428 %}
22429 
22430 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22431   predicate((VM_Version::supports_avx512vl() ||
22432              Matcher::vector_length_in_bytes(n) == 64) &&
22433              Matcher::vector_element_basic_type(n) == T_INT);
22434   match(Set dst (RoundVF src));
22435   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22436   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22437   ins_encode %{
22438     int vlen_enc = vector_length_encoding(this);
22439     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22440     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22441                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22442                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22443   %}
22444   ins_pipe( pipe_slow );
22445 %}
22446 
22447 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22448   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22449   match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22451   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22452   ins_encode %{
22453     int vlen_enc = vector_length_encoding(this);
22454     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22455     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22456                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22457                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22458   %}
22459   ins_pipe( pipe_slow );
22460 %}
22461 
22462 // --------------------------------- VectorMaskCmp --------------------------------------
22463 
22464 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22465   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22466             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22467             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22468             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22469   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22470   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22471   ins_encode %{
22472     int vlen_enc = vector_length_encoding(this, $src1);
22473     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22474     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22475       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22476     } else {
22477       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22478     }
22479   %}
22480   ins_pipe( pipe_slow );
22481 %}
22482 
22483 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22484   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22485             n->bottom_type()->isa_vectmask() == nullptr &&
22486             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22487   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22488   effect(TEMP ktmp);
22489   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22490   ins_encode %{
22491     int vlen_enc = Assembler::AVX_512bit;
22492     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22493     KRegister mask = k0; // The comparison itself is not being masked.
22494     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22495       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22496       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22497     } else {
22498       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22499       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22500     }
22501   %}
22502   ins_pipe( pipe_slow );
22503 %}
22504 
22505 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22506   predicate(n->bottom_type()->isa_vectmask() &&
22507             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22508   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22509   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22510   ins_encode %{
22511     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22512     int vlen_enc = vector_length_encoding(this, $src1);
22513     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22514     KRegister mask = k0; // The comparison itself is not being masked.
22515     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22516       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22517     } else {
22518       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22519     }
22520   %}
22521   ins_pipe( pipe_slow );
22522 %}
22523 
22524 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22525   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22526             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22527             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22528             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22529             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22530             (n->in(2)->get_int() == BoolTest::eq ||
22531              n->in(2)->get_int() == BoolTest::lt ||
22532              n->in(2)->get_int() == BoolTest::gt)); // cond
22533   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22534   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22535   ins_encode %{
22536     int vlen_enc = vector_length_encoding(this, $src1);
22537     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22538     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22539     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22540   %}
22541   ins_pipe( pipe_slow );
22542 %}
22543 
22544 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22545   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22546             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22547             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22548             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22549             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22550             (n->in(2)->get_int() == BoolTest::ne ||
22551              n->in(2)->get_int() == BoolTest::le ||
22552              n->in(2)->get_int() == BoolTest::ge)); // cond
22553   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22554   effect(TEMP dst, TEMP xtmp);
22555   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22556   ins_encode %{
22557     int vlen_enc = vector_length_encoding(this, $src1);
22558     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22559     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22560     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22561   %}
22562   ins_pipe( pipe_slow );
22563 %}
22564 
22565 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22566   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22567             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22568             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22569             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22570             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22571   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22572   effect(TEMP dst, TEMP xtmp);
22573   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22574   ins_encode %{
22575     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22576     int vlen_enc = vector_length_encoding(this, $src1);
22577     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22578     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22579 
22580     if (vlen_enc == Assembler::AVX_128bit) {
22581       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22582     } else {
22583       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22584     }
22585     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22586     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22587     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22588   %}
22589   ins_pipe( pipe_slow );
22590 %}
22591 
22592 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22593   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22594              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22595              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22596   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22597   effect(TEMP ktmp);
22598   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22599   ins_encode %{
22600     assert(UseAVX > 2, "required");
22601 
22602     int vlen_enc = vector_length_encoding(this, $src1);
22603     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22604     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22605     KRegister mask = k0; // The comparison itself is not being masked.
22606     bool merge = false;
22607     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22608 
22609     switch (src1_elem_bt) {
22610       case T_INT: {
22611         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22612         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22613         break;
22614       }
22615       case T_LONG: {
22616         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22617         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22618         break;
22619       }
22620       default: assert(false, "%s", type2name(src1_elem_bt));
22621     }
22622   %}
22623   ins_pipe( pipe_slow );
22624 %}
22625 
22626 
22627 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22628   predicate(n->bottom_type()->isa_vectmask() &&
22629             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22630   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22631   format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22632   ins_encode %{
22633     assert(UseAVX > 2, "required");
22634     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22635 
22636     int vlen_enc = vector_length_encoding(this, $src1);
22637     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22638     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22639     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22640 
    // Perform the comparison, dispatching on the source element type.
22642     switch (src1_elem_bt) {
22643       case T_BYTE: {
22644         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22645         break;
22646       }
22647       case T_SHORT: {
22648         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22649         break;
22650       }
22651       case T_INT: {
22652         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22653         break;
22654       }
22655       case T_LONG: {
22656         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22657         break;
22658       }
22659       default: assert(false, "%s", type2name(src1_elem_bt));
22660     }
22661   %}
22662   ins_pipe( pipe_slow );
22663 %}
22664 
22665 // Extract
22666 
22667 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22668   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22669   match(Set dst (ExtractI src idx));
22670   match(Set dst (ExtractS src idx));
22671   match(Set dst (ExtractB src idx));
22672   format %{ "extractI $dst,$src,$idx\t!" %}
22673   ins_encode %{
22674     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22675 
22676     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22677     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22678   %}
22679   ins_pipe( pipe_slow );
22680 %}
22681 
22682 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22683   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22684             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22685   match(Set dst (ExtractI src idx));
22686   match(Set dst (ExtractS src idx));
22687   match(Set dst (ExtractB src idx));
22688   effect(TEMP vtmp);
22689   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22690   ins_encode %{
22691     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22692 
22693     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22694     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22695     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22696   %}
22697   ins_pipe( pipe_slow );
22698 %}
22699 
22700 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22701   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22702   match(Set dst (ExtractL src idx));
22703   format %{ "extractL $dst,$src,$idx\t!" %}
22704   ins_encode %{
22705     assert(UseSSE >= 4, "required");
22706     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22707 
22708     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22709   %}
22710   ins_pipe( pipe_slow );
22711 %}
22712 
22713 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22714   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22715             Matcher::vector_length(n->in(1)) == 8);  // src
22716   match(Set dst (ExtractL src idx));
22717   effect(TEMP vtmp);
22718   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22719   ins_encode %{
22720     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22721 
22722     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22723     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22724   %}
22725   ins_pipe( pipe_slow );
22726 %}
22727 
22728 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22729   predicate(Matcher::vector_length(n->in(1)) <= 4);
22730   match(Set dst (ExtractF src idx));
22731   effect(TEMP dst, TEMP vtmp);
22732   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22733   ins_encode %{
22734     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22735 
22736     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22737   %}
22738   ins_pipe( pipe_slow );
22739 %}
22740 
22741 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22742   predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22743             Matcher::vector_length(n->in(1)/*src*/) == 16);
22744   match(Set dst (ExtractF src idx));
22745   effect(TEMP vtmp);
22746   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22747   ins_encode %{
22748     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22749 
22750     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22751     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22752   %}
22753   ins_pipe( pipe_slow );
22754 %}
22755 
22756 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22757   predicate(Matcher::vector_length(n->in(1)) == 2); // src
22758   match(Set dst (ExtractD src idx));
22759   format %{ "extractD $dst,$src,$idx\t!" %}
22760   ins_encode %{
22761     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22762 
22763     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22764   %}
22765   ins_pipe( pipe_slow );
22766 %}
22767 
22768 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22769   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22770             Matcher::vector_length(n->in(1)) == 8);  // src
22771   match(Set dst (ExtractD src idx));
22772   effect(TEMP vtmp);
22773   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22774   ins_encode %{
22775     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22776 
22777     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22778     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22779   %}
22780   ins_pipe( pipe_slow );
22781 %}
22782 
22783 // --------------------------------- Vector Blend --------------------------------------
22784 
22785 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22786   predicate(UseAVX == 0);
22787   match(Set dst (VectorBlend (Binary dst src) mask));
22788   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
22789   effect(TEMP tmp);
22790   ins_encode %{
22791     assert(UseSSE >= 4, "required");
22792 
22793     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22794       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22795     }
22796     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22797   %}
22798   ins_pipe( pipe_slow );
22799 %}
22800 
22801 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22802   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22803             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22804             Matcher::vector_length_in_bytes(n) <= 32 &&
22805             is_integral_type(Matcher::vector_element_basic_type(n)));
22806   match(Set dst (VectorBlend (Binary src1 src2) mask));
22807   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22808   ins_encode %{
22809     int vlen_enc = vector_length_encoding(this);
22810     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22811   %}
22812   ins_pipe( pipe_slow );
22813 %}
22814 
22815 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22816   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22817             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22818             Matcher::vector_length_in_bytes(n) <= 32 &&
22819             !is_integral_type(Matcher::vector_element_basic_type(n)));
22820   match(Set dst (VectorBlend (Binary src1 src2) mask));
22821   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22822   ins_encode %{
22823     int vlen_enc = vector_length_encoding(this);
22824     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22825   %}
22826   ins_pipe( pipe_slow );
22827 %}
22828 
22829 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22830   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22831             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22832             Matcher::vector_length_in_bytes(n) <= 32);
22833   match(Set dst (VectorBlend (Binary src1 src2) mask));
22834   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22835   effect(TEMP vtmp, TEMP dst);
22836   ins_encode %{
22837     int vlen_enc = vector_length_encoding(this);
22838     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22839     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22840     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
22841   %}
22842   ins_pipe( pipe_slow );
22843 %}
22844 
22845 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22846   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22847             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22848   match(Set dst (VectorBlend (Binary src1 src2) mask));
22849   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22850   effect(TEMP ktmp);
22851   ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
22854     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22855     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22856   %}
22857   ins_pipe( pipe_slow );
22858 %}
22859 
22860 
22861 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22862   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22863             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22864              VM_Version::supports_avx512bw()));
22865   match(Set dst (VectorBlend (Binary src1 src2) mask));
22866   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22867   ins_encode %{
22868     int vlen_enc = vector_length_encoding(this);
22869     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22870     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22871   %}
22872   ins_pipe( pipe_slow );
22873 %}
22874 
22875 // --------------------------------- ABS --------------------------------------
22876 // a = |a|
22877 instruct vabsB_reg(vec dst, vec src) %{
22878   match(Set dst (AbsVB  src));
22879   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22880   ins_encode %{
22881     uint vlen = Matcher::vector_length(this);
22882     if (vlen <= 16) {
22883       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22884     } else {
22885       int vlen_enc = vector_length_encoding(this);
22886       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22887     }
22888   %}
22889   ins_pipe( pipe_slow );
22890 %}
22891 
22892 instruct vabsS_reg(vec dst, vec src) %{
22893   match(Set dst (AbsVS  src));
22894   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22895   ins_encode %{
22896     uint vlen = Matcher::vector_length(this);
22897     if (vlen <= 8) {
22898       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22899     } else {
22900       int vlen_enc = vector_length_encoding(this);
22901       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22902     }
22903   %}
22904   ins_pipe( pipe_slow );
22905 %}
22906 
22907 instruct vabsI_reg(vec dst, vec src) %{
22908   match(Set dst (AbsVI  src));
22909   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22910   ins_encode %{
22911     uint vlen = Matcher::vector_length(this);
22912     if (vlen <= 4) {
22913       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22914     } else {
22915       int vlen_enc = vector_length_encoding(this);
22916       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22917     }
22918   %}
22919   ins_pipe( pipe_slow );
22920 %}
22921 
22922 instruct vabsL_reg(vec dst, vec src) %{
22923   match(Set dst (AbsVL  src));
22924   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22925   ins_encode %{
22926     assert(UseAVX > 2, "required");
22927     int vlen_enc = vector_length_encoding(this);
22928     if (!VM_Version::supports_avx512vl()) {
22929       vlen_enc = Assembler::AVX_512bit;
22930     }
22931     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22932   %}
22933   ins_pipe( pipe_slow );
22934 %}
22935 
22936 // --------------------------------- ABSNEG --------------------------------------
22937 
22938 instruct vabsnegF(vec dst, vec src) %{
22939   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22940   match(Set dst (AbsVF src));
22941   match(Set dst (NegVF src));
22942   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22943   ins_cost(150);
22944   ins_encode %{
22945     int opcode = this->ideal_Opcode();
22946     int vlen = Matcher::vector_length(this);
22947     if (vlen == 2) {
22948       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22949     } else {
22950       assert(vlen == 8 || vlen == 16, "required");
22951       int vlen_enc = vector_length_encoding(this);
22952       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22953     }
22954   %}
22955   ins_pipe( pipe_slow );
22956 %}
22957 
22958 instruct vabsneg4F(vec dst) %{
22959   predicate(Matcher::vector_length(n) == 4);
22960   match(Set dst (AbsVF dst));
22961   match(Set dst (NegVF dst));
22962   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22963   ins_cost(150);
22964   ins_encode %{
22965     int opcode = this->ideal_Opcode();
22966     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22967   %}
22968   ins_pipe( pipe_slow );
22969 %}
22970 
22971 instruct vabsnegD(vec dst, vec src) %{
22972   match(Set dst (AbsVD  src));
22973   match(Set dst (NegVD  src));
22974   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22975   ins_encode %{
22976     int opcode = this->ideal_Opcode();
22977     uint vlen = Matcher::vector_length(this);
22978     if (vlen == 2) {
22979       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22980     } else {
22981       int vlen_enc = vector_length_encoding(this);
22982       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22983     }
22984   %}
22985   ins_pipe( pipe_slow );
22986 %}
22987 
22988 //------------------------------------- VectorTest --------------------------------------------
22989 
22990 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22991   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22992   match(Set cr (VectorTest src1 src2));
22993   effect(TEMP vtmp);
22994   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
22995   ins_encode %{
22996     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22997     int vlen = Matcher::vector_length_in_bytes(this, $src1);
22998     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22999   %}
23000   ins_pipe( pipe_slow );
23001 %}
23002 
23003 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23004   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23005   match(Set cr (VectorTest src1 src2));
23006   format %{ "vptest_ge16  $src1, $src2\n\t" %}
23007   ins_encode %{
23008     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23009     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23010     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23011   %}
23012   ins_pipe( pipe_slow );
23013 %}
23014 
23015 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23016   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23017              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23018             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23019   match(Set cr (VectorTest src1 src2));
23020   effect(TEMP tmp);
23021   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23022   ins_encode %{
23023     uint masklen = Matcher::vector_length(this, $src1);
23024     __ kmovwl($tmp$$Register, $src1$$KRegister);
23025     __ andl($tmp$$Register, (1 << masklen) - 1);
23026     __ cmpl($tmp$$Register, (1 << masklen) - 1);
23027   %}
23028   ins_pipe( pipe_slow );
23029 %}
23030 
23031 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23032   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23033              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23034             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23035   match(Set cr (VectorTest src1 src2));
23036   effect(TEMP tmp);
23037   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23038   ins_encode %{
23039     uint masklen = Matcher::vector_length(this, $src1);
23040     __ kmovwl($tmp$$Register, $src1$$KRegister);
23041     __ andl($tmp$$Register, (1 << masklen) - 1);
23042   %}
23043   ins_pipe( pipe_slow );
23044 %}
23045 
23046 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23047   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23048             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23049   match(Set cr (VectorTest src1 src2));
23050   format %{ "ktest_ge8  $src1, $src2\n\t" %}
23051   ins_encode %{
23052     uint masklen = Matcher::vector_length(this, $src1);
23053     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23054   %}
23055   ins_pipe( pipe_slow );
23056 %}
23057 
23058 //------------------------------------- LoadMask --------------------------------------------
23059 
23060 instruct loadMask(legVec dst, legVec src) %{
23061   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23062   match(Set dst (VectorLoadMask src));
23063   effect(TEMP dst);
23064   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23065   ins_encode %{
23066     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23067     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23068     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23069   %}
23070   ins_pipe( pipe_slow );
23071 %}
23072 
23073 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23074   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23075   match(Set dst (VectorLoadMask src));
23076   effect(TEMP xtmp);
23077   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23078   ins_encode %{
23079     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23080                         true, Assembler::AVX_512bit);
23081   %}
23082   ins_pipe( pipe_slow );
23083 %}
23084 
23085 instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
23086   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23087   match(Set dst (VectorLoadMask src));
23088   effect(TEMP xtmp);
23089   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23090   ins_encode %{
23091     int vlen_enc = vector_length_encoding(in(1));
23092     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23093                         false, vlen_enc);
23094   %}
23095   ins_pipe( pipe_slow );
23096 %}
23097 
23098 //------------------------------------- StoreMask --------------------------------------------
23099 
23100 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23101   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23102   match(Set dst (VectorStoreMask src size));
23103   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23104   ins_encode %{
23105     int vlen = Matcher::vector_length(this);
23106     if (vlen <= 16 && UseAVX <= 2) {
23107       assert(UseSSE >= 3, "required");
23108       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23109     } else {
23110       assert(UseAVX > 0, "required");
23111       int src_vlen_enc = vector_length_encoding(this, $src);
23112       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23113     }
23114   %}
23115   ins_pipe( pipe_slow );
23116 %}
23117 
23118 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23119   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23120   match(Set dst (VectorStoreMask src size));
23121   effect(TEMP_DEF dst, TEMP xtmp);
23122   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23123   ins_encode %{
23124     int vlen_enc = Assembler::AVX_128bit;
23125     int vlen = Matcher::vector_length(this);
23126     if (vlen <= 8) {
23127       assert(UseSSE >= 3, "required");
23128       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23129       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23130       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23131     } else {
23132       assert(UseAVX > 0, "required");
23133       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23134       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23135       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23136     }
23137   %}
23138   ins_pipe( pipe_slow );
23139 %}
23140 
23141 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23142   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23143   match(Set dst (VectorStoreMask src size));
23144   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23145   effect(TEMP_DEF dst, TEMP xtmp);
23146   ins_encode %{
23147     int vlen_enc = Assembler::AVX_128bit;
23148     int vlen = Matcher::vector_length(this);
23149     if (vlen <= 4) {
23150       assert(UseSSE >= 3, "required");
23151       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23152       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23153       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23154       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23155     } else {
23156       assert(UseAVX > 0, "required");
23157       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23158       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23159       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23160       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23161       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23162     }
23163   %}
23164   ins_pipe( pipe_slow );
23165 %}
23166 
23167 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23168   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23169   match(Set dst (VectorStoreMask src size));
23170   effect(TEMP_DEF dst, TEMP xtmp);
23171   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23172   ins_encode %{
23173     assert(UseSSE >= 3, "required");
23174     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23175     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23176     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23177     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23178     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23179   %}
23180   ins_pipe( pipe_slow );
23181 %}
23182 
23183 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23184   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23185   match(Set dst (VectorStoreMask src size));
23186   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23187   effect(TEMP_DEF dst, TEMP vtmp);
23188   ins_encode %{
23189     int vlen_enc = Assembler::AVX_128bit;
23190     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23191     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23192     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23193     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23194     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23195     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23196     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23197   %}
23198   ins_pipe( pipe_slow );
23199 %}
23200 
23201 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23202   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23203   match(Set dst (VectorStoreMask src size));
23204   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23205   ins_encode %{
23206     int src_vlen_enc = vector_length_encoding(this, $src);
23207     int dst_vlen_enc = vector_length_encoding(this);
23208     if (!VM_Version::supports_avx512vl()) {
23209       src_vlen_enc = Assembler::AVX_512bit;
23210     }
23211     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23212     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23213   %}
23214   ins_pipe( pipe_slow );
23215 %}
23216 
23217 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23218   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23219   match(Set dst (VectorStoreMask src size));
23220   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23221   ins_encode %{
23222     int src_vlen_enc = vector_length_encoding(this, $src);
23223     int dst_vlen_enc = vector_length_encoding(this);
23224     if (!VM_Version::supports_avx512vl()) {
23225       src_vlen_enc = Assembler::AVX_512bit;
23226     }
23227     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23228     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23229   %}
23230   ins_pipe( pipe_slow );
23231 %}
23232 
23233 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23234   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23235   match(Set dst (VectorStoreMask mask size));
23236   effect(TEMP_DEF dst);
23237   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23238   ins_encode %{
23239     assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23240     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23241                  false, Assembler::AVX_512bit, noreg);
23242     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23243   %}
23244   ins_pipe( pipe_slow );
23245 %}
23246 
23247 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23248   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23249   match(Set dst (VectorStoreMask mask size));
23250   effect(TEMP_DEF dst);
23251   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23252   ins_encode %{
23253     int dst_vlen_enc = vector_length_encoding(this);
23254     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23255     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23256   %}
23257   ins_pipe( pipe_slow );
23258 %}
23259 
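// VectorMaskCast between masks of the same size is a pure re-typing operation,
// so no code is emitted; only the size-changing variant below does any work.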
23260 instruct vmaskcast_evex(kReg dst) %{
23261   match(Set dst (VectorMaskCast dst));
23262   ins_cost(0);
23263   format %{ "vector_mask_cast $dst" %}
23264   ins_encode %{
23265     // empty
23266   %}
23267   ins_pipe(empty);
23268 %}
23269 
23270 instruct vmaskcast(vec dst) %{
23271   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23272   match(Set dst (VectorMaskCast dst));
23273   ins_cost(0);
23274   format %{ "vector_mask_cast $dst" %}
23275   ins_encode %{
23276     // empty
23277   %}
23278   ins_pipe(empty);
23279 %}
23280 
23281 instruct vmaskcast_avx(vec dst, vec src) %{
23282   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23283   match(Set dst (VectorMaskCast src));
23284   format %{ "vector_mask_cast $dst, $src" %}
23285   ins_encode %{
23286     int vlen = Matcher::vector_length(this);
23287     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23288     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23289     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23290   %}
23291   ins_pipe(pipe_slow);
23292 %}
23293 
23294 //-------------------------------- Load Iota Indices ----------------------------------
23295 
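// Loads the constant index sequence 0, 1, 2, ... into a vector, one value per lane.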
23296 instruct loadIotaIndices(vec dst, immI_0 src) %{
23297   match(Set dst (VectorLoadConst src));
23298   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23299   ins_encode %{
23300      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23301      BasicType bt = Matcher::vector_element_basic_type(this);
23302      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23303   %}
23304   ins_pipe( pipe_slow );
23305 %}
23306 
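// PopulateIndex computes dst[i] = src1 + i * stride. The stride ($src2) is
// matched as the constant 1, so this reduces to broadcasting src1 and adding
// the iota index sequence.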
23307 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23308   match(Set dst (PopulateIndex src1 src2));
23309   effect(TEMP dst, TEMP vtmp);
23310   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23311   ins_encode %{
23312      assert($src2$$constant == 1, "required");
23313      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23314      int vlen_enc = vector_length_encoding(this);
23315      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23316      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23317      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23318      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23319   %}
23320   ins_pipe( pipe_slow );
23321 %}
23322 
23323 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23324   match(Set dst (PopulateIndex src1 src2));
23325   effect(TEMP dst, TEMP vtmp);
23326   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23327   ins_encode %{
23328      assert($src2$$constant == 1, "required");
23329      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23330      int vlen_enc = vector_length_encoding(this);
23331      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23332      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23333      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23334      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23335   %}
23336   ins_pipe( pipe_slow );
23337 %}
23338 
23339 //-------------------------------- Rearrange ----------------------------------
23340 
23341 // LoadShuffle/Rearrange for Byte
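// pshufb selects each destination byte by index from within its own 128-bit
// lane, so vectors wider than 16 bytes need either lane-crossing fixups
// (rearrangeB_avx below) or AVX512_VBMI's vpermb.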
23342 instruct rearrangeB(vec dst, vec shuffle) %{
23343   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23344             Matcher::vector_length(n) < 32);
23345   match(Set dst (VectorRearrange dst shuffle));
23346   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23347   ins_encode %{
23348     assert(UseSSE >= 4, "required");
23349     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23350   %}
23351   ins_pipe( pipe_slow );
23352 %}
23353 
23354 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23355   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23356             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23357   match(Set dst (VectorRearrange src shuffle));
23358   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23359   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23360   ins_encode %{
23361     assert(UseAVX >= 2, "required");
23362     // Swap src into vtmp1
23363     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to pick up entries from the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to pick up entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for entries that come from the other lane
23369     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23370     // Perform the blend
23371     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23372   %}
23373   ins_pipe( pipe_slow );
23374 %}
23375 
23376 
23377 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23378   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23379             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23380   match(Set dst (VectorRearrange src shuffle));
23381   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23383   ins_encode %{
23384     int vlen_enc = vector_length_encoding(this);
23385     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23386                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23387                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23388   %}
23389   ins_pipe( pipe_slow );
23390 %}
23391 
23392 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23393   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23394             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23395   match(Set dst (VectorRearrange src shuffle));
23396   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23397   ins_encode %{
23398     int vlen_enc = vector_length_encoding(this);
23399     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23400   %}
23401   ins_pipe( pipe_slow );
23402 %}
23403 
23404 // LoadShuffle/Rearrange for Short
23405 
23406 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23407   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23408             !VM_Version::supports_avx512bw());
23409   match(Set dst (VectorLoadShuffle src));
23410   effect(TEMP dst, TEMP vtmp);
23411   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23412   ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask, since only a
    // byte shuffle instruction is available on these platforms.
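    // For a short shuffle index s the byte indices are 2*s and 2*s+1;
    // e.g. the short index 2 expands to the byte-index pair (4, 5).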
23415     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23416     if (UseAVX == 0) {
23417       assert(vlen_in_bytes <= 16, "required");
23418       // Multiply each shuffle by two to get byte index
23419       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23420       __ psllw($vtmp$$XMMRegister, 1);
23421 
23422       // Duplicate to create 2 copies of byte index
23423       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23424       __ psllw($dst$$XMMRegister, 8);
23425       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23426 
23427       // Add one to get alternate byte index
23428       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23429       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23430     } else {
23431       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23432       int vlen_enc = vector_length_encoding(this);
23433       // Multiply each shuffle by two to get byte index
23434       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23435 
23436       // Duplicate to create 2 copies of byte index
23437       __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister,  8, vlen_enc);
23438       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23439 
23440       // Add one to get alternate byte index
23441       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23442     }
23443   %}
23444   ins_pipe( pipe_slow );
23445 %}
23446 
23447 instruct rearrangeS(vec dst, vec shuffle) %{
23448   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23449             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23450   match(Set dst (VectorRearrange dst shuffle));
23451   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23452   ins_encode %{
23453     assert(UseSSE >= 4, "required");
23454     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23455   %}
23456   ins_pipe( pipe_slow );
23457 %}
23458 
23459 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23460   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23461             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23462   match(Set dst (VectorRearrange src shuffle));
23463   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23464   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23465   ins_encode %{
23466     assert(UseAVX >= 2, "required");
23467     // Swap src into vtmp1
23468     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to pick up entries from the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to pick up entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for entries that come from the other lane
23474     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23475     // Perform the blend
23476     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23477   %}
23478   ins_pipe( pipe_slow );
23479 %}
23480 
23481 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23482   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23483             VM_Version::supports_avx512bw());
23484   match(Set dst (VectorRearrange src shuffle));
23485   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23486   ins_encode %{
23487     int vlen_enc = vector_length_encoding(this);
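    // vpermw on sub-512-bit vectors requires AVX512VL; otherwise use the 512-bit encoding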
23488     if (!VM_Version::supports_avx512vl()) {
23489       vlen_enc = Assembler::AVX_512bit;
23490     }
23491     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23492   %}
23493   ins_pipe( pipe_slow );
23494 %}
23495 
23496 // LoadShuffle/Rearrange for Integer and Float
23497 
23498 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23499   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23500             Matcher::vector_length(n) == 4 && UseAVX == 0);
23501   match(Set dst (VectorLoadShuffle src));
23502   effect(TEMP dst, TEMP vtmp);
23503   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23504   ins_encode %{
23505     assert(UseSSE >= 4, "required");
23506 
    // Create a byte shuffle mask from the int shuffle mask, since only a
    // byte shuffle instruction is available on these platforms.
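    // For an int shuffle index s the byte indices are 4*s .. 4*s+3;
    // e.g. the int index 1 expands to byte indices 4, 5, 6, 7.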
23509 
23510     // Duplicate and multiply each shuffle by 4
23511     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23512     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23513     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23514     __ psllw($vtmp$$XMMRegister, 2);
23515 
23516     // Duplicate again to create 4 copies of byte index
23517     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23518     __ psllw($dst$$XMMRegister, 8);
23519     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23520 
23521     // Add 3,2,1,0 to get alternate byte index
23522     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23523     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23524   %}
23525   ins_pipe( pipe_slow );
23526 %}
23527 
23528 instruct rearrangeI(vec dst, vec shuffle) %{
23529   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23530             UseAVX == 0);
23531   match(Set dst (VectorRearrange dst shuffle));
23532   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23533   ins_encode %{
23534     assert(UseSSE >= 4, "required");
23535     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23536   %}
23537   ins_pipe( pipe_slow );
23538 %}
23539 
23540 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23541   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23542             UseAVX > 0);
23543   match(Set dst (VectorRearrange src shuffle));
23544   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23545   ins_encode %{
23546     int vlen_enc = vector_length_encoding(this);
23547     BasicType bt = Matcher::vector_element_basic_type(this);
23548     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23549   %}
23550   ins_pipe( pipe_slow );
23551 %}
23552 
23553 // LoadShuffle/Rearrange for Long and Double
23554 
23555 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23556   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23557             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23558   match(Set dst (VectorLoadShuffle src));
23559   effect(TEMP dst, TEMP vtmp);
23560   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23561   ins_encode %{
23562     assert(UseAVX >= 2, "required");
23563 
23564     int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask, since only
    // a double word shuffle instruction is available on these platforms.
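    // For a long shuffle index s the double word indices are 2*s and 2*s+1;
    // e.g. the long index 1 expands to double word indices 2 and 3.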
23567 
23568     // Multiply each shuffle by two to get double word index
23569     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23570 
23571     // Duplicate each double word shuffle
23572     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23573     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23574 
23575     // Add one to get alternate double word index
23576     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23577   %}
23578   ins_pipe( pipe_slow );
23579 %}
23580 
23581 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23582   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23583             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23584   match(Set dst (VectorRearrange src shuffle));
23585   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23586   ins_encode %{
23587     assert(UseAVX >= 2, "required");
23588 
23589     int vlen_enc = vector_length_encoding(this);
23590     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23591   %}
23592   ins_pipe( pipe_slow );
23593 %}
23594 
23595 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23596   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23597             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23598   match(Set dst (VectorRearrange src shuffle));
23599   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23600   ins_encode %{
23601     assert(UseAVX > 2, "required");
23602 
23603     int vlen_enc = vector_length_encoding(this);
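    // vpermq has no 128-bit form; promote to the 256-bit encoding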
23604     if (vlen_enc == Assembler::AVX_128bit) {
23605       vlen_enc = Assembler::AVX_256bit;
23606     }
23607     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23608   %}
23609   ins_pipe( pipe_slow );
23610 %}
23611 
23612 // --------------------------------- FMA --------------------------------------
23613 // a * b + c
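// FMA computes a * b + c in a single instruction with one rounding step,
// which is both faster and more precise than a separate multiply and add.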
23614 
23615 instruct vfmaF_reg(vec a, vec b, vec c) %{
23616   match(Set c (FmaVF  c (Binary a b)));
23617   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23618   ins_cost(150);
23619   ins_encode %{
23620     assert(UseFMA, "not enabled");
23621     int vlen_enc = vector_length_encoding(this);
23622     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23623   %}
23624   ins_pipe( pipe_slow );
23625 %}
23626 
23627 instruct vfmaF_mem(vec a, memory b, vec c) %{
23628   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23629   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23630   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23631   ins_cost(150);
23632   ins_encode %{
23633     assert(UseFMA, "not enabled");
23634     int vlen_enc = vector_length_encoding(this);
23635     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23636   %}
23637   ins_pipe( pipe_slow );
23638 %}
23639 
23640 instruct vfmaD_reg(vec a, vec b, vec c) %{
23641   match(Set c (FmaVD  c (Binary a b)));
23642   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23643   ins_cost(150);
23644   ins_encode %{
23645     assert(UseFMA, "not enabled");
23646     int vlen_enc = vector_length_encoding(this);
23647     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23648   %}
23649   ins_pipe( pipe_slow );
23650 %}
23651 
23652 instruct vfmaD_mem(vec a, memory b, vec c) %{
23653   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23654   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23655   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23656   ins_cost(150);
23657   ins_encode %{
23658     assert(UseFMA, "not enabled");
23659     int vlen_enc = vector_length_encoding(this);
23660     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23661   %}
23662   ins_pipe( pipe_slow );
23663 %}
23664 
23665 // --------------------------------- Vector Multiply Add --------------------------------------
23666 
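// pmaddwd multiplies the corresponding signed 16-bit elements of its two
// operands and adds each adjacent pair of 32-bit products, producing one
// 32-bit lane per pair of shorts (the StoI in the format strings).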
23667 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23668   predicate(UseAVX == 0);
23669   match(Set dst (MulAddVS2VI dst src1));
23670   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23671   ins_encode %{
23672     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23673   %}
23674   ins_pipe( pipe_slow );
23675 %}
23676 
23677 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23678   predicate(UseAVX > 0);
23679   match(Set dst (MulAddVS2VI src1 src2));
23680   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23681   ins_encode %{
23682     int vlen_enc = vector_length_encoding(this);
23683     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23684   %}
23685   ins_pipe( pipe_slow );
23686 %}
23687 
23688 // --------------------------------- Vector Multiply Add Add ----------------------------------
23689 
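// evpdpwssd (AVX512_VNNI) fuses the pmaddwd with the following paddd:
// dst[i] += src1[2i] * src2[2i] + src1[2i+1] * src2[2i+1]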
23690 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23691   predicate(VM_Version::supports_avx512_vnni());
23692   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23693   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23694   ins_encode %{
23695     assert(UseAVX > 2, "required");
23696     int vlen_enc = vector_length_encoding(this);
23697     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23698   %}
23699   ins_pipe( pipe_slow );
23700   ins_cost(10);
23701 %}
23702 
23703 // --------------------------------- PopCount --------------------------------------
23704 
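// Counts the number of set bits in every lane of the vector.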
23705 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23706   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23707   match(Set dst (PopCountVI src));
23708   match(Set dst (PopCountVL src));
23709   format %{ "vector_popcount_integral $dst, $src" %}
23710   ins_encode %{
23712     int vlen_enc = vector_length_encoding(this, $src);
23713     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23714     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23715   %}
23716   ins_pipe( pipe_slow );
23717 %}
23718 
23719 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23720   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23721   match(Set dst (PopCountVI src mask));
23722   match(Set dst (PopCountVL src mask));
23723   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23724   ins_encode %{
23725     int vlen_enc = vector_length_encoding(this, $src);
23726     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23727     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23728     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23729   %}
23730   ins_pipe( pipe_slow );
23731 %}
23732 
23733 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23734   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23735   match(Set dst (PopCountVI src));
23736   match(Set dst (PopCountVL src));
23737   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23738   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23739   ins_encode %{
23741     int vlen_enc = vector_length_encoding(this, $src);
23742     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23743     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23744                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23745   %}
23746   ins_pipe( pipe_slow );
23747 %}
23748 
23749 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23750 
23751 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23752   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23753                                               Matcher::vector_length_in_bytes(n->in(1))));
23754   match(Set dst (CountTrailingZerosV src));
23755   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23756   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23758   ins_encode %{
23759     int vlen_enc = vector_length_encoding(this, $src);
23760     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23761     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23762                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23763   %}
23764   ins_pipe( pipe_slow );
23765 %}
23766 
23767 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23768   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23769             VM_Version::supports_avx512cd() &&
23770             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23771   match(Set dst (CountTrailingZerosV src));
23772   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23773   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23775   ins_encode %{
23776     int vlen_enc = vector_length_encoding(this, $src);
23777     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23778     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23779                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23780   %}
23781   ins_pipe( pipe_slow );
23782 %}
23783 
23784 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23785   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23786   match(Set dst (CountTrailingZerosV src));
23787   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23788   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23790   ins_encode %{
23791     int vlen_enc = vector_length_encoding(this, $src);
23792     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23793     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23794                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23795                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23796   %}
23797   ins_pipe( pipe_slow );
23798 %}
23799 
23800 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23801   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23802   match(Set dst (CountTrailingZerosV src));
23803   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23804   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23805   ins_encode %{
23806     int vlen_enc = vector_length_encoding(this, $src);
23807     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23808     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23809                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23810   %}
23811   ins_pipe( pipe_slow );
23812 %}
23813 
23814 
23815 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23816 
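// vpternlogd evaluates an arbitrary three-input boolean function bit-wise:
// the immediate $func is the 8-entry truth table, indexed for each result bit
// by the three corresponding source bits.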
23817 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23818   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23819   effect(TEMP dst);
23820   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23821   ins_encode %{
23822     int vector_len = vector_length_encoding(this);
23823     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23824   %}
23825   ins_pipe( pipe_slow );
23826 %}
23827 
23828 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23829   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23830   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23831   effect(TEMP dst);
23832   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23833   ins_encode %{
23834     int vector_len = vector_length_encoding(this);
23835     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23836   %}
23837   ins_pipe( pipe_slow );
23838 %}
23839 
23840 // --------------------------------- Rotation Operations ----------------------------------
23841 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23842   match(Set dst (RotateLeftV src shift));
23843   match(Set dst (RotateRightV src shift));
23844   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23845   ins_encode %{
23846     int opcode      = this->ideal_Opcode();
23847     int vector_len  = vector_length_encoding(this);
23848     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23849     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23850   %}
23851   ins_pipe( pipe_slow );
23852 %}
23853 
instruct vprotate_var(vec dst, vec src, vec shift) %{
23855   match(Set dst (RotateLeftV src shift));
23856   match(Set dst (RotateRightV src shift));
23857   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23858   ins_encode %{
23859     int opcode      = this->ideal_Opcode();
23860     int vector_len  = vector_length_encoding(this);
23861     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23862     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23863   %}
23864   ins_pipe( pipe_slow );
23865 %}
23866 
23867 // ---------------------------------- Masked Operations ------------------------------------
23868 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23869   predicate(!n->in(3)->bottom_type()->isa_vectmask());
23870   match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked load" %}
23872   ins_encode %{
23873     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23874     int vlen_enc = vector_length_encoding(this);
23875     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23876   %}
23877   ins_pipe( pipe_slow );
23878 %}
23879 
23880 
23881 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23882   predicate(n->in(3)->bottom_type()->isa_vectmask());
23883   match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked load" %}
23885   ins_encode %{
23886     BasicType elmType =  this->bottom_type()->is_vect()->element_basic_type();
23887     int vector_len = vector_length_encoding(this);
23888     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23889   %}
23890   ins_pipe( pipe_slow );
23891 %}
23892 
23893 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23894   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23895   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23896   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23897   ins_encode %{
23898     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23899     int vlen_enc = vector_length_encoding(src_node);
23900     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23901     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23902   %}
23903   ins_pipe( pipe_slow );
23904 %}
23905 
23906 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23907   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23908   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23909   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23910   ins_encode %{
23911     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23912     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23913     int vlen_enc = vector_length_encoding(src_node);
23914     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23915   %}
23916   ins_pipe( pipe_slow );
23917 %}
23918 
23919 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23920   match(Set addr (VerifyVectorAlignment addr mask));
23921   effect(KILL cr);
23922   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23923   ins_encode %{
23924     Label Lskip;
23925     // check if masked bits of addr are zero
23926     __ testq($addr$$Register, $mask$$constant);
23927     __ jccb(Assembler::equal, Lskip);
23928     __ stop("verify_vector_alignment found a misaligned vector memory access");
23929     __ bind(Lskip);
23930   %}
23931   ins_pipe(pipe_slow);
23932 %}
23933 
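// VectorCmpMasked sets $dst to -1 when every masked lane of $src1 and $src2
// compares equal, and otherwise to the index of the first non-matching lane.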
23934 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23935   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23936   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23937   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23938   ins_encode %{
23939     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23940     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23941 
23942     Label DONE;
23943     int vlen_enc = vector_length_encoding(this, $src1);
23944     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23945 
    __ knotql($ktmp2$$KRegister, $mask$$KRegister);      // ktmp2 = lanes outside the mask
    __ mov64($dst$$Register, -1L);                       // assume all masked lanes compare equal
    __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
    __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);  // CF is set iff every masked lane compared equal
    __ jccb(Assembler::carrySet, DONE);
    __ kmovql($dst$$Register, $ktmp1$$KRegister);
    __ notq($dst$$Register);                             // set bits now mark the non-matching lanes
    __ tzcntq($dst$$Register, $dst$$Register);           // index of the first mismatch
23954     __ bind(DONE);
23955   %}
23956   ins_pipe( pipe_slow );
23957 %}
23958 
23959 
23960 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23961   match(Set dst (VectorMaskGen len));
23962   effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
23964   ins_encode %{
23965     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23966   %}
23967   ins_pipe( pipe_slow );
23968 %}
23969 
23970 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23971   match(Set dst (VectorMaskGen len));
23972   format %{ "vector_mask_gen $len \t! vector mask generator" %}
23973   effect(TEMP temp);
23974   ins_encode %{
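    // right_n_bits(n) forms a constant with the low n bits set, e.g. n == 3 gives 0b111 (lanes 0-2 enabled)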
23975     if ($len$$constant > 0) {
23976       __ mov64($temp$$Register, right_n_bits($len$$constant));
23977       __ kmovql($dst$$KRegister, $temp$$Register);
23978     } else {
23979       __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
23980     }
23981   %}
23982   ins_pipe( pipe_slow );
23983 %}
23984 
23985 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23986   predicate(n->in(1)->bottom_type()->isa_vectmask());
23987   match(Set dst (VectorMaskToLong mask));
23988   effect(TEMP dst, KILL cr);
23989   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23990   ins_encode %{
23991     int opcode = this->ideal_Opcode();
23992     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23993     int mask_len = Matcher::vector_length(this, $mask);
23994     int mask_size = mask_len * type2aelembytes(mbt);
23995     int vlen_enc = vector_length_encoding(this, $mask);
23996     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23997                              $dst$$Register, mask_len, mask_size, vlen_enc);
23998   %}
23999   ins_pipe( pipe_slow );
24000 %}
24001 
24002 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24003   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24004   match(Set dst (VectorMaskToLong mask));
24005   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24006   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24007   ins_encode %{
24008     int opcode = this->ideal_Opcode();
24009     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24010     int mask_len = Matcher::vector_length(this, $mask);
24011     int vlen_enc = vector_length_encoding(this, $mask);
24012     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24013                              $dst$$Register, mask_len, mbt, vlen_enc);
24014   %}
24015   ins_pipe( pipe_slow );
24016 %}
24017 
24018 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24019   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24020   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24021   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24022   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24023   ins_encode %{
24024     int opcode = this->ideal_Opcode();
24025     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24026     int mask_len = Matcher::vector_length(this, $mask);
24027     int vlen_enc = vector_length_encoding(this, $mask);
24028     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24029                              $dst$$Register, mask_len, mbt, vlen_enc);
24030   %}
24031   ins_pipe( pipe_slow );
24032 %}
24033 
24034 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24035   predicate(n->in(1)->bottom_type()->isa_vectmask());
24036   match(Set dst (VectorMaskTrueCount mask));
24037   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24038   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24039   ins_encode %{
24040     int opcode = this->ideal_Opcode();
24041     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24042     int mask_len = Matcher::vector_length(this, $mask);
24043     int mask_size = mask_len * type2aelembytes(mbt);
24044     int vlen_enc = vector_length_encoding(this, $mask);
24045     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24046                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24047   %}
24048   ins_pipe( pipe_slow );
24049 %}
24050 
24051 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24052   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24053   match(Set dst (VectorMaskTrueCount mask));
24054   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24055   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24056   ins_encode %{
24057     int opcode = this->ideal_Opcode();
24058     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24059     int mask_len = Matcher::vector_length(this, $mask);
24060     int vlen_enc = vector_length_encoding(this, $mask);
24061     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24062                              $tmp$$Register, mask_len, mbt, vlen_enc);
24063   %}
24064   ins_pipe( pipe_slow );
24065 %}
24066 
24067 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24068   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24069   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24070   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24071   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24072   ins_encode %{
24073     int opcode = this->ideal_Opcode();
24074     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24075     int mask_len = Matcher::vector_length(this, $mask);
24076     int vlen_enc = vector_length_encoding(this, $mask);
24077     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24078                              $tmp$$Register, mask_len, mbt, vlen_enc);
24079   %}
24080   ins_pipe( pipe_slow );
24081 %}
24082 
24083 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24084   predicate(n->in(1)->bottom_type()->isa_vectmask());
24085   match(Set dst (VectorMaskFirstTrue mask));
24086   match(Set dst (VectorMaskLastTrue mask));
24087   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24088   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24089   ins_encode %{
24090     int opcode = this->ideal_Opcode();
24091     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24092     int mask_len = Matcher::vector_length(this, $mask);
24093     int mask_size = mask_len * type2aelembytes(mbt);
24094     int vlen_enc = vector_length_encoding(this, $mask);
24095     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24096                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24097   %}
24098   ins_pipe( pipe_slow );
24099 %}
24100 
24101 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24102   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24103   match(Set dst (VectorMaskFirstTrue mask));
24104   match(Set dst (VectorMaskLastTrue mask));
24105   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24106   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24107   ins_encode %{
24108     int opcode = this->ideal_Opcode();
24109     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24110     int mask_len = Matcher::vector_length(this, $mask);
24111     int vlen_enc = vector_length_encoding(this, $mask);
24112     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24113                              $tmp$$Register, mask_len, mbt, vlen_enc);
24114   %}
24115   ins_pipe( pipe_slow );
24116 %}
24117 
24118 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24119   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24120   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24121   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24122   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24123   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24124   ins_encode %{
24125     int opcode = this->ideal_Opcode();
24126     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24127     int mask_len = Matcher::vector_length(this, $mask);
24128     int vlen_enc = vector_length_encoding(this, $mask);
24129     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24130                              $tmp$$Register, mask_len, mbt, vlen_enc);
24131   %}
24132   ins_pipe( pipe_slow );
24133 %}
24134 
24135 // --------------------------------- Compress/Expand Operations ---------------------------
24136 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24137   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24138   match(Set dst (CompressV src mask));
24139   match(Set dst (ExpandV src mask));
24140   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24142   ins_encode %{
24143     int opcode = this->ideal_Opcode();
24144     int vlen_enc = vector_length_encoding(this);
24145     BasicType bt  = Matcher::vector_element_basic_type(this);
24146     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24147                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24148   %}
24149   ins_pipe( pipe_slow );
24150 %}
24151 
24152 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24153   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24154   match(Set dst (CompressV src mask));
24155   match(Set dst (ExpandV src mask));
24156   format %{ "vector_compress_expand $dst, $src, $mask" %}
24157   ins_encode %{
24158     int opcode = this->ideal_Opcode();
24159     int vector_len = vector_length_encoding(this);
24160     BasicType bt  = Matcher::vector_element_basic_type(this);
24161     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24162   %}
24163   ins_pipe( pipe_slow );
24164 %}
24165 
24166 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24167   match(Set dst (CompressM mask));
24168   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24169   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24170   ins_encode %{
24171     assert(this->in(1)->bottom_type()->isa_vectmask(), "");
24172     int mask_len = Matcher::vector_length(this);
24173     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24174   %}
24175   ins_pipe( pipe_slow );
24176 %}
24177 
24178 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24179 
24180 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24181   predicate(!VM_Version::supports_gfni());
24182   match(Set dst (ReverseV src));
24183   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24185   ins_encode %{
24186     int vec_enc = vector_length_encoding(this);
24187     BasicType bt = Matcher::vector_element_basic_type(this);
24188     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24189                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24190   %}
24191   ins_pipe( pipe_slow );
24192 %}
24193 
24194 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24195   predicate(VM_Version::supports_gfni());
24196   match(Set dst (ReverseV src));
24197   effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24199   ins_encode %{
24200     int vec_enc = vector_length_encoding(this);
24201     BasicType bt  = Matcher::vector_element_basic_type(this);
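    // 0x8040201008040201 is the bit matrix that, applied via gf2p8affineqb, reverses the bit order within each byte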
24202     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24203     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24204                                $xtmp$$XMMRegister);
24205   %}
24206   ins_pipe( pipe_slow );
24207 %}
24208 
24209 instruct vreverse_byte_reg(vec dst, vec src) %{
24210   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24211   match(Set dst (ReverseBytesV src));
24212   effect(TEMP dst);
24213   format %{ "vector_reverse_byte $dst, $src" %}
24214   ins_encode %{
24215     int vec_enc = vector_length_encoding(this);
24216     BasicType bt = Matcher::vector_element_basic_type(this);
24217     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24218   %}
24219   ins_pipe( pipe_slow );
24220 %}
24221 
24222 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24223   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24224   match(Set dst (ReverseBytesV src));
24225   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24227   ins_encode %{
24228     int vec_enc = vector_length_encoding(this);
24229     BasicType bt = Matcher::vector_element_basic_type(this);
24230     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24231                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24232   %}
24233   ins_pipe( pipe_slow );
24234 %}
24235 
24236 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24237 
24238 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24239   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24240                                               Matcher::vector_length_in_bytes(n->in(1))));
24241   match(Set dst (CountLeadingZerosV src));
24242   format %{ "vector_count_leading_zeros $dst, $src" %}
24243   ins_encode %{
24244      int vlen_enc = vector_length_encoding(this, $src);
24245      BasicType bt = Matcher::vector_element_basic_type(this, $src);
24246      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24247                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24248   %}
24249   ins_pipe( pipe_slow );
24250 %}
24251 
24252 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24253   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24254                                               Matcher::vector_length_in_bytes(n->in(1))));
24255   match(Set dst (CountLeadingZerosV src mask));
24256   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24257   ins_encode %{
24258     int vlen_enc = vector_length_encoding(this, $src);
24259     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24260     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24261     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24262                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24263   %}
24264   ins_pipe( pipe_slow );
24265 %}
24266 
24267 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24268   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24269             VM_Version::supports_avx512cd() &&
24270             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24271   match(Set dst (CountLeadingZerosV src));
24272   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24274   ins_encode %{
24275     int vlen_enc = vector_length_encoding(this, $src);
24276     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24277     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24278                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24279   %}
24280   ins_pipe( pipe_slow );
24281 %}
24282 
24283 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24284   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24285   match(Set dst (CountLeadingZerosV src));
24286   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24288   ins_encode %{
24289     int vlen_enc = vector_length_encoding(this, $src);
24290     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24291     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24292                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24293                                        $rtmp$$Register, true, vlen_enc);
24294   %}
24295   ins_pipe( pipe_slow );
24296 %}
24297 
24298 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24299   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24300             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24301   match(Set dst (CountLeadingZerosV src));
24302   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24303   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24304   ins_encode %{
24305     int vlen_enc = vector_length_encoding(this, $src);
24306     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24307     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24308                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24309   %}
24310   ins_pipe( pipe_slow );
24311 %}
24312 
24313 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24314   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24315             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24316   match(Set dst (CountLeadingZerosV src));
24317   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24318   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24319   ins_encode %{
24320     int vlen_enc = vector_length_encoding(this, $src);
24321     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24322     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24323                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24324   %}
24325   ins_pipe( pipe_slow );
24326 %}
24327 
24328 // ---------------------------------- Vector Masked Operations ------------------------------------
24329 
24330 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24331   match(Set dst (AddVB (Binary dst src2) mask));
24332   match(Set dst (AddVS (Binary dst src2) mask));
24333   match(Set dst (AddVI (Binary dst src2) mask));
24334   match(Set dst (AddVL (Binary dst src2) mask));
24335   match(Set dst (AddVF (Binary dst src2) mask));
24336   match(Set dst (AddVD (Binary dst src2) mask));
24337   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24338   ins_encode %{
24339     int vlen_enc = vector_length_encoding(this);
24340     BasicType bt = Matcher::vector_element_basic_type(this);
24341     int opc = this->ideal_Opcode();
24342     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24343                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24344   %}
24345   ins_pipe( pipe_slow );
24346 %}
24347 
24348 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24349   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24350   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24351   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24352   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24353   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24354   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24355   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24356   ins_encode %{
24357     int vlen_enc = vector_length_encoding(this);
24358     BasicType bt = Matcher::vector_element_basic_type(this);
24359     int opc = this->ideal_Opcode();
24360     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24361                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24362   %}
24363   ins_pipe( pipe_slow );
24364 %}
24365 
24366 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24367   match(Set dst (XorV (Binary dst src2) mask));
24368   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24369   ins_encode %{
24370     int vlen_enc = vector_length_encoding(this);
24371     BasicType bt = Matcher::vector_element_basic_type(this);
24372     int opc = this->ideal_Opcode();
24373     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24374                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24375   %}
24376   ins_pipe( pipe_slow );
24377 %}
24378 
24379 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24380   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24381   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24382   ins_encode %{
24383     int vlen_enc = vector_length_encoding(this);
24384     BasicType bt = Matcher::vector_element_basic_type(this);
24385     int opc = this->ideal_Opcode();
24386     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24387                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24388   %}
24389   ins_pipe( pipe_slow );
24390 %}
24391 
24392 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24393   match(Set dst (OrV (Binary dst src2) mask));
24394   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24395   ins_encode %{
24396     int vlen_enc = vector_length_encoding(this);
24397     BasicType bt = Matcher::vector_element_basic_type(this);
24398     int opc = this->ideal_Opcode();
24399     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24400                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24401   %}
24402   ins_pipe( pipe_slow );
24403 %}
24404 
24405 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24406   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24407   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24408   ins_encode %{
24409     int vlen_enc = vector_length_encoding(this);
24410     BasicType bt = Matcher::vector_element_basic_type(this);
24411     int opc = this->ideal_Opcode();
24412     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24413                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24414   %}
24415   ins_pipe( pipe_slow );
24416 %}
24417 
24418 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24419   match(Set dst (AndV (Binary dst src2) mask));
24420   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24421   ins_encode %{
24422     int vlen_enc = vector_length_encoding(this);
24423     BasicType bt = Matcher::vector_element_basic_type(this);
24424     int opc = this->ideal_Opcode();
24425     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24426                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24427   %}
24428   ins_pipe( pipe_slow );
24429 %}
24430 
24431 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24432   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24433   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24434   ins_encode %{
24435     int vlen_enc = vector_length_encoding(this);
24436     BasicType bt = Matcher::vector_element_basic_type(this);
24437     int opc = this->ideal_Opcode();
24438     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24439                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24440   %}
24441   ins_pipe( pipe_slow );
24442 %}
24443 
24444 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24445   match(Set dst (SubVB (Binary dst src2) mask));
24446   match(Set dst (SubVS (Binary dst src2) mask));
24447   match(Set dst (SubVI (Binary dst src2) mask));
24448   match(Set dst (SubVL (Binary dst src2) mask));
24449   match(Set dst (SubVF (Binary dst src2) mask));
24450   match(Set dst (SubVD (Binary dst src2) mask));
24451   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24452   ins_encode %{
24453     int vlen_enc = vector_length_encoding(this);
24454     BasicType bt = Matcher::vector_element_basic_type(this);
24455     int opc = this->ideal_Opcode();
24456     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24457                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24458   %}
24459   ins_pipe( pipe_slow );
24460 %}
24461 
24462 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24463   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24464   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24465   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24466   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24467   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24468   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24469   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24470   ins_encode %{
24471     int vlen_enc = vector_length_encoding(this);
24472     BasicType bt = Matcher::vector_element_basic_type(this);
24473     int opc = this->ideal_Opcode();
24474     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24475                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24476   %}
24477   ins_pipe( pipe_slow );
24478 %}
24479 
24480 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24481   match(Set dst (MulVS (Binary dst src2) mask));
24482   match(Set dst (MulVI (Binary dst src2) mask));
24483   match(Set dst (MulVL (Binary dst src2) mask));
24484   match(Set dst (MulVF (Binary dst src2) mask));
24485   match(Set dst (MulVD (Binary dst src2) mask));
24486   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24487   ins_encode %{
24488     int vlen_enc = vector_length_encoding(this);
24489     BasicType bt = Matcher::vector_element_basic_type(this);
24490     int opc = this->ideal_Opcode();
24491     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24492                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24493   %}
24494   ins_pipe( pipe_slow );
24495 %}
24496 
24497 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24498   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24499   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24500   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24501   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24502   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24503   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24504   ins_encode %{
24505     int vlen_enc = vector_length_encoding(this);
24506     BasicType bt = Matcher::vector_element_basic_type(this);
24507     int opc = this->ideal_Opcode();
24508     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24509                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24510   %}
24511   ins_pipe( pipe_slow );
24512 %}
24513 
24514 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24515   match(Set dst (SqrtVF dst mask));
24516   match(Set dst (SqrtVD dst mask));
24517   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24518   ins_encode %{
24519     int vlen_enc = vector_length_encoding(this);
24520     BasicType bt = Matcher::vector_element_basic_type(this);
24521     int opc = this->ideal_Opcode();
24522     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24523                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24524   %}
24525   ins_pipe( pipe_slow );
24526 %}
24527 
24528 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24529   match(Set dst (DivVF (Binary dst src2) mask));
24530   match(Set dst (DivVD (Binary dst src2) mask));
24531   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24532   ins_encode %{
24533     int vlen_enc = vector_length_encoding(this);
24534     BasicType bt = Matcher::vector_element_basic_type(this);
24535     int opc = this->ideal_Opcode();
24536     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24537                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24538   %}
24539   ins_pipe( pipe_slow );
24540 %}
24541 
24542 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24543   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24544   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24545   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24546   ins_encode %{
24547     int vlen_enc = vector_length_encoding(this);
24548     BasicType bt = Matcher::vector_element_basic_type(this);
24549     int opc = this->ideal_Opcode();
24550     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24551                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24552   %}
24553   ins_pipe( pipe_slow );
24554 %}
24555 
24556 
24557 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24558   match(Set dst (RotateLeftV (Binary dst shift) mask));
24559   match(Set dst (RotateRightV (Binary dst shift) mask));
24560   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24561   ins_encode %{
24562     int vlen_enc = vector_length_encoding(this);
24563     BasicType bt = Matcher::vector_element_basic_type(this);
24564     int opc = this->ideal_Opcode();
24565     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24566                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24567   %}
24568   ins_pipe( pipe_slow );
24569 %}
24570 
24571 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24572   match(Set dst (RotateLeftV (Binary dst src2) mask));
24573   match(Set dst (RotateRightV (Binary dst src2) mask));
24574   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24575   ins_encode %{
24576     int vlen_enc = vector_length_encoding(this);
24577     BasicType bt = Matcher::vector_element_basic_type(this);
24578     int opc = this->ideal_Opcode();
24579     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24580                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24581   %}
24582   ins_pipe( pipe_slow );
24583 %}
24584 
24585 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24586   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24587   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24588   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24589   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24590   ins_encode %{
24591     int vlen_enc = vector_length_encoding(this);
24592     BasicType bt = Matcher::vector_element_basic_type(this);
24593     int opc = this->ideal_Opcode();
24594     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24595                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24596   %}
24597   ins_pipe( pipe_slow );
24598 %}
24599 
24600 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24601   predicate(!n->as_ShiftV()->is_var_shift());
24602   match(Set dst (LShiftVS (Binary dst src2) mask));
24603   match(Set dst (LShiftVI (Binary dst src2) mask));
24604   match(Set dst (LShiftVL (Binary dst src2) mask));
24605   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24606   ins_encode %{
24607     int vlen_enc = vector_length_encoding(this);
24608     BasicType bt = Matcher::vector_element_basic_type(this);
24609     int opc = this->ideal_Opcode();
24610     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24611                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24612   %}
24613   ins_pipe( pipe_slow );
24614 %}
24615 
24616 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24617   predicate(n->as_ShiftV()->is_var_shift());
24618   match(Set dst (LShiftVS (Binary dst src2) mask));
24619   match(Set dst (LShiftVI (Binary dst src2) mask));
24620   match(Set dst (LShiftVL (Binary dst src2) mask));
24621   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24622   ins_encode %{
24623     int vlen_enc = vector_length_encoding(this);
24624     BasicType bt = Matcher::vector_element_basic_type(this);
24625     int opc = this->ideal_Opcode();
24626     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24627                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24628   %}
24629   ins_pipe( pipe_slow );
24630 %}
24631 
24632 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24633   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24634   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24635   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24636   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24637   ins_encode %{
24638     int vlen_enc = vector_length_encoding(this);
24639     BasicType bt = Matcher::vector_element_basic_type(this);
24640     int opc = this->ideal_Opcode();
24641     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24642                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24643   %}
24644   ins_pipe( pipe_slow );
24645 %}
24646 
24647 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24648   predicate(!n->as_ShiftV()->is_var_shift());
24649   match(Set dst (RShiftVS (Binary dst src2) mask));
24650   match(Set dst (RShiftVI (Binary dst src2) mask));
24651   match(Set dst (RShiftVL (Binary dst src2) mask));
24652   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24653   ins_encode %{
24654     int vlen_enc = vector_length_encoding(this);
24655     BasicType bt = Matcher::vector_element_basic_type(this);
24656     int opc = this->ideal_Opcode();
24657     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24658                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24659   %}
24660   ins_pipe( pipe_slow );
24661 %}
24662 
24663 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24664   predicate(n->as_ShiftV()->is_var_shift());
24665   match(Set dst (RShiftVS (Binary dst src2) mask));
24666   match(Set dst (RShiftVI (Binary dst src2) mask));
24667   match(Set dst (RShiftVL (Binary dst src2) mask));
24668   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24669   ins_encode %{
24670     int vlen_enc = vector_length_encoding(this);
24671     BasicType bt = Matcher::vector_element_basic_type(this);
24672     int opc = this->ideal_Opcode();
24673     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24674                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24675   %}
24676   ins_pipe( pipe_slow );
24677 %}
24678 
24679 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24680   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24681   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24682   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24683   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24684   ins_encode %{
24685     int vlen_enc = vector_length_encoding(this);
24686     BasicType bt = Matcher::vector_element_basic_type(this);
24687     int opc = this->ideal_Opcode();
24688     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24689                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24690   %}
24691   ins_pipe( pipe_slow );
24692 %}
24693 
24694 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24695   predicate(!n->as_ShiftV()->is_var_shift());
24696   match(Set dst (URShiftVS (Binary dst src2) mask));
24697   match(Set dst (URShiftVI (Binary dst src2) mask));
24698   match(Set dst (URShiftVL (Binary dst src2) mask));
24699   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24700   ins_encode %{
24701     int vlen_enc = vector_length_encoding(this);
24702     BasicType bt = Matcher::vector_element_basic_type(this);
24703     int opc = this->ideal_Opcode();
24704     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24705                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24706   %}
24707   ins_pipe( pipe_slow );
24708 %}
24709 
24710 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24711   predicate(n->as_ShiftV()->is_var_shift());
24712   match(Set dst (URShiftVS (Binary dst src2) mask));
24713   match(Set dst (URShiftVI (Binary dst src2) mask));
24714   match(Set dst (URShiftVL (Binary dst src2) mask));
24715   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24716   ins_encode %{
24717     int vlen_enc = vector_length_encoding(this);
24718     BasicType bt = Matcher::vector_element_basic_type(this);
24719     int opc = this->ideal_Opcode();
24720     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24721                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24722   %}
24723   ins_pipe( pipe_slow );
24724 %}
24725 
24726 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24727   match(Set dst (MaxV (Binary dst src2) mask));
24728   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24729   ins_encode %{
24730     int vlen_enc = vector_length_encoding(this);
24731     BasicType bt = Matcher::vector_element_basic_type(this);
24732     int opc = this->ideal_Opcode();
24733     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24734                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24735   %}
24736   ins_pipe( pipe_slow );
24737 %}
24738 
24739 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24740   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24741   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24742   ins_encode %{
24743     int vlen_enc = vector_length_encoding(this);
24744     BasicType bt = Matcher::vector_element_basic_type(this);
24745     int opc = this->ideal_Opcode();
24746     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24747                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24748   %}
24749   ins_pipe( pipe_slow );
24750 %}
24751 
24752 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24753   match(Set dst (MinV (Binary dst src2) mask));
24754   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24755   ins_encode %{
24756     int vlen_enc = vector_length_encoding(this);
24757     BasicType bt = Matcher::vector_element_basic_type(this);
24758     int opc = this->ideal_Opcode();
24759     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24760                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24761   %}
24762   ins_pipe( pipe_slow );
24763 %}
24764 
24765 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24766   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24767   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24768   ins_encode %{
24769     int vlen_enc = vector_length_encoding(this);
24770     BasicType bt = Matcher::vector_element_basic_type(this);
24771     int opc = this->ideal_Opcode();
24772     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24773                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24774   %}
24775   ins_pipe( pipe_slow );
24776 %}
24777 
24778 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24779   match(Set dst (VectorRearrange (Binary dst src2) mask));
24780   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24781   ins_encode %{
24782     int vlen_enc = vector_length_encoding(this);
24783     BasicType bt = Matcher::vector_element_basic_type(this);
24784     int opc = this->ideal_Opcode();
24785     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24786                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24787   %}
24788   ins_pipe( pipe_slow );
24789 %}
24790 
24791 instruct vabs_masked(vec dst, kReg mask) %{
24792   match(Set dst (AbsVB dst mask));
24793   match(Set dst (AbsVS dst mask));
24794   match(Set dst (AbsVI dst mask));
24795   match(Set dst (AbsVL dst mask));
24796   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24797   ins_encode %{
24798     int vlen_enc = vector_length_encoding(this);
24799     BasicType bt = Matcher::vector_element_basic_type(this);
24800     int opc = this->ideal_Opcode();
24801     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24802                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24803   %}
24804   ins_pipe( pipe_slow );
24805 %}
24806 
24807 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24808   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24809   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24810   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24811   ins_encode %{
24812     assert(UseFMA, "Needs FMA instructions support.");
24813     int vlen_enc = vector_length_encoding(this);
24814     BasicType bt = Matcher::vector_element_basic_type(this);
24815     int opc = this->ideal_Opcode();
24816     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24817                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24818   %}
24819   ins_pipe( pipe_slow );
24820 %}
24821 
24822 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24823   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24824   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24825   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24826   ins_encode %{
24827     assert(UseFMA, "Needs FMA instructions support.");
24828     int vlen_enc = vector_length_encoding(this);
24829     BasicType bt = Matcher::vector_element_basic_type(this);
24830     int opc = this->ideal_Opcode();
24831     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24832                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24833   %}
24834   ins_pipe( pipe_slow );
24835 %}
24836 
24837 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24838   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24839   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24840   ins_encode %{
24841     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24842     int vlen_enc = vector_length_encoding(this, $src1);
24843     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24844 
    // Dispatch the comparison on the element type of src1.
24846     switch (src1_elem_bt) {
24847       case T_BYTE: {
24848         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24849         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24850         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24851         break;
24852       }
24853       case T_SHORT: {
24854         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24855         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24856         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24857         break;
24858       }
24859       case T_INT: {
24860         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24861         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24862         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24863         break;
24864       }
24865       case T_LONG: {
24866         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24867         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24868         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24869         break;
24870       }
24871       case T_FLOAT: {
24872         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24873         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24874         break;
24875       }
24876       case T_DOUBLE: {
24877         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24878         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24879         break;
24880       }
24881       default: assert(false, "%s", type2name(src1_elem_bt)); break;
24882     }
24883   %}
24884   ins_pipe( pipe_slow );
24885 %}
24886 
24887 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24888   predicate(Matcher::vector_length(n) <= 32);
24889   match(Set dst (MaskAll src));
24890   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24891   ins_encode %{
24892     int mask_len = Matcher::vector_length(this);
24893     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24894   %}
24895   ins_pipe( pipe_slow );
24896 %}
24897 
24898 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24899   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24900   match(Set dst (XorVMask src (MaskAll cnt)));
24901   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24902   format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
24903   ins_encode %{
24904     uint masklen = Matcher::vector_length(this);
24905     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24906   %}
24907   ins_pipe( pipe_slow );
24908 %}
24909 
24910 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24911   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24912             (Matcher::vector_length(n) == 16) ||
24913             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24914   match(Set dst (XorVMask src (MaskAll cnt)));
24915   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24916   ins_encode %{
24917     uint masklen = Matcher::vector_length(this);
24918     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24919   %}
24920   ins_pipe( pipe_slow );
24921 %}
24922 
24923 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24924   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24925   match(Set dst (VectorLongToMask src));
24926   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24927   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24928   ins_encode %{
24929     int mask_len = Matcher::vector_length(this);
24930     int vec_enc  = vector_length_encoding(mask_len);
24931     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24932                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24933   %}
24934   ins_pipe( pipe_slow );
24935 %}
24936 
24937 
24938 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24939   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24940   match(Set dst (VectorLongToMask src));
24941   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24942   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
24943   ins_encode %{
24944     int mask_len = Matcher::vector_length(this);
24945     assert(mask_len <= 32, "invalid mask length");
24946     int vec_enc  = vector_length_encoding(mask_len);
24947     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24948                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24949   %}
24950   ins_pipe( pipe_slow );
24951 %}
24952 
24953 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24954   predicate(n->bottom_type()->isa_vectmask());
24955   match(Set dst (VectorLongToMask src));
24956   format %{ "long_to_mask_evex $dst, $src\t!" %}
24957   ins_encode %{
24958     __ kmov($dst$$KRegister, $src$$Register);
24959   %}
24960   ins_pipe( pipe_slow );
24961 %}
24962 
24963 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24964   match(Set dst (AndVMask src1 src2));
24965   match(Set dst (OrVMask src1 src2));
24966   match(Set dst (XorVMask src1 src2));
24967   effect(TEMP kscratch);
24968   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24969   ins_encode %{
24970     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24971     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24972     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24973     uint masklen = Matcher::vector_length(this);
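    // Without AVX512DQ there are no 8-bit opmask instructions (kandb etc.),
    // so sub-16-bit masks are widened and the 16-bit (word) forms are used.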
24974     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24975     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24976   %}
24977   ins_pipe( pipe_slow );
24978 %}
24979 
24980 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24981   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24982   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24983   ins_encode %{
24984     int vlen_enc = vector_length_encoding(this);
24985     BasicType bt = Matcher::vector_element_basic_type(this);
24986     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24987                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24988   %}
24989   ins_pipe( pipe_slow );
24990 %}
24991 
24992 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24993   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24994   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24995   ins_encode %{
24996     int vlen_enc = vector_length_encoding(this);
24997     BasicType bt = Matcher::vector_element_basic_type(this);
24998     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24999                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25000   %}
25001   ins_pipe( pipe_slow );
25002 %}
25003 
25004 instruct castMM(kReg dst)
25005 %{
25006   match(Set dst (CastVV dst));
25007 
25008   size(0);
25009   format %{ "# castVV of $dst" %}
25010   ins_encode(/* empty encoding */);
25011   ins_cost(0);
25012   ins_pipe(empty);
25013 %}
25014 
25015 instruct castVV(vec dst)
25016 %{
25017   match(Set dst (CastVV dst));
25018 
25019   size(0);
25020   format %{ "# castVV of $dst" %}
25021   ins_encode(/* empty encoding */);
25022   ins_cost(0);
25023   ins_pipe(empty);
25024 %}
25025 
25026 instruct castVVLeg(legVec dst)
25027 %{
25028   match(Set dst (CastVV dst));
25029 
25030   size(0);
25031   format %{ "# castVV of $dst" %}
25032   ins_encode(/* empty encoding */);
25033   ins_cost(0);
25034   ins_pipe(empty);
25035 %}
25036 
25037 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25038 %{
25039   match(Set dst (IsInfiniteF src));
25040   effect(TEMP ktmp, KILL cr);
25041   format %{ "float_class_check $dst, $src" %}
25042   ins_encode %{
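    // imm8 0x18 selects the +infinity (0x08) and -infinity (0x10) classes.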
25043     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25044     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25045   %}
25046   ins_pipe(pipe_slow);
25047 %}
25048 
25049 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25050 %{
25051   match(Set dst (IsInfiniteD src));
25052   effect(TEMP ktmp, KILL cr);
25053   format %{ "double_class_check $dst, $src" %}
25054   ins_encode %{
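    // imm8 0x18 selects the +infinity (0x08) and -infinity (0x10) classes.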
25055     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25056     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25057   %}
25058   ins_pipe(pipe_slow);
25059 %}
25060 
25061 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25062 %{
25063   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25064             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25065   match(Set dst (SaturatingAddV src1 src2));
25066   match(Set dst (SaturatingSubV src1 src2));
25067   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25068   ins_encode %{
25069     int vlen_enc = vector_length_encoding(this);
25070     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25071     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25072                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25073   %}
25074   ins_pipe(pipe_slow);
25075 %}
25076 
25077 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25078 %{
25079   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25080             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25081   match(Set dst (SaturatingAddV src1 src2));
25082   match(Set dst (SaturatingSubV src1 src2));
25083   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25084   ins_encode %{
25085     int vlen_enc = vector_length_encoding(this);
25086     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25087     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25088                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25089   %}
25090   ins_pipe(pipe_slow);
25091 %}
25092 
25093 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25094 %{
25095   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25096             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25097             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25098   match(Set dst (SaturatingAddV src1 src2));
25099   match(Set dst (SaturatingSubV src1 src2));
25100   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25101   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25102   ins_encode %{
25103     int vlen_enc = vector_length_encoding(this);
25104     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25105     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25106                                         $src1$$XMMRegister, $src2$$XMMRegister,
25107                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25108                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25109   %}
25110   ins_pipe(pipe_slow);
25111 %}
25112 
25113 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25114 %{
25115   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25116             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25117             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25118   match(Set dst (SaturatingAddV src1 src2));
25119   match(Set dst (SaturatingSubV src1 src2));
25120   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25121   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25122   ins_encode %{
25123     int vlen_enc = vector_length_encoding(this);
25124     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25125     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25126                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25127                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25128   %}
25129   ins_pipe(pipe_slow);
25130 %}
25131 
25132 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25133 %{
25134   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25135             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25136             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25137   match(Set dst (SaturatingAddV src1 src2));
25138   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25139   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25140   ins_encode %{
25141     int vlen_enc = vector_length_encoding(this);
25142     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25143     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25144                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25145   %}
25146   ins_pipe(pipe_slow);
25147 %}
25148 
25149 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25150 %{
25151   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25152             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25153             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25154   match(Set dst (SaturatingAddV src1 src2));
25155   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25156   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25157   ins_encode %{
25158     int vlen_enc = vector_length_encoding(this);
25159     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25160     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25161                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25162   %}
25163   ins_pipe(pipe_slow);
25164 %}
25165 
25166 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25167 %{
25168   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25169             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25170             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25171   match(Set dst (SaturatingSubV src1 src2));
25172   effect(TEMP ktmp);
25173   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25174   ins_encode %{
25175     int vlen_enc = vector_length_encoding(this);
25176     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25177     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25178                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25179   %}
25180   ins_pipe(pipe_slow);
25181 %}
25182 
25183 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25184 %{
25185   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25186             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25187             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25188   match(Set dst (SaturatingSubV src1 src2));
25189   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25190   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25191   ins_encode %{
25192     int vlen_enc = vector_length_encoding(this);
25193     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25194     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25195                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25196   %}
25197   ins_pipe(pipe_slow);
25198 %}
25199 
25200 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25201 %{
25202   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25203             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25204   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25205   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25206   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25207   ins_encode %{
25208     int vlen_enc = vector_length_encoding(this);
25209     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25210     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25211                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25212   %}
25213   ins_pipe(pipe_slow);
25214 %}
25215 
25216 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25217 %{
25218   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25219             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25220   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25221   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25222   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25223   ins_encode %{
25224     int vlen_enc = vector_length_encoding(this);
25225     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25226     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25227                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25228   %}
25229   ins_pipe(pipe_slow);
25230 %}
25231 
25232 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25233   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25234             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25235   match(Set dst (SaturatingAddV (Binary dst src) mask));
25236   match(Set dst (SaturatingSubV (Binary dst src) mask));
25237   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25238   ins_encode %{
25239     int vlen_enc = vector_length_encoding(this);
25240     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25241     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25242                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25243   %}
25244   ins_pipe( pipe_slow );
25245 %}
25246 
25247 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25248   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25249             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25250   match(Set dst (SaturatingAddV (Binary dst src) mask));
25251   match(Set dst (SaturatingSubV (Binary dst src) mask));
25252   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25253   ins_encode %{
25254     int vlen_enc = vector_length_encoding(this);
25255     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25256     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25257                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25258   %}
25259   ins_pipe( pipe_slow );
25260 %}
25261 
25262 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25263   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25264             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25265   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25266   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25267   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25268   ins_encode %{
25269     int vlen_enc = vector_length_encoding(this);
25270     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25271     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25272                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25273   %}
25274   ins_pipe( pipe_slow );
25275 %}
25276 
25277 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25278   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25279             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25280   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25281   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25282   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25283   ins_encode %{
25284     int vlen_enc = vector_length_encoding(this);
25285     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25286     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25287                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25288   %}
25289   ins_pipe( pipe_slow );
25290 %}
25291 
25292 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25293 %{
25294   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25295   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25296   ins_encode %{
25297     int vlen_enc = vector_length_encoding(this);
25298     BasicType bt = Matcher::vector_element_basic_type(this);
25299     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25300   %}
25301   ins_pipe(pipe_slow);
25302 %}
25303 
25304 instruct reinterpretS2HF(regF dst, rRegI src)
25305 %{
25306   match(Set dst (ReinterpretS2HF src));
25307   format %{ "evmovw $dst, $src" %}
25308   ins_encode %{
25309     __ evmovw($dst$$XMMRegister, $src$$Register);
25310   %}
25311   ins_pipe(pipe_slow);
25312 %}
25313 
25314 instruct reinterpretHF2S(rRegI dst, regF src)
25315 %{
25316   match(Set dst (ReinterpretHF2S src));
25317   format %{ "evmovw $dst, $src" %}
25318   ins_encode %{
25319     __ evmovw($dst$$Register, $src$$XMMRegister);
25320   %}
25321   ins_pipe(pipe_slow);
25322 %}
25323 
25324 instruct convF2HFAndS2HF(regF dst, regF src)
25325 %{
25326   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25327   format %{ "convF2HFAndS2HF $dst, $src" %}
25328   ins_encode %{
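    // imm8 0x04 (bit 2 set): round according to the current MXCSR rounding mode.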
25329     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25330   %}
25331   ins_pipe(pipe_slow);
25332 %}
25333 
25334 instruct convHF2SAndHF2F(regF dst, regF src)
25335 %{
25336   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25337   format %{ "convHF2SAndHF2F $dst, $src" %}
25338   ins_encode %{
25339     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25340   %}
25341   ins_pipe(pipe_slow);
25342 %}
25343 
25344 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25345 %{
25346   match(Set dst (SqrtHF src));
25347   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25348   ins_encode %{
25349     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25350   %}
25351   ins_pipe(pipe_slow);
25352 %}
25353 
25354 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25355 %{
25356   match(Set dst (AddHF src1 src2));
25357   match(Set dst (DivHF src1 src2));
25358   match(Set dst (MulHF src1 src2));
25359   match(Set dst (SubHF src1 src2));
25360   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25361   ins_encode %{
25362     int opcode = this->ideal_Opcode();
25363     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25364   %}
25365   ins_pipe(pipe_slow);
25366 %}
25367 
25368 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25369 %{
25370   predicate(VM_Version::supports_avx10_2());
25371   match(Set dst (MaxHF src1 src2));
25372   match(Set dst (MinHF src1 src2));
25373 
25374   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25375   ins_encode %{
25376     int opcode = this->ideal_Opcode();
25377     __ sminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, k0);
25378   %}
25379   ins_pipe( pipe_slow );
25380 %}
25381 
25382 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25383 %{
25384   predicate(!VM_Version::supports_avx10_2());
25385   match(Set dst (MaxHF src1 src2));
25386   match(Set dst (MinHF src1 src2));
25387   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25388 
25389   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25390   ins_encode %{
25391     int opcode = this->ideal_Opcode();
25392     __ sminmax_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25393                     $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25394   %}
25395   ins_pipe( pipe_slow );
25396 %}
25397 
25398 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25399 %{
25400   match(Set dst (FmaHF  src2 (Binary dst src1)));
25401   effect(DEF dst);
25402   format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25403   ins_encode %{
25404     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25405   %}
25406   ins_pipe( pipe_slow );
25407 %}
25408 
25409 
25410 instruct vector_sqrt_HF_reg(vec dst, vec src)
25411 %{
25412   match(Set dst (SqrtVHF src));
25413   format %{ "vector_sqrt_fp16 $dst, $src" %}
25414   ins_encode %{
25415     int vlen_enc = vector_length_encoding(this);
25416     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25417   %}
25418   ins_pipe(pipe_slow);
25419 %}
25420 
25421 instruct vector_sqrt_HF_mem(vec dst, memory src)
25422 %{
25423   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25424   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25425   ins_encode %{
25426     int vlen_enc = vector_length_encoding(this);
25427     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25428   %}
25429   ins_pipe(pipe_slow);
25430 %}
25431 
25432 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25433 %{
25434   match(Set dst (AddVHF src1 src2));
25435   match(Set dst (DivVHF src1 src2));
25436   match(Set dst (MulVHF src1 src2));
25437   match(Set dst (SubVHF src1 src2));
25438   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25439   ins_encode %{
25440     int vlen_enc = vector_length_encoding(this);
25441     int opcode = this->ideal_Opcode();
25442     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25443   %}
25444   ins_pipe(pipe_slow);
25445 %}
25446 
25447 
25448 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25449 %{
25450   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25451   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25452   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25453   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25454   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25455   ins_encode %{
25456     int vlen_enc = vector_length_encoding(this);
25457     int opcode = this->ideal_Opcode();
25458     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25459   %}
25460   ins_pipe(pipe_slow);
25461 %}
25462 
25463 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25464 %{
25465   match(Set dst (FmaVHF src2 (Binary dst src1)));
25466   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25467   ins_encode %{
25468     int vlen_enc = vector_length_encoding(this);
25469     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25470   %}
25471   ins_pipe( pipe_slow );
25472 %}
25473 
25474 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25475 %{
25476   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25477   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25478   ins_encode %{
25479     int vlen_enc = vector_length_encoding(this);
25480     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25481   %}
25482   ins_pipe( pipe_slow );
25483 %}
25484 
25485 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25486 %{
25487   predicate(VM_Version::supports_avx10_2());
25488   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25489   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25490   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25491   ins_encode %{
25492     int vlen_enc = vector_length_encoding(this);
25493     int opcode = this->ideal_Opcode();
25494     __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
25495                             k0, vlen_enc);
25496   %}
25497   ins_pipe( pipe_slow );
25498 %}
25499 
25500 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25501 %{
25502   predicate(VM_Version::supports_avx10_2());
25503   match(Set dst (MinVHF src1 src2));
25504   match(Set dst (MaxVHF src1 src2));
25505   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25506   ins_encode %{
25507     int vlen_enc = vector_length_encoding(this);
25508     int opcode = this->ideal_Opcode();
25509     __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25510                             k0, vlen_enc);
25511   %}
25512   ins_pipe( pipe_slow );
25513 %}
25514 
25515 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25516 %{
25517   predicate(!VM_Version::supports_avx10_2());
25518   match(Set dst (MinVHF src1 src2));
25519   match(Set dst (MaxVHF src1 src2));
25520   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25521   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25522   ins_encode %{
25523     int vlen_enc = vector_length_encoding(this);
25524     int opcode = this->ideal_Opcode();
25525     __ vminmax_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25526                     $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25527   %}
25528   ins_pipe( pipe_slow );
25529 %}
25530 
25531 //----------PEEPHOLE RULES-----------------------------------------------------
25532 // These must follow all instruction definitions as they use the names
25533 // defined in the instruction definitions.
25534 //
25535 // peeppredicate ( rule_predicate );
25536 // // if this predicate evaluates to false, the peephole rule is ignored
25537 //
25538 // peepmatch ( root_instr_name [preceding_instruction]* );
25539 //
25540 // peepprocedure ( procedure_name );
25541 // // provides the name of a procedure that performs the optimization; the
25542 // // procedure must reside in the architecture dependent peephole file and
25543 // // has the signature bool (Block*, int, PhaseRegAlloc*, MachNode* (*)(), int...).
25544 // // The arguments are the basic block, the current node index inside the
25545 // // block, the register allocator, a function that when invoked returns a
25546 // // new node as defined in peepreplace, and the rule numbers of the nodes
25547 // // matched by the corresponding peepmatch. The procedure returns true if
25548 // // the transformation succeeded, else false.
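// //
// // As an illustration only (the procedure name and exact parameter list
// // here are made up; see the architecture dependent peephole file for the
// // real ones), a conforming procedure could look like:
// //
// //   bool my_peep_procedure(Block* block, int index, PhaseRegAlloc* ra_,
// //                          MachNode* (*new_root)(), int inst0_rule) {
// //     // inspect the matched nodes, build the replacement via new_root(),
// //     // wire up its operands, and report whether the rewrite happened
// //     return false;
// //   }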
25549 //
25550 // peepconstraint ( instruction_number.operand_name relational_op instruction_number.operand_name
25551 //                  [, ...] );
25553 // // instruction numbers are zero-based using left to right order in peepmatch
25554 //
25555 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25556 // // provide an instruction_number.operand_name for each operand that appears
25557 // // in the replacement instruction's match rule
25558 //
25559 // ---------VM FLAGS---------------------------------------------------------
25560 //
25561 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25562 //
25563 // Each peephole rule is given an identifying number starting with zero and
25564 // increasing by one in the order seen by the parser.  An individual peephole
25565 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25566 // on the command-line.
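//
// For example (the rule number is illustrative):
//   -XX:-OptoPeephole        disables all peephole rules
//   -XX:OptoPeepholeAt=3     enables only the rule numbered 3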
25567 //
25568 // ---------CURRENT LIMITATIONS----------------------------------------------
25569 //
25570 // Only transformations inside a basic block (do we need more for peephole?)
25571 //
25572 // ---------EXAMPLE----------------------------------------------------------
25573 //
25574 // // pertinent parts of existing instructions in architecture description
25575 // instruct movI(rRegI dst, rRegI src)
25576 // %{
25577 //   match(Set dst (CopyI src));
25578 // %}
25579 //
25580 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25581 // %{
25582 //   match(Set dst (AddI dst src));
25583 //   effect(KILL cr);
25584 // %}
25585 //
25586 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25587 // %{
25588 //   match(Set dst (AddI dst src));
25589 // %}
25590 //
25591 // 1. Simple replacement
25592 // - Only match adjacent instructions in same basic block
25593 // - Only equality constraints
25594 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25595 // - Only one replacement instruction
25596 //
25597 // // Change (inc mov) to lea
25598 // peephole %{
25599 //   // lea should only be emitted when beneficial
25600 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25601 //   // increment preceded by register-register move
25602 //   peepmatch ( incI_rReg movI );
25603 //   // require that the destination register of the increment
25604 //   // match the destination register of the move
25605 //   peepconstraint ( 0.dst == 1.dst );
25606 //   // construct a replacement instruction that sets
25607 //   // the destination to ( move's source register + one )
25608 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25609 // %}
25610 //
25611 // 2. Procedural replacement
25612 // - More flexible in finding relevant nodes
25613 // - More flexible constraints
25614 // - More flexible transformations
25615 // - May utilise the architecture-dependent API more effectively
25616 // - Currently only one replacement instruction due to adlc parsing capabilities
25617 //
25618 // // Change (inc mov) to lea
25619 // peephole %{
25620 //   // lea should only be emitted when beneficial
25621 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25622 //   // the rule numbers of the matched nodes are passed into the procedure below
25623 //   peepmatch ( incI_rReg movI );
25624 //   // the procedure responsible for performing the transformation
25625 //   peepprocedure ( inc_mov_to_lea );
25626 //   // the replacement is a leaI_rReg_immI; a function that creates this node
25627 //   // when invoked is passed into the procedure above
25628 //   peepreplace ( leaI_rReg_immI() );
25629 // %}
25630 
25631 // These instructions are not matched by the matcher but are used by the peephole rules
25632 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25633 %{
25634   predicate(false);
25635   match(Set dst (AddI src1 src2));
25636   format %{ "leal    $dst, [$src1 + $src2]" %}
25637   ins_encode %{
25638     Register dst = $dst$$Register;
25639     Register src1 = $src1$$Register;
25640     Register src2 = $src2$$Register;
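    // rbp and r13 cannot be encoded as a base register without an explicit
    // displacement byte, so prefer the other operand as the base (addition
    // is commutative).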
25641     if (src1 != rbp && src1 != r13) {
25642       __ leal(dst, Address(src1, src2, Address::times_1));
25643     } else {
25644       assert(src2 != rbp && src2 != r13, "");
25645       __ leal(dst, Address(src2, src1, Address::times_1));
25646     }
25647   %}
25648   ins_pipe(ialu_reg_reg);
25649 %}
25650 
25651 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25652 %{
25653   predicate(false);
25654   match(Set dst (AddI src1 src2));
25655   format %{ "leal    $dst, [$src1 + $src2]" %}
25656   ins_encode %{
25657     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25658   %}
25659   ins_pipe(ialu_reg_reg);
25660 %}
25661 
25662 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25663 %{
25664   predicate(false);
25665   match(Set dst (LShiftI src shift));
25666   format %{ "leal    $dst, [$src << $shift]" %}
25667   ins_encode %{
25668     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25669     Register src = $src$$Register;
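    // A scaled index without a base register forces a 4-byte displacement,
    // so for a shift by 1 the shorter form [src + src] is preferred when src
    // itself does not need a displacement byte as a base (rbp, r13).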
25670     if (scale == Address::times_2 && src != rbp && src != r13) {
25671       __ leal($dst$$Register, Address(src, src, Address::times_1));
25672     } else {
25673       __ leal($dst$$Register, Address(noreg, src, scale));
25674     }
25675   %}
25676   ins_pipe(ialu_reg_reg);
25677 %}
25678 
25679 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25680 %{
25681   predicate(false);
25682   match(Set dst (AddL src1 src2));
25683   format %{ "leaq    $dst, [$src1 + $src2]" %}
25684   ins_encode %{
25685     Register dst = $dst$$Register;
25686     Register src1 = $src1$$Register;
25687     Register src2 = $src2$$Register;
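    // Same base register encoding consideration as in leaI_rReg_rReg_peep above.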
25688     if (src1 != rbp && src1 != r13) {
25689       __ leaq(dst, Address(src1, src2, Address::times_1));
25690     } else {
25691       assert(src2 != rbp && src2 != r13, "");
25692       __ leaq(dst, Address(src2, src1, Address::times_1));
25693     }
25694   %}
25695   ins_pipe(ialu_reg_reg);
25696 %}
25697 
25698 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25699 %{
25700   predicate(false);
25701   match(Set dst (AddL src1 src2));
25702   format %{ "leaq    $dst, [$src1 + $src2]" %}
25703   ins_encode %{
25704     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25705   %}
25706   ins_pipe(ialu_reg_reg);
25707 %}
25708 
25709 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25710 %{
25711   predicate(false);
25712   match(Set dst (LShiftL src shift));
25713   format %{ "leaq    $dst, [$src << $shift]" %}
25714   ins_encode %{
25715     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25716     Register src = $src$$Register;
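    // Same encoding consideration as in leaI_rReg_immI2_peep above.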
25717     if (scale == Address::times_2 && src != rbp && src != r13) {
25718       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25719     } else {
25720       __ leaq($dst$$Register, Address(noreg, src, scale));
25721     }
25722   %}
25723   ins_pipe(ialu_reg_reg);
25724 %}
25725 
25726 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25727 // sal}) with lea instructions. The {add, sal} rules are beneficial on
25728 // processors with at least partial ALU support for lea
25729 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are generally
25730 // beneficial only on processors with full ALU support
25731 // (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
25732 
25733 peephole
25734 %{
25735   peeppredicate(VM_Version::supports_fast_2op_lea());
25736   peepmatch (addI_rReg);
25737   peepprocedure (lea_coalesce_reg);
25738   peepreplace (leaI_rReg_rReg_peep());
25739 %}
25740 
25741 peephole
25742 %{
25743   peeppredicate(VM_Version::supports_fast_2op_lea());
25744   peepmatch (addI_rReg_imm);
25745   peepprocedure (lea_coalesce_imm);
25746   peepreplace (leaI_rReg_immI_peep());
25747 %}
25748 
25749 peephole
25750 %{
25751   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25752                 VM_Version::is_intel_cascade_lake());
25753   peepmatch (incI_rReg);
25754   peepprocedure (lea_coalesce_imm);
25755   peepreplace (leaI_rReg_immI_peep());
25756 %}
25757 
25758 peephole
25759 %{
25760   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25761                 VM_Version::is_intel_cascade_lake());
25762   peepmatch (decI_rReg);
25763   peepprocedure (lea_coalesce_imm);
25764   peepreplace (leaI_rReg_immI_peep());
25765 %}
25766 
25767 peephole
25768 %{
25769   peeppredicate(VM_Version::supports_fast_2op_lea());
25770   peepmatch (salI_rReg_immI2);
25771   peepprocedure (lea_coalesce_imm);
25772   peepreplace (leaI_rReg_immI2_peep());
25773 %}
25774 
25775 peephole
25776 %{
25777   peeppredicate(VM_Version::supports_fast_2op_lea());
25778   peepmatch (addL_rReg);
25779   peepprocedure (lea_coalesce_reg);
25780   peepreplace (leaL_rReg_rReg_peep());
25781 %}
25782 
25783 peephole
25784 %{
25785   peeppredicate(VM_Version::supports_fast_2op_lea());
25786   peepmatch (addL_rReg_imm);
25787   peepprocedure (lea_coalesce_imm);
25788   peepreplace (leaL_rReg_immL32_peep());
25789 %}
25790 
25791 peephole
25792 %{
25793   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25794                 VM_Version::is_intel_cascade_lake());
25795   peepmatch (incL_rReg);
25796   peepprocedure (lea_coalesce_imm);
25797   peepreplace (leaL_rReg_immL32_peep());
25798 %}
25799 
25800 peephole
25801 %{
25802   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25803                 VM_Version::is_intel_cascade_lake());
25804   peepmatch (decL_rReg);
25805   peepprocedure (lea_coalesce_imm);
25806   peepreplace (leaL_rReg_immL32_peep());
25807 %}
25808 
25809 peephole
25810 %{
25811   peeppredicate(VM_Version::supports_fast_2op_lea());
25812   peepmatch (salL_rReg_immI2);
25813   peepprocedure (lea_coalesce_imm);
25814   peepreplace (leaL_rReg_immI2_peep());
25815 %}
25816 
25817 peephole
25818 %{
25819   peepmatch (leaPCompressedOopOffset);
25820   peepprocedure (lea_remove_redundant);
25821 %}
25822 
25823 peephole
25824 %{
25825   peepmatch (leaP8Narrow);
25826   peepprocedure (lea_remove_redundant);
25827 %}
25828 
25829 peephole
25830 %{
25831   peepmatch (leaP32Narrow);
25832   peepprocedure (lea_remove_redundant);
25833 %}
25834 
25835 // These peephole rules match instructions which set flags and are followed by a testI/L_reg.
25836 // The test instruction is redundant if the downstream instructions (like JCC or CMOV) only use
// flags that are already set by the previous instruction.
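//
// For example (an illustrative sketch), in
//
//   andl  rax, rbx    // already sets ZF and SF
//   testl rax, rax
//   je    done
//
// the testl is redundant because the jump consumes only flags that the
// preceding andl has already set.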
25837 
25838 // int variant
25839 peephole
25840 %{
25841   peepmatch (testI_reg);
25842   peepprocedure (test_may_remove);
25843 %}
25844 
25845 // long variant
25846 peephole
25847 %{
25848   peepmatch (testL_reg);
25849   peepprocedure (test_may_remove);
25850 %}
25851 
25852 
25853 //----------SMARTSPILL RULES---------------------------------------------------
25854 // These must follow all instruction definitions as they use the names
25855 // defined in the instruction definitions.