//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
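// For example, the first definition below,
//   reg_def RAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// declares RAX as save-on-call under both the allocator's view and the C
// calling convention, spills it with LoadI/StoreI, and gives it hardware
// encoding 0.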

// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers)
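//
// As an illustration (not an instruction actually used in this file):
// with R8 as the destination, 'mov r8, rax' needs REX.B (plus REX.W for
// the 64-bit operand size) and assembles as 49 89 C0, whereas the 32-bit
// 'mov eax, eax' is just 89 C0.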

// RBX, RSI, and RDI were previously set as save-on-entry for Java code,
// but SOE was turned off in Java code due to the frequent use of uncommon
// traps. Now that the allocator is better, RSI and RDI are SOE registers
// again (on Win64; see the platform-specific definitions below).

reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.
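// For example, integer division fixes RAX and RDX and variable-count
// shifts fix RCX, which is why the pure scratch registers R10 and R11
// head chunk0 below while RAX, RDX, and RCX come later and RSP comes
// last.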

alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   R16,         R16_H,
                   R17,         R17_H,
                   R18,         R18_H,
                   R19,         R19_H,
                   R20,         R20_H,
                   R21,         R21_H,
                   R22,         R22_H,
                   R23,         R23_H,
                   R24,         R24_H,
                   R25,         R25_H,
                   R26,         R26_H,
                   R27,         R27_H,
                   R28,         R28_H,
                   R29,         R29_H,
                   R30,         R30_H,
                   R31,         R31_H,
                   RSP,         RSP_H);

// XMM registers.  512-bit registers, i.e. 16 words each, labeled (a)-(p).
// Word (a) in each register holds a Float; words (a)-(b) hold a Double.
// The whole registers are used by the SSE4.2 intrinsics, the array copy
// stubs, and superword operations (see the UseSSE42Intrinsics,
// UseXMMForArrayCopy, and UseSuperWord flags).
// For pre-EVEX architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// For EVEX-enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   no registers are preserved across function calls;
//              XMM0-XMM7 might hold parameters.
// Windows ABI: XMM6-XMM15 are preserved across function calls;
//              XMM0-XMM3 might hold parameters.
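//
// For example (an illustration, not from this file): under the Linux
// ABI a call to 'double hypot(double, double)' takes its arguments in
// XMM0 and XMM1 and returns in XMM0, and since no XMM register is
// preserved, any live XMM values must be spilled around the call.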

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
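// K0 is omitted: in EVEX encodings an opmask field of 0 means "no
// masking", so k0 cannot select a write mask and is left out of the
// allocatable set.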
reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);
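
// Note: R16-R31 above are the APX extended GPRs (EGPRs). Presumably they
// are only made allocatable when UseAPX is enabled, via the dynamically
// computed register masks below.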

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}
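
// Note: the masks returned above (_ANY_REG_mask, _PTR_REG_mask, etc.) are
// dynamic; they are presumably computed once at startup in this file's C++
// source block so that, for example, the APX EGPRs are excluded when
// UseAPX is off and RBP is excluded when it is needed as a frame pointer.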

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
// The flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
 1061 // Class for pre evex float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
 1079 // Class for evex float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
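// A reg_class_dynamic selects one of two statically defined register classes
// at code-generation time: the first (evex) class when the trailing predicate
// holds, otherwise the second (legacy) class.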
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1115 
 1116 // Class for pre evex double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
 1134 // Class for evex double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
 1171 // Class for pre evex 32bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
 1189 // Class for evex 32bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre evex 64bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for evex 64bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre evex 128bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for evex 128bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre evex 256bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for evex 256bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
// Class for evex 512bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
// Class for restricted 512bit vector registers (XMM0-XMM15 only)
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
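// Predicate used by CastLL match rules: true when each bound of the node's
// long type either fits in a signed 32-bit immediate or is unbounded
// (min_jlong/max_jlong), so the bound does not need a 64-bit constant.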
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
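// R12 is reserved as the heap base register when compressed oops are in use,
// so it must be removed from the allocatable masks below.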
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
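  // r16-r31 are the APX extended GPRs; they are allocatable only when UseAPX
  // is enabled and are stripped from the masks below otherwise.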
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
static bool generate_vzeroupper(Compile* C) {
  // Generate vzeroupper when the compiled code uses wide vectors or
  // explicitly requests that the upper AVX state be cleared before calls.
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());
}
 1629 
static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0;  // vzeroupper is a 3-byte instruction
}
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
  int offset = 5; // 5 bytes from start of call to where return address points
                  // (call rel32: 1-byte opcode + 4-byte displacement)
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
  int offset = 15; // 15 bytes from start of call to where return address points
                   // (10-byte movq of the inline cache data + 5-byte call)
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
  int offset = 13; // movq r10, #addr (10 bytes); callq (r10) (3 bytes)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;
 1678 }
 1679 
 1680 // This could be in MacroAssembler but it's fairly C2 specific
 1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1682   Label exit;
 1683   __ jccb(Assembler::noParity, exit);
 1684   __ pushf();
 1685   //
 1686   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1687   // zero OF,AF,SF for NaN values.
 1688   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1689   // values returns 'less than' result (CF is set).
 1690   // Leave the rest of flags unchanged.
 1691   //
 1692   //    7 6 5 4 3 2 1 0
 1693   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1694   //    0 0 1 0 1 0 1 1   (0x2B)
 1695   //
 1696   __ andq(Address(rsp, 0), 0xffffff2b);
 1697   __ popf();
 1698   __ bind(exit);
 1699 }
 1700 
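// Materialize a three-way floating-point compare result in 'dst':
// -1 if less than or unordered (NaN), 0 if equal, 1 if greater.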
 1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
 1702   Label done;
 1703   __ movl(dst, -1);
 1704   __ jcc(Assembler::parity, done);
 1705   __ jcc(Assembler::below, done);
 1706   __ setcc(Assembler::notEqual, dst);
 1707   __ bind(done);
 1708 }
 1709 
 1710 // Math.min()    # Math.max()
 1711 // --------------------------
 1712 // ucomis[s/d]   #
 1713 // ja   -> b     # a
 1714 // jp   -> NaN   # NaN
 1715 // jb   -> a     # b
 1716 // je            #
 1717 // |-jz -> a | b # a & b
 1718 // |    -> a     #
 1719 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1720                             XMMRegister a, XMMRegister b,
 1721                             XMMRegister xmmt, Register rt,
 1722                             bool min, bool single) {
 1723 
 1724   Label nan, zero, below, above, done;
 1725 
 1726   if (single)
 1727     __ ucomiss(a, b);
 1728   else
 1729     __ ucomisd(a, b);
 1730 
 1731   if (dst->encoding() != (min ? b : a)->encoding())
 1732     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1733   else
 1734     __ jccb(Assembler::above, done);
 1735 
 1736   __ jccb(Assembler::parity, nan);  // PF=1
 1737   __ jccb(Assembler::below, below); // CF=1
 1738 
 1739   // equal
 1740   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1741   if (single) {
 1742     __ ucomiss(a, xmmt);
 1743     __ jccb(Assembler::equal, zero);
 1744 
 1745     __ movflt(dst, a);
 1746     __ jmp(done);
 1747   }
 1748   else {
 1749     __ ucomisd(a, xmmt);
 1750     __ jccb(Assembler::equal, zero);
 1751 
 1752     __ movdbl(dst, a);
 1753     __ jmp(done);
 1754   }
 1755 
 1756   __ bind(zero);
 1757   if (min)
 1758     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1759   else
 1760     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1761 
 1762   __ jmp(done);
 1763 
 1764   __ bind(above);
 1765   if (single)
 1766     __ movflt(dst, min ? b : a);
 1767   else
 1768     __ movdbl(dst, min ? b : a);
 1769 
 1770   __ jmp(done);
 1771 
 1772   __ bind(nan);
 1773   if (single) {
 1774     __ movl(rt, 0x7fc00000); // Float.NaN
 1775     __ movdl(dst, rt);
 1776   }
 1777   else {
 1778     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1779     __ movdq(dst, rt);
 1780   }
 1781   __ jmp(done);
 1782 
 1783   __ bind(below);
 1784   if (single)
 1785     __ movflt(dst, min ? a : b);
 1786   else
 1787     __ movdbl(dst, min ? a : b);
 1788 
 1789   __ bind(done);
 1790 }
 1791 
 1792 //=============================================================================
 1793 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1794 
 1795 int ConstantTable::calculate_table_base_offset() const {
 1796   return 0;  // absolute addressing, no offset
 1797 }
 1798 
 1799 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1800 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1801   ShouldNotReachHere();
 1802 }
 1803 
 1804 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1805   // Empty encoding
 1806 }
 1807 
 1808 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1809   return 0;
 1810 }
 1811 
 1812 #ifndef PRODUCT
 1813 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1814   st->print("# MachConstantBaseNode (empty encoding)");
 1815 }
 1816 #endif
 1817 
 1818 
 1819 //=============================================================================
 1820 #ifndef PRODUCT
 1821 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1822   Compile* C = ra_->C;
 1823 
 1824   int framesize = C->output()->frame_size_in_bytes();
 1825   int bangsize = C->output()->bang_size_in_bytes();
 1826   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1827   // Remove wordSize for return addr which is already pushed.
 1828   framesize -= wordSize;
 1829 
 1830   if (C->output()->need_stack_bang(bangsize)) {
 1831     framesize -= wordSize;
 1832     st->print("# stack bang (%d bytes)", bangsize);
 1833     st->print("\n\t");
 1834     st->print("pushq   rbp\t# Save rbp");
 1835     if (PreserveFramePointer) {
 1836         st->print("\n\t");
 1837         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1838     }
 1839     if (framesize) {
 1840       st->print("\n\t");
 1841       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1842     }
 1843   } else {
 1844     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1845     st->print("\n\t");
 1846     framesize -= wordSize;
 1847     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1848     if (PreserveFramePointer) {
 1849       st->print("\n\t");
 1850       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1851       if (framesize > 0) {
 1852         st->print("\n\t");
 1853         st->print("addq    rbp, #%d", framesize);
 1854       }
 1855     }
 1856   }
 1857 
 1858   if (VerifyStackAtCalls) {
 1859     st->print("\n\t");
 1860     framesize -= wordSize;
 1861     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1862 #ifdef ASSERT
 1863     st->print("\n\t");
 1864     st->print("# stack alignment check");
 1865 #endif
 1866   }
 1867   if (C->stub_function() != nullptr) {
 1868     st->print("\n\t");
 1869     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1870     st->print("\n\t");
 1871     st->print("je      fast_entry\t");
 1872     st->print("\n\t");
 1873     st->print("call    #nmethod_entry_barrier_stub\t");
 1874     st->print("\n\tfast_entry:");
 1875   }
 1876   st->cr();
 1877 }
 1878 #endif
 1879 
 1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1881   Compile* C = ra_->C;
 1882 
 1883   int framesize = C->output()->frame_size_in_bytes();
 1884   int bangsize = C->output()->bang_size_in_bytes();
 1885 
 1886   if (C->clinit_barrier_on_entry()) {
 1887     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1888     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1889 
 1890     Label L_skip_barrier;
 1891     Register klass = rscratch1;
 1892 
 1893     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1894     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1895 
 1896     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1897 
 1898     __ bind(L_skip_barrier);
 1899   }
 1900 
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
 1902 
 1903   C->output()->set_frame_complete(__ offset());
 1904 
 1905   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because uses of the constant
    // table might be emitted before MachConstantBaseNode.
 1908     ConstantTable& constant_table = C->output()->constant_table();
 1909     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1910   }
 1911 }
 1912 
 1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1914 {
 1915   return MachNode::size(ra_); // too many variables; just compute it
 1916                               // the hard way
 1917 }
 1918 
 1919 int MachPrologNode::reloc() const
 1920 {
 1921   return 0; // a large enough number
 1922 }
 1923 
 1924 //=============================================================================
 1925 #ifndef PRODUCT
 1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1927 {
 1928   Compile* C = ra_->C;
 1929   if (generate_vzeroupper(C)) {
 1930     st->print("vzeroupper");
 1931     st->cr(); st->print("\t");
 1932   }
 1933 
 1934   int framesize = C->output()->frame_size_in_bytes();
 1935   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1936   // Remove word for return adr already pushed
 1937   // and RBP
 1938   framesize -= 2*wordSize;
 1939 
 1940   if (framesize) {
 1941     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1942     st->print("\t");
 1943   }
 1944 
 1945   st->print_cr("popq    rbp");
 1946   if (do_polling() && C->is_method_compilation()) {
 1947     st->print("\t");
 1948     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1949                  "ja      #safepoint_stub\t"
 1950                  "# Safepoint: poll for GC");
 1951   }
 1952 }
 1953 #endif
 1954 
 1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1956 {
 1957   Compile* C = ra_->C;
 1958 
 1959   if (generate_vzeroupper(C)) {
 1960     // Clear upper bits of YMM registers when current compiled code uses
 1961     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1962     __ vzeroupper();
 1963   }
 1964 
 1965   int framesize = C->output()->frame_size_in_bytes();
 1966   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1967   // Remove word for return adr already pushed
 1968   // and RBP
 1969   framesize -= 2*wordSize;
 1970 
 1971   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1972 
 1973   if (framesize) {
 1974     __ addq(rsp, framesize);
 1975   }
 1976 
 1977   __ popq(rbp);
 1978 
 1979   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1980     __ reserved_stack_check();
 1981   }
 1982 
 1983   if (do_polling() && C->is_method_compilation()) {
 1984     Label dummy_label;
 1985     Label* code_stub = &dummy_label;
 1986     if (!C->output()->in_scratch_emit_size()) {
 1987       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1988       C->output()->add_stub(stub);
 1989       code_stub = &stub->entry();
 1990     }
 1991     __ relocate(relocInfo::poll_return_type);
 1992     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1993   }
 1994 }
 1995 
 1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1997 {
 1998   return MachNode::size(ra_); // too many variables; just compute it
 1999                               // the hard way
 2000 }
 2001 
 2002 int MachEpilogNode::reloc() const
 2003 {
 2004   return 2; // a large enough number
 2005 }
 2006 
 2007 const Pipeline* MachEpilogNode::pipeline() const
 2008 {
 2009   return MachNode::pipeline_class();
 2010 }
 2011 
 2012 //=============================================================================
 2013 
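// Coarse register classes for spill copies. Each OptoReg is mapped to one of
// these so MachSpillCopyNode::implementation below can dispatch on
// (source class, destination class) pairs.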
 2014 enum RC {
 2015   rc_bad,
 2016   rc_int,
 2017   rc_kreg,
 2018   rc_float,
 2019   rc_stack
 2020 };
 2021 
 2022 static enum RC rc_class(OptoReg::Name reg)
 2023 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
 2025 
 2026   if (OptoReg::is_stack(reg)) return rc_stack;
 2027 
 2028   VMReg r = OptoReg::as_VMReg(reg);
 2029 
 2030   if (r->is_Register()) return rc_int;
 2031 
 2032   if (r->is_KRegister()) return rc_kreg;
 2033 
 2034   assert(r->is_XMMRegister(), "must be");
 2035   return rc_float;
 2036 }
 2037 
 2038 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2039 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2040                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2041 
 2042 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2043                      int stack_offset, int reg, uint ireg, outputStream* st);
 2044 
 2045 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2046                                       int dst_offset, uint ireg, outputStream* st) {
 2047   if (masm) {
 2048     switch (ireg) {
 2049     case Op_VecS:
 2050       __ movq(Address(rsp, -8), rax);
 2051       __ movl(rax, Address(rsp, src_offset));
 2052       __ movl(Address(rsp, dst_offset), rax);
 2053       __ movq(rax, Address(rsp, -8));
 2054       break;
 2055     case Op_VecD:
 2056       __ pushq(Address(rsp, src_offset));
 2057       __ popq (Address(rsp, dst_offset));
 2058       break;
 2059     case Op_VecX:
 2060       __ pushq(Address(rsp, src_offset));
 2061       __ popq (Address(rsp, dst_offset));
 2062       __ pushq(Address(rsp, src_offset+8));
 2063       __ popq (Address(rsp, dst_offset+8));
 2064       break;
 2065     case Op_VecY:
 2066       __ vmovdqu(Address(rsp, -32), xmm0);
 2067       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2068       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2069       __ vmovdqu(xmm0, Address(rsp, -32));
 2070       break;
 2071     case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, Assembler::AVX_512bit);
      __ evmovdquq(xmm0, Address(rsp, src_offset), Assembler::AVX_512bit);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, Assembler::AVX_512bit);
      __ evmovdquq(xmm0, Address(rsp, -64), Assembler::AVX_512bit);
 2076       break;
 2077     default:
 2078       ShouldNotReachHere();
 2079     }
 2080 #ifndef PRODUCT
 2081   } else {
 2082     switch (ireg) {
 2083     case Op_VecS:
 2084       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2085                 "movl    rax, [rsp + #%d]\n\t"
 2086                 "movl    [rsp + #%d], rax\n\t"
 2087                 "movq    rax, [rsp - #8]",
 2088                 src_offset, dst_offset);
 2089       break;
 2090     case Op_VecD:
 2091       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2092                 "popq    [rsp + #%d]",
 2093                 src_offset, dst_offset);
 2094       break;
 2095      case Op_VecX:
 2096       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2097                 "popq    [rsp + #%d]\n\t"
 2098                 "pushq   [rsp + #%d]\n\t"
 2099                 "popq    [rsp + #%d]",
 2100                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2101       break;
 2102     case Op_VecY:
 2103       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2104                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2105                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2106                 "vmovdqu xmm0, [rsp - #32]",
 2107                 src_offset, dst_offset);
 2108       break;
 2109     case Op_VecZ:
 2110       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2111                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2112                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2113                 "vmovdqu xmm0, [rsp - #64]",
 2114                 src_offset, dst_offset);
 2115       break;
 2116     default:
 2117       ShouldNotReachHere();
 2118     }
 2119 #endif
 2120   }
 2121 }
 2122 
 2123 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2124                                        PhaseRegAlloc* ra_,
 2125                                        bool do_size,
 2126                                        outputStream* st) const {
 2127   assert(masm != nullptr || st  != nullptr, "sanity");
 2128   // Get registers to move
 2129   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2130   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2131   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2132   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2133 
 2134   enum RC src_second_rc = rc_class(src_second);
 2135   enum RC src_first_rc = rc_class(src_first);
 2136   enum RC dst_second_rc = rc_class(dst_second);
 2137   enum RC dst_first_rc = rc_class(dst_first);
 2138 
  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register");
 2141 
 2142   if (src_first == dst_first && src_second == dst_second) {
 2143     // Self copy, no move
 2144     return 0;
 2145   }
 2146   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2147     uint ireg = ideal_reg();
 2148     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2149     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
 2151       // mem -> mem
 2152       int src_offset = ra_->reg2offset(src_first);
 2153       int dst_offset = ra_->reg2offset(dst_first);
 2154       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
 2156       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
 2158       int stack_offset = ra_->reg2offset(dst_first);
 2159       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
 2161       int stack_offset = ra_->reg2offset(src_first);
 2162       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2163     } else {
 2164       ShouldNotReachHere();
 2165     }
 2166     return 0;
 2167   }
 2168   if (src_first_rc == rc_stack) {
 2169     // mem ->
 2170     if (dst_first_rc == rc_stack) {
 2171       // mem -> mem
 2172       assert(src_second != dst_first, "overlap");
 2173       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2174           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2175         // 64-bit
 2176         int src_offset = ra_->reg2offset(src_first);
 2177         int dst_offset = ra_->reg2offset(dst_first);
 2178         if (masm) {
 2179           __ pushq(Address(rsp, src_offset));
 2180           __ popq (Address(rsp, dst_offset));
 2181 #ifndef PRODUCT
 2182         } else {
 2183           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2184                     "popq    [rsp + #%d]",
 2185                      src_offset, dst_offset);
 2186 #endif
 2187         }
 2188       } else {
 2189         // 32-bit
 2190         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2191         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        // There is no 32-bit pushl/popl in 64-bit mode, so bounce the value
        // through rax, preserving rax in the slot just below rsp:
 2193         int src_offset = ra_->reg2offset(src_first);
 2194         int dst_offset = ra_->reg2offset(dst_first);
 2195         if (masm) {
 2196           __ movq(Address(rsp, -8), rax);
 2197           __ movl(rax, Address(rsp, src_offset));
 2198           __ movl(Address(rsp, dst_offset), rax);
 2199           __ movq(rax, Address(rsp, -8));
 2200 #ifndef PRODUCT
 2201         } else {
 2202           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2203                     "movl    rax, [rsp + #%d]\n\t"
 2204                     "movl    [rsp + #%d], rax\n\t"
 2205                     "movq    rax, [rsp - #8]",
 2206                      src_offset, dst_offset);
 2207 #endif
 2208         }
 2209       }
 2210       return 0;
 2211     } else if (dst_first_rc == rc_int) {
 2212       // mem -> gpr
 2213       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2214           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2215         // 64-bit
 2216         int offset = ra_->reg2offset(src_first);
 2217         if (masm) {
 2218           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2219 #ifndef PRODUCT
 2220         } else {
 2221           st->print("movq    %s, [rsp + #%d]\t# spill",
 2222                      Matcher::regName[dst_first],
 2223                      offset);
 2224 #endif
 2225         }
 2226       } else {
 2227         // 32-bit
 2228         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2229         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2230         int offset = ra_->reg2offset(src_first);
 2231         if (masm) {
 2232           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2233 #ifndef PRODUCT
 2234         } else {
 2235           st->print("movl    %s, [rsp + #%d]\t# spill",
 2236                      Matcher::regName[dst_first],
 2237                      offset);
 2238 #endif
 2239         }
 2240       }
 2241       return 0;
 2242     } else if (dst_first_rc == rc_float) {
      // mem -> xmm
 2244       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2245           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2246         // 64-bit
 2247         int offset = ra_->reg2offset(src_first);
 2248         if (masm) {
 2249           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2250 #ifndef PRODUCT
 2251         } else {
 2252           st->print("%s  %s, [rsp + #%d]\t# spill",
 2253                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2254                      Matcher::regName[dst_first],
 2255                      offset);
 2256 #endif
 2257         }
 2258       } else {
 2259         // 32-bit
 2260         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2261         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2262         int offset = ra_->reg2offset(src_first);
 2263         if (masm) {
 2264           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2265 #ifndef PRODUCT
 2266         } else {
 2267           st->print("movss   %s, [rsp + #%d]\t# spill",
 2268                      Matcher::regName[dst_first],
 2269                      offset);
 2270 #endif
 2271         }
 2272       }
 2273       return 0;
 2274     } else if (dst_first_rc == rc_kreg) {
 2275       // mem -> kreg
 2276       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2277           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2278         // 64-bit
 2279         int offset = ra_->reg2offset(src_first);
 2280         if (masm) {
 2281           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2282 #ifndef PRODUCT
 2283         } else {
 2284           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2285                      Matcher::regName[dst_first],
 2286                      offset);
 2287 #endif
 2288         }
 2289       }
 2290       return 0;
 2291     }
 2292   } else if (src_first_rc == rc_int) {
 2293     // gpr ->
 2294     if (dst_first_rc == rc_stack) {
 2295       // gpr -> mem
 2296       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2297           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2298         // 64-bit
 2299         int offset = ra_->reg2offset(dst_first);
 2300         if (masm) {
 2301           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2302 #ifndef PRODUCT
 2303         } else {
 2304           st->print("movq    [rsp + #%d], %s\t# spill",
 2305                      offset,
 2306                      Matcher::regName[src_first]);
 2307 #endif
 2308         }
 2309       } else {
 2310         // 32-bit
 2311         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2312         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2313         int offset = ra_->reg2offset(dst_first);
 2314         if (masm) {
 2315           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2316 #ifndef PRODUCT
 2317         } else {
 2318           st->print("movl    [rsp + #%d], %s\t# spill",
 2319                      offset,
 2320                      Matcher::regName[src_first]);
 2321 #endif
 2322         }
 2323       }
 2324       return 0;
 2325     } else if (dst_first_rc == rc_int) {
 2326       // gpr -> gpr
 2327       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2328           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2329         // 64-bit
 2330         if (masm) {
 2331           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2332                   as_Register(Matcher::_regEncode[src_first]));
 2333 #ifndef PRODUCT
 2334         } else {
 2335           st->print("movq    %s, %s\t# spill",
 2336                      Matcher::regName[dst_first],
 2337                      Matcher::regName[src_first]);
 2338 #endif
 2339         }
 2340         return 0;
 2341       } else {
 2342         // 32-bit
 2343         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2344         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2345         if (masm) {
 2346           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2347                   as_Register(Matcher::_regEncode[src_first]));
 2348 #ifndef PRODUCT
 2349         } else {
 2350           st->print("movl    %s, %s\t# spill",
 2351                      Matcher::regName[dst_first],
 2352                      Matcher::regName[src_first]);
 2353 #endif
 2354         }
 2355         return 0;
 2356       }
 2357     } else if (dst_first_rc == rc_float) {
 2358       // gpr -> xmm
 2359       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2360           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2361         // 64-bit
 2362         if (masm) {
 2363           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2364 #ifndef PRODUCT
 2365         } else {
 2366           st->print("movdq   %s, %s\t# spill",
 2367                      Matcher::regName[dst_first],
 2368                      Matcher::regName[src_first]);
 2369 #endif
 2370         }
 2371       } else {
 2372         // 32-bit
 2373         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2374         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2375         if (masm) {
 2376           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2377 #ifndef PRODUCT
 2378         } else {
 2379           st->print("movdl   %s, %s\t# spill",
 2380                      Matcher::regName[dst_first],
 2381                      Matcher::regName[src_first]);
 2382 #endif
 2383         }
 2384       }
 2385       return 0;
 2386     } else if (dst_first_rc == rc_kreg) {
 2387       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2388           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2389         // 64-bit
 2390         if (masm) {
 2391           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
 2398         }
 2399       }
 2400       Unimplemented();
 2401       return 0;
 2402     }
 2403   } else if (src_first_rc == rc_float) {
 2404     // xmm ->
 2405     if (dst_first_rc == rc_stack) {
 2406       // xmm -> mem
 2407       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2408           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2409         // 64-bit
 2410         int offset = ra_->reg2offset(dst_first);
 2411         if (masm) {
 2412           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2413 #ifndef PRODUCT
 2414         } else {
 2415           st->print("movsd   [rsp + #%d], %s\t# spill",
 2416                      offset,
 2417                      Matcher::regName[src_first]);
 2418 #endif
 2419         }
 2420       } else {
 2421         // 32-bit
 2422         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2423         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2424         int offset = ra_->reg2offset(dst_first);
 2425         if (masm) {
 2426           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2427 #ifndef PRODUCT
 2428         } else {
 2429           st->print("movss   [rsp + #%d], %s\t# spill",
 2430                      offset,
 2431                      Matcher::regName[src_first]);
 2432 #endif
 2433         }
 2434       }
 2435       return 0;
 2436     } else if (dst_first_rc == rc_int) {
 2437       // xmm -> gpr
 2438       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2439           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2440         // 64-bit
 2441         if (masm) {
 2442           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2443 #ifndef PRODUCT
 2444         } else {
 2445           st->print("movdq   %s, %s\t# spill",
 2446                      Matcher::regName[dst_first],
 2447                      Matcher::regName[src_first]);
 2448 #endif
 2449         }
 2450       } else {
 2451         // 32-bit
 2452         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2453         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2454         if (masm) {
 2455           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2456 #ifndef PRODUCT
 2457         } else {
 2458           st->print("movdl   %s, %s\t# spill",
 2459                      Matcher::regName[dst_first],
 2460                      Matcher::regName[src_first]);
 2461 #endif
 2462         }
 2463       }
 2464       return 0;
 2465     } else if (dst_first_rc == rc_float) {
 2466       // xmm -> xmm
 2467       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2468           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2469         // 64-bit
 2470         if (masm) {
 2471           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2472 #ifndef PRODUCT
 2473         } else {
 2474           st->print("%s  %s, %s\t# spill",
 2475                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2476                      Matcher::regName[dst_first],
 2477                      Matcher::regName[src_first]);
 2478 #endif
 2479         }
 2480       } else {
 2481         // 32-bit
 2482         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2483         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2484         if (masm) {
 2485           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2486 #ifndef PRODUCT
 2487         } else {
 2488           st->print("%s  %s, %s\t# spill",
 2489                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2490                      Matcher::regName[dst_first],
 2491                      Matcher::regName[src_first]);
 2492 #endif
 2493         }
 2494       }
 2495       return 0;
 2496     } else if (dst_first_rc == rc_kreg) {
 2497       assert(false, "Illegal spilling");
 2498       return 0;
 2499     }
 2500   } else if (src_first_rc == rc_kreg) {
 2501     if (dst_first_rc == rc_stack) {
      // kreg -> mem
 2503       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2504           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2505         // 64-bit
 2506         int offset = ra_->reg2offset(dst_first);
 2507         if (masm) {
 2508           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2509 #ifndef PRODUCT
 2510         } else {
          st->print("kmovq   [rsp + #%d], %s\t# spill",
 2512                      offset,
 2513                      Matcher::regName[src_first]);
 2514 #endif
 2515         }
 2516       }
 2517       return 0;
 2518     } else if (dst_first_rc == rc_int) {
 2519       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2520           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2521         // 64-bit
 2522         if (masm) {
 2523           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2524 #ifndef PRODUCT
 2525         } else {
          st->print("kmovq   %s, %s\t# spill",
 2527                      Matcher::regName[dst_first],
 2528                      Matcher::regName[src_first]);
 2529 #endif
 2530         }
 2531       }
 2532       Unimplemented();
 2533       return 0;
 2534     } else if (dst_first_rc == rc_kreg) {
 2535       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2536           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2537         // 64-bit
 2538         if (masm) {
 2539           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2540 #ifndef PRODUCT
 2541         } else {
          st->print("kmovq   %s, %s\t# spill",
 2543                      Matcher::regName[dst_first],
 2544                      Matcher::regName[src_first]);
 2545 #endif
 2546         }
 2547       }
 2548       return 0;
 2549     } else if (dst_first_rc == rc_float) {
 2550       assert(false, "Illegal spill");
 2551       return 0;
 2552     }
 2553   }
 2554 
  assert(false, "unhandled register class pair in spill copy");
 2556   Unimplemented();
 2557   return 0;
 2558 }
 2559 
 2560 #ifndef PRODUCT
 2561 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2562   implementation(nullptr, ra_, false, st);
 2563 }
 2564 #endif
 2565 
 2566 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2567   implementation(masm, ra_, false, nullptr);
 2568 }
 2569 
 2570 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2571   return MachNode::size(ra_);
 2572 }
 2573 
 2574 //=============================================================================
 2575 #ifndef PRODUCT
 2576 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2577 {
 2578   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2579   int reg = ra_->get_reg_first(this);
 2580   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2581             Matcher::regName[reg], offset);
 2582 }
 2583 #endif
 2584 
 2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2586 {
 2587   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2588   int reg = ra_->get_encode(this);
 2589 
 2590   __ lea(as_Register(reg), Address(rsp, offset));
 2591 }
 2592 
 2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2594 {
 2595   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
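  // For example, "lea reg, [rsp + disp8]" encodes as REX.W prefix (1) +
  // opcode 0x8D (1) + ModRM (1) + SIB (1) + disp8 (1) = 5 bytes; a disp32
  // adds 3 bytes, and encodings of r16-r31 use a 2-byte REX2 prefix instead.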
 2596   if (ra_->get_encode(this) > 15) {
 2597     return (offset < 0x80) ? 6 : 9; // REX2
 2598   } else {
 2599     return (offset < 0x80) ? 5 : 8; // REX
 2600   }
 2601 }
 2602 
 2603 //=============================================================================
 2604 #ifndef PRODUCT
 2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2606 {
 2607   if (UseCompressedClassPointers) {
 2608     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2609     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2610   } else {
    st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2612     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2613   }
 2614   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2615 }
 2616 #endif
 2617 
 2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2619 {
 2620   __ ic_check(InteriorEntryAlignment);
 2621 }
 2622 
 2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2624 {
 2625   return MachNode::size(ra_); // too many variables; just compute it
 2626                               // the hard way
 2627 }
 2628 
 2629 
 2630 //=============================================================================
 2631 
 2632 bool Matcher::supports_vector_calling_convention(void) {
 2633   return EnableVectorSupport;
 2634 }
 2635 
 2636 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2637   assert(EnableVectorSupport, "sanity");
 2638   int lo = XMM0_num;
 2639   int hi = XMM0b_num;
 2640   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2641   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2642   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
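  // E.g. for Op_VecZ the returned pair (XMM0p_num, XMM0_num) describes zmm0,
  // naming the last and first 32-bit slots of the vector register span.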
 2643   return OptoRegPair(hi, lo);
 2644 }
 2645 
 2646 // Is this branch offset short enough that a short branch can be used?
 2647 //
 2648 // NOTE: If the platform does not provide any short branch variants, then
 2649 //       this method should return false for offset 0.
 2650 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86, a branch displacement is calculated relative to the address
  // of the next instruction.
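  // For example, a 2-byte short branch that targets its own address is
  // passed offset 0, giving a displacement of 0 - 2 = -2, well within
  // the signed-byte reach checked below.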
 2654   offset -= br_size;
 2655 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
 2658   if (rule == jmpConUCF2_rule)
 2659     return (-126 <= offset && offset <= 125);
 2660   return (-128 <= offset && offset <= 127);
 2661 }
 2662 
 2663 // Return whether or not this register is ever used as an argument.
 2664 // This function is used on startup to build the trampoline stubs in
 2665 // generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
 2667 // available to the callee.
 2668 bool Matcher::can_be_java_arg(int reg)
 2669 {
 2670   return
 2671     reg ==  RDI_num || reg == RDI_H_num ||
 2672     reg ==  RSI_num || reg == RSI_H_num ||
 2673     reg ==  RDX_num || reg == RDX_H_num ||
 2674     reg ==  RCX_num || reg == RCX_H_num ||
 2675     reg ==   R8_num || reg ==  R8_H_num ||
 2676     reg ==   R9_num || reg ==  R9_H_num ||
 2677     reg ==  R12_num || reg == R12_H_num ||
 2678     reg == XMM0_num || reg == XMM0b_num ||
 2679     reg == XMM1_num || reg == XMM1b_num ||
 2680     reg == XMM2_num || reg == XMM2b_num ||
 2681     reg == XMM3_num || reg == XMM3b_num ||
 2682     reg == XMM4_num || reg == XMM4b_num ||
 2683     reg == XMM5_num || reg == XMM5b_num ||
 2684     reg == XMM6_num || reg == XMM6b_num ||
 2685     reg == XMM7_num || reg == XMM7b_num;
 2686 }
 2687 
 2688 bool Matcher::is_spillable_arg(int reg)
 2689 {
 2690   return can_be_java_arg(reg);
 2691 }
 2692 
 2693 uint Matcher::int_pressure_limit()
 2694 {
 2695   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2696 }
 2697 
 2698 uint Matcher::float_pressure_limit()
 2699 {
  // After experimenting with different values, the following default threshold
  // works best for LCM's register pressure scheduling on x64.
 2702   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2703   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2704   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2705 }
 2706 
 2707 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply when the
  // divisor is constant is faster than the hardware
  // DIV instruction (it uses MulHiL).
 2711   return false;
 2712 }
 2713 
 2714 // Register for DIVI projection of divmodI
 2715 const RegMask& Matcher::divI_proj_mask() {
 2716   return INT_RAX_REG_mask();
 2717 }
 2718 
 2719 // Register for MODI projection of divmodI
 2720 const RegMask& Matcher::modI_proj_mask() {
 2721   return INT_RDX_REG_mask();
 2722 }
 2723 
 2724 // Register for DIVL projection of divmodL
 2725 const RegMask& Matcher::divL_proj_mask() {
 2726   return LONG_RAX_REG_mask();
 2727 }
 2728 
 2729 // Register for MODL projection of divmodL
 2730 const RegMask& Matcher::modL_proj_mask() {
 2731   return LONG_RDX_REG_mask();
 2732 }
 2733 
 2734 %}
 2735 
 2736 source_hpp %{
 2737 // Header information of the source block.
 2738 // Method declarations/definitions which are used outside
 2739 // the ad-scope can conveniently be defined here.
 2740 //
 2741 // To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
 2743 
 2744 #include "runtime/vm_version.hpp"
 2745 
 2746 class NativeJump;
 2747 
 2748 class CallStubImpl {
 2749 
 2750   //--------------------------------------------------------------
 2751   //---<  Used for optimization in Compile::shorten_branches  >---
 2752   //--------------------------------------------------------------
 2753 
 2754  public:
 2755   // Size of call trampoline stub.
 2756   static uint size_call_trampoline() {
 2757     return 0; // no call trampolines on this platform
 2758   }
 2759 
 2760   // number of relocations needed by a call trampoline stub
 2761   static uint reloc_call_trampoline() {
 2762     return 0; // no call trampolines on this platform
 2763   }
 2764 };
 2765 
 2766 class HandlerImpl {
 2767 
 2768  public:
 2769 
 2770   static int emit_exception_handler(C2_MacroAssembler *masm);
 2771   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2772 
 2773   static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization.  (4932387)
 2777     // Note that this value is also credited (in output.cpp) to
 2778     // the size of the code section.
 2779     return NativeJump::instruction_size;
 2780   }
 2781 
 2782   static uint size_deopt_handler() {
    // three 5-byte instructions plus one move for the unreachable address.
 2784     return 15+3;
 2785   }
 2786 };
 2787 
 2788 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
 2790     case  4: // fall-through
 2791     case  8: // fall-through
 2792     case 16: return Assembler::AVX_128bit;
 2793     case 32: return Assembler::AVX_256bit;
 2794     case 64: return Assembler::AVX_512bit;
 2795 
 2796     default: {
 2797       ShouldNotReachHere();
 2798       return Assembler::AVX_NoVec;
 2799     }
 2800   }
 2801 }
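// For example, vector_length_encoding(8) and vector_length_encoding(16) both
// yield Assembler::AVX_128bit, since sub-128-bit vectors use 128-bit
// encodings; 32 and 64 bytes select AVX_256bit and AVX_512bit respectively.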
 2802 
 2803 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2804   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2805 }
 2806 
 2807 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2808   uint def_idx = use->operand_index(opnd);
 2809   Node* def = use->in(def_idx);
 2810   return vector_length_encoding(def);
 2811 }
 2812 
 2813 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2814   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2815          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2816 }
 2817 
 2818 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2819   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2820            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2821 }
 2822 
 2823 class Node::PD {
 2824 public:
 2825   enum NodeFlags {
 2826     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2827     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2828     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2829     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2830     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2831     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2832     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2833     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2834     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2835     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2836     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2837     _last_flag                = Flag_clears_sign_flag
 2838   };
 2839 };
 2840 
 2841 %} // end source_hpp
 2842 
 2843 source %{
 2844 
 2845 #include "opto/addnode.hpp"
 2846 #include "c2_intelJccErratum_x86.hpp"
 2847 
 2848 void PhaseOutput::pd_perform_mach_node_analysis() {
 2849   if (VM_Version::has_intel_jcc_erratum()) {
 2850     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2851     _buf_sizes._code += extra_padding;
 2852   }
 2853 }
 2854 
 2855 int MachNode::pd_alignment_required() const {
 2856   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2857     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2858     return IntelJccErratum::largest_jcc_size() + 1;
 2859   } else {
 2860     return 1;
 2861   }
 2862 }
 2863 
 2864 int MachNode::compute_padding(int current_offset) const {
 2865   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2866     Compile* C = Compile::current();
 2867     PhaseOutput* output = C->output();
 2868     Block* block = output->block();
 2869     int index = output->index();
 2870     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2871   } else {
 2872     return 0;
 2873   }
 2874 }
 2875 
 2876 // Emit exception handler code.
 2877 // Stuff framesize into a register and call a VM stub routine.
 2878 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) {
 2879 
 2880   // Note that the code buffer's insts_mark is always relative to insts.
 2881   // That's why we must use the macroassembler to generate a handler.
 2882   address base = __ start_a_stub(size_exception_handler());
 2883   if (base == nullptr) {
 2884     ciEnv::current()->record_failure("CodeCache is full");
 2885     return 0;  // CodeBuffer::expand failed
 2886   }
 2887   int offset = __ offset();
 2888   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
 2889   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
 2890   __ end_a_stub();
 2891   return offset;
 2892 }
 2893 
 2894 // Emit deopt handler code.
 2895 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2896 
 2897   // Note that the code buffer's insts_mark is always relative to insts.
 2898   // That's why we must use the macroassembler to generate a handler.
 2899   address base = __ start_a_stub(size_deopt_handler());
 2900   if (base == nullptr) {
 2901     ciEnv::current()->record_failure("CodeCache is full");
 2902     return 0;  // CodeBuffer::expand failed
 2903   }
 2904   int offset = __ offset();
 2905 
 2906   address the_pc = (address) __ pc();
 2907   Label next;
  // Push "the_pc" on the stack without destroying any registers,
  // as they all may be live.
 2910 
 2911   // push address of "next"
 2912   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
 2913   __ bind(next);
 2914   // adjust it so it matches "the_pc"
 2915   __ subptr(Address(rsp, 0), __ offset() - offset);
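  // The call above pushed the address of "next"; subtracting the bytes
  // emitted since the handler entry (__ offset() - offset) rewinds the
  // saved slot to "the_pc", the start of this handler.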
 2916 
 2917   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2918   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2919   __ end_a_stub();
 2920   return offset;
 2921 }
 2922 
 2923 static Assembler::Width widthForType(BasicType bt) {
 2924   if (bt == T_BYTE) {
 2925     return Assembler::B;
 2926   } else if (bt == T_SHORT) {
 2927     return Assembler::W;
 2928   } else if (bt == T_INT) {
 2929     return Assembler::D;
 2930   } else {
 2931     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2932     return Assembler::Q;
 2933   }
 2934 }
 2935 
 2936 //=============================================================================
 2937 
// Float masks come from different places depending on platform.
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); }
static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); }
 2958 
 2959 //=============================================================================
 2960 bool Matcher::match_rule_supported(int opcode) {
 2961   if (!has_match_rule(opcode)) {
 2962     return false; // no match rule present
 2963   }
 2964   switch (opcode) {
 2965     case Op_AbsVL:
 2966     case Op_StoreVectorScatter:
 2967       if (UseAVX < 3) {
 2968         return false;
 2969       }
 2970       break;
 2971     case Op_PopCountI:
 2972     case Op_PopCountL:
 2973       if (!UsePopCountInstruction) {
 2974         return false;
 2975       }
 2976       break;
 2977     case Op_PopCountVI:
 2978       if (UseAVX < 2) {
 2979         return false;
 2980       }
 2981       break;
 2982     case Op_CompressV:
 2983     case Op_ExpandV:
 2984     case Op_PopCountVL:
 2985       if (UseAVX < 2) {
 2986         return false;
 2987       }
 2988       break;
 2989     case Op_MulVI:
 2990       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 2991         return false;
 2992       }
 2993       break;
 2994     case Op_MulVL:
 2995       if (UseSSE < 4) { // only with SSE4_1 or AVX
 2996         return false;
 2997       }
 2998       break;
 2999     case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
 3001         return false;
 3002       }
 3003       break;
 3004     case Op_AbsVB:
 3005     case Op_AbsVS:
 3006     case Op_AbsVI:
 3007     case Op_AddReductionVI:
 3008     case Op_AndReductionV:
 3009     case Op_OrReductionV:
 3010     case Op_XorReductionV:
 3011       if (UseSSE < 3) { // requires at least SSSE3
 3012         return false;
 3013       }
 3014       break;
 3015     case Op_MaxHF:
 3016     case Op_MinHF:
 3017       if (!VM_Version::supports_avx512vlbw()) {
 3018         return false;
 3019       }  // fallthrough
 3020     case Op_AddHF:
 3021     case Op_DivHF:
 3022     case Op_FmaHF:
 3023     case Op_MulHF:
 3024     case Op_ReinterpretS2HF:
 3025     case Op_ReinterpretHF2S:
 3026     case Op_SubHF:
 3027     case Op_SqrtHF:
 3028       if (!VM_Version::supports_avx512_fp16()) {
 3029         return false;
 3030       }
 3031       break;
 3032     case Op_VectorLoadShuffle:
 3033     case Op_VectorRearrange:
 3034     case Op_MulReductionVI:
 3035       if (UseSSE < 4) { // requires at least SSE4
 3036         return false;
 3037       }
 3038       break;
 3039     case Op_IsInfiniteF:
 3040     case Op_IsInfiniteD:
 3041       if (!VM_Version::supports_avx512dq()) {
 3042         return false;
 3043       }
 3044       break;
 3045     case Op_SqrtVD:
 3046     case Op_SqrtVF:
 3047     case Op_VectorMaskCmp:
 3048     case Op_VectorCastB2X:
 3049     case Op_VectorCastS2X:
 3050     case Op_VectorCastI2X:
 3051     case Op_VectorCastL2X:
 3052     case Op_VectorCastF2X:
 3053     case Op_VectorCastD2X:
 3054     case Op_VectorUCastB2X:
 3055     case Op_VectorUCastS2X:
 3056     case Op_VectorUCastI2X:
 3057     case Op_VectorMaskCast:
 3058       if (UseAVX < 1) { // enabled for AVX only
 3059         return false;
 3060       }
 3061       break;
 3062     case Op_PopulateIndex:
 3063       if (UseAVX < 2) {
 3064         return false;
 3065       }
 3066       break;
 3067     case Op_RoundVF:
 3068       if (UseAVX < 2) { // enabled for AVX2 only
 3069         return false;
 3070       }
 3071       break;
 3072     case Op_RoundVD:
 3073       if (UseAVX < 3) {
 3074         return false;  // enabled for AVX3 only
 3075       }
 3076       break;
 3077     case Op_CompareAndSwapL:
 3078     case Op_CompareAndSwapP:
 3079       break;
 3080     case Op_StrIndexOf:
 3081       if (!UseSSE42Intrinsics) {
 3082         return false;
 3083       }
 3084       break;
 3085     case Op_StrIndexOfChar:
 3086       if (!UseSSE42Intrinsics) {
 3087         return false;
 3088       }
 3089       break;
 3090     case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
 3092         return false;
 3093       }
 3094       break;
 3095     case Op_MulVB:
 3096     case Op_LShiftVB:
 3097     case Op_RShiftVB:
 3098     case Op_URShiftVB:
 3099     case Op_VectorInsert:
 3100     case Op_VectorLoadMask:
 3101     case Op_VectorStoreMask:
 3102     case Op_VectorBlend:
 3103       if (UseSSE < 4) {
 3104         return false;
 3105       }
 3106       break;
 3107     case Op_MaxD:
 3108     case Op_MaxF:
 3109     case Op_MinD:
 3110     case Op_MinF:
 3111       if (UseAVX < 1) { // enabled for AVX only
 3112         return false;
 3113       }
 3114       break;
 3115     case Op_CacheWB:
 3116     case Op_CacheWBPreSync:
 3117     case Op_CacheWBPostSync:
 3118       if (!VM_Version::supports_data_cache_line_flush()) {
 3119         return false;
 3120       }
 3121       break;
 3122     case Op_ExtractB:
 3123     case Op_ExtractL:
 3124     case Op_ExtractI:
 3125     case Op_RoundDoubleMode:
 3126       if (UseSSE < 4) {
 3127         return false;
 3128       }
 3129       break;
 3130     case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
 3132         return false; // 128bit vroundpd is not available
 3133       }
 3134       break;
 3135     case Op_LoadVectorGather:
 3136     case Op_LoadVectorGatherMasked:
 3137       if (UseAVX < 2) {
 3138         return false;
 3139       }
 3140       break;
 3141     case Op_FmaF:
 3142     case Op_FmaD:
 3143     case Op_FmaVD:
 3144     case Op_FmaVF:
 3145       if (!UseFMA) {
 3146         return false;
 3147       }
 3148       break;
 3149     case Op_MacroLogicV:
 3150       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3151         return false;
 3152       }
 3153       break;
 3154 
 3155     case Op_VectorCmpMasked:
 3156     case Op_VectorMaskGen:
 3157       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3158         return false;
 3159       }
 3160       break;
 3161     case Op_VectorMaskFirstTrue:
 3162     case Op_VectorMaskLastTrue:
 3163     case Op_VectorMaskTrueCount:
 3164     case Op_VectorMaskToLong:
 3165       if (UseAVX < 1) {
 3166          return false;
 3167       }
 3168       break;
 3169     case Op_RoundF:
 3170     case Op_RoundD:
 3171       break;
 3172     case Op_CopySignD:
 3173     case Op_CopySignF:
 3174       if (UseAVX < 3)  {
 3175         return false;
 3176       }
 3177       if (!VM_Version::supports_avx512vl()) {
 3178         return false;
 3179       }
 3180       break;
 3181     case Op_CompressBits:
 3182     case Op_ExpandBits:
 3183       if (!VM_Version::supports_bmi2()) {
 3184         return false;
 3185       }
 3186       break;
 3187     case Op_CompressM:
 3188       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3189         return false;
 3190       }
 3191       break;
 3192     case Op_ConvF2HF:
 3193     case Op_ConvHF2F:
 3194       if (!VM_Version::supports_float16()) {
 3195         return false;
 3196       }
 3197       break;
 3198     case Op_VectorCastF2HF:
 3199     case Op_VectorCastHF2F:
 3200       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3201         return false;
 3202       }
 3203       break;
 3204   }
 3205   return true;  // Match rules are supported by default.
 3206 }
 3207 
 3208 //------------------------------------------------------------------------
 3209 
 3210 static inline bool is_pop_count_instr_target(BasicType bt) {
 3211   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3212          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3213 }
 3214 
 3215 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3216   return match_rule_supported_vector(opcode, vlen, bt);
 3217 }
 3218 
// Identify extra cases for which we might want to provide match rules for
// vector nodes and other intrinsics, guarded by vector length (vlen) and
// element type (bt).
 3221 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3222   if (!match_rule_supported(opcode)) {
 3223     return false;
 3224   }
 3225   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3226   //   * SSE2 supports 128bit vectors for all types;
 3227   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3228   //   * AVX2 supports 256bit vectors for all types;
 3229   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3230   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3231   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3232   // And MaxVectorSize is taken into account as well.
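  // For example, under these rules a 256-bit vector of ints (bt == T_INT,
  // vlen == 8) is rejected on an AVX1-only machine but accepted with AVX2,
  // while a 256-bit vector of floats is accepted on both (assuming
  // MaxVectorSize permits 32 bytes).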
 3233   if (!vector_size_supported(bt, vlen)) {
 3234     return false;
 3235   }
 3236   // Special cases which require vector length follow:
 3237   //   * implementation limitations
 3238   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3239   //   * 128bit vroundpd instruction is present only in AVX1
 3240   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3241   switch (opcode) {
 3242     case Op_MaxVHF:
 3243     case Op_MinVHF:
 3244       if (!VM_Version::supports_avx512bw()) {
 3245         return false;
      } // fallthrough
 3247     case Op_AddVHF:
 3248     case Op_DivVHF:
 3249     case Op_FmaVHF:
 3250     case Op_MulVHF:
 3251     case Op_SubVHF:
 3252     case Op_SqrtVHF:
 3253       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3254         return false;
 3255       }
 3256       if (!VM_Version::supports_avx512_fp16()) {
 3257         return false;
 3258       }
 3259       break;
 3260     case Op_AbsVF:
 3261     case Op_NegVF:
      if ((vlen == 16) && !VM_Version::supports_avx512dq()) {
 3263         return false; // 512bit vandps and vxorps are not available
 3264       }
 3265       break;
 3266     case Op_AbsVD:
 3267     case Op_NegVD:
      if ((vlen == 8) && !VM_Version::supports_avx512dq()) {
 3269         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3270       }
 3271       break;
 3272     case Op_RotateRightV:
 3273     case Op_RotateLeftV:
 3274       if (bt != T_INT && bt != T_LONG) {
 3275         return false;
 3276       } // fallthrough
 3277     case Op_MacroLogicV:
 3278       if (!VM_Version::supports_evex() ||
 3279           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3280         return false;
 3281       }
 3282       break;
 3283     case Op_ClearArray:
 3284     case Op_VectorMaskGen:
 3285     case Op_VectorCmpMasked:
 3286       if (!VM_Version::supports_avx512bw()) {
 3287         return false;
 3288       }
 3289       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3290         return false;
 3291       }
 3292       break;
 3293     case Op_LoadVectorMasked:
 3294     case Op_StoreVectorMasked:
 3295       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3296         return false;
 3297       }
 3298       break;
 3299     case Op_UMinV:
 3300     case Op_UMaxV:
 3301       if (UseAVX == 0) {
 3302         return false;
 3303       }
 3304       break;
 3305     case Op_MaxV:
 3306     case Op_MinV:
 3307       if (UseSSE < 4 && is_integral_type(bt)) {
 3308         return false;
 3309       }
 3310       if ((bt == T_FLOAT || bt == T_DOUBLE)) {
 3311           // Float/Double intrinsics are enabled for AVX family currently.
 3312           if (UseAVX == 0) {
 3313             return false;
 3314           }
 3315           if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
 3316             return false;
 3317           }
 3318       }
 3319       break;
 3320     case Op_CallLeafVector:
 3321       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3322         return false;
 3323       }
 3324       break;
 3325     case Op_AddReductionVI:
 3326       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3327         return false;
 3328       }
 3329       // fallthrough
 3330     case Op_AndReductionV:
 3331     case Op_OrReductionV:
 3332     case Op_XorReductionV:
 3333       if (is_subword_type(bt) && (UseSSE < 4)) {
 3334         return false;
 3335       }
 3336       break;
 3337     case Op_MinReductionV:
 3338     case Op_MaxReductionV:
 3339       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3340         return false;
 3341       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3342         return false;
 3343       }
 3344       // Float/Double intrinsics enabled for AVX family.
 3345       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3346         return false;
 3347       }
 3348       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3349         return false;
 3350       }
 3351       break;
 3352     case Op_VectorTest:
 3353       if (UseSSE < 4) {
 3354         return false; // Implementation limitation
 3355       } else if (size_in_bits < 32) {
 3356         return false; // Implementation limitation
 3357       }
 3358       break;
 3359     case Op_VectorLoadShuffle:
 3360     case Op_VectorRearrange:
      if (vlen == 2) {
 3362         return false; // Implementation limitation due to how shuffle is loaded
 3363       } else if (size_in_bits == 256 && UseAVX < 2) {
 3364         return false; // Implementation limitation
 3365       }
 3366       break;
 3367     case Op_VectorLoadMask:
 3368     case Op_VectorMaskCast:
 3369       if (size_in_bits == 256 && UseAVX < 2) {
 3370         return false; // Implementation limitation
 3371       }
 3372       // fallthrough
 3373     case Op_VectorStoreMask:
 3374       if (vlen == 2) {
 3375         return false; // Implementation limitation
 3376       }
 3377       break;
 3378     case Op_PopulateIndex:
 3379       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3380         return false;
 3381       }
 3382       break;
 3383     case Op_VectorCastB2X:
 3384     case Op_VectorCastS2X:
 3385     case Op_VectorCastI2X:
 3386       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3387         return false;
 3388       }
 3389       break;
 3390     case Op_VectorCastL2X:
 3391       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3392         return false;
 3393       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3394         return false;
 3395       }
 3396       break;
 3397     case Op_VectorCastF2X: {
        // As per JLS section 5.1.3, narrowing conversions to sub-word types
        // happen after an intermediate conversion to integer, and the special
        // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
 3401         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3402         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3403           return false;
 3404         }
 3405       }
 3406       // fallthrough
 3407     case Op_VectorCastD2X:
 3408       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3409         return false;
 3410       }
 3411       break;
 3412     case Op_VectorCastF2HF:
 3413     case Op_VectorCastHF2F:
 3414       if (!VM_Version::supports_f16c() &&
 3415          ((!VM_Version::supports_evex() ||
 3416          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3417         return false;
 3418       }
 3419       break;
 3420     case Op_RoundVD:
 3421       if (!VM_Version::supports_avx512dq()) {
 3422         return false;
 3423       }
 3424       break;
 3425     case Op_MulReductionVI:
 3426       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3427         return false;
 3428       }
 3429       break;
 3430     case Op_LoadVectorGatherMasked:
 3431       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3432         return false;
 3433       }
 3434       if (is_subword_type(bt) &&
 3435          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3436           (size_in_bits < 64)                                      ||
 3437           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3438         return false;
 3439       }
 3440       break;
 3441     case Op_StoreVectorScatterMasked:
 3442     case Op_StoreVectorScatter:
 3443       if (is_subword_type(bt)) {
 3444         return false;
 3445       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3446         return false;
 3447       }
 3448       // fallthrough
 3449     case Op_LoadVectorGather:
 3450       if (!is_subword_type(bt) && size_in_bits == 64) {
 3451         return false;
 3452       }
 3453       if (is_subword_type(bt) && size_in_bits < 64) {
 3454         return false;
 3455       }
 3456       break;
 3457     case Op_SaturatingAddV:
 3458     case Op_SaturatingSubV:
 3459       if (UseAVX < 1) {
 3460         return false; // Implementation limitation
 3461       }
 3462       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3463         return false;
 3464       }
 3465       break;
 3466     case Op_SelectFromTwoVector:
      if (size_in_bits < 128) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false;
      }
      if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
        return false;
      }
      break;
 3483     case Op_MaskAll:
 3484       if (!VM_Version::supports_evex()) {
 3485         return false;
 3486       }
 3487       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3488         return false;
 3489       }
 3490       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3491         return false;
 3492       }
 3493       break;
 3494     case Op_VectorMaskCmp:
 3495       if (vlen < 2 || size_in_bits < 32) {
 3496         return false;
 3497       }
 3498       break;
 3499     case Op_CompressM:
 3500       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3501         return false;
 3502       }
 3503       break;
 3504     case Op_CompressV:
 3505     case Op_ExpandV:
 3506       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3507         return false;
 3508       }
      if (size_in_bits < 128) {
        return false;
      } // fallthrough
 3512     case Op_VectorLongToMask:
 3513       if (UseAVX < 1) {
 3514         return false;
 3515       }
 3516       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3517         return false;
 3518       }
 3519       break;
 3520     case Op_SignumVD:
 3521     case Op_SignumVF:
 3522       if (UseAVX < 1) {
 3523         return false;
 3524       }
 3525       break;
 3526     case Op_PopCountVI:
 3527     case Op_PopCountVL: {
 3528         if (!is_pop_count_instr_target(bt) &&
 3529             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3530           return false;
 3531         }
 3532       }
 3533       break;
 3534     case Op_ReverseV:
 3535     case Op_ReverseBytesV:
 3536       if (UseAVX < 2) {
 3537         return false;
 3538       }
 3539       break;
 3540     case Op_CountTrailingZerosV:
 3541     case Op_CountLeadingZerosV:
 3542       if (UseAVX < 2) {
 3543         return false;
 3544       }
 3545       break;
 3546   }
  return true;  // By default, match rules are supported.
 3548 }
 3549 
 3550 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most unary/binary/ternary masked operations
  // share the IR nodes of their non-masked counterparts, with the mask edge
  // being the differentiator.
  // This routine does a strict check on the existence of masked operation
  // patterns by returning false for all opcodes other than the ones whose
  // masked instruction patterns are defined in this file.
 3557   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3558     return false;
 3559   }
 3560 
 3561   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3562   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3563     return false;
 3564   }
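  // That is, masked operations on sub-512-bit vectors additionally require
  // AVX512VL; on a bare AVX512F machine only the 512-bit masked forms match.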
  switch (opcode) {
 3566     // Unary masked operations
 3567     case Op_AbsVB:
 3568     case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      } // fallthrough
 3572     case Op_AbsVI:
 3573     case Op_AbsVL:
 3574       return true;
 3575 
 3576     // Ternary masked operations
 3577     case Op_FmaVF:
 3578     case Op_FmaVD:
 3579       return true;
 3580 
 3581     case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
 3583         return false;
 3584       }
 3585       return true;
 3586 
 3587     // Binary masked operations
 3588     case Op_AddVB:
 3589     case Op_AddVS:
 3590     case Op_SubVB:
 3591     case Op_SubVS:
 3592     case Op_MulVS:
 3593     case Op_LShiftVS:
 3594     case Op_RShiftVS:
 3595     case Op_URShiftVS:
 3596       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3597       if (!VM_Version::supports_avx512bw()) {
 3598         return false;  // Implementation limitation
 3599       }
 3600       return true;
 3601 
 3602     case Op_MulVL:
 3603       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3604       if (!VM_Version::supports_avx512dq()) {
 3605         return false;  // Implementation limitation
 3606       }
 3607       return true;
 3608 
 3609     case Op_AndV:
 3610     case Op_OrV:
 3611     case Op_XorV:
 3612     case Op_RotateRightV:
 3613     case Op_RotateLeftV:
 3614       if (bt != T_INT && bt != T_LONG) {
 3615         return false; // Implementation limitation
 3616       }
 3617       return true;
 3618 
 3619     case Op_VectorLoadMask:
 3620       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3621       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3622         return false;
 3623       }
 3624       return true;
 3625 
 3626     case Op_AddVI:
 3627     case Op_AddVL:
 3628     case Op_AddVF:
 3629     case Op_AddVD:
 3630     case Op_SubVI:
 3631     case Op_SubVL:
 3632     case Op_SubVF:
 3633     case Op_SubVD:
 3634     case Op_MulVI:
 3635     case Op_MulVF:
 3636     case Op_MulVD:
 3637     case Op_DivVF:
 3638     case Op_DivVD:
 3639     case Op_SqrtVF:
 3640     case Op_SqrtVD:
 3641     case Op_LShiftVI:
 3642     case Op_LShiftVL:
 3643     case Op_RShiftVI:
 3644     case Op_RShiftVL:
 3645     case Op_URShiftVI:
 3646     case Op_URShiftVL:
 3647     case Op_LoadVectorMasked:
 3648     case Op_StoreVectorMasked:
 3649     case Op_LoadVectorGatherMasked:
 3650     case Op_StoreVectorScatterMasked:
 3651       return true;
 3652 
 3653     case Op_UMinV:
 3654     case Op_UMaxV:
 3655       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3656         return false;
 3657       } // fallthrough
 3658     case Op_MaxV:
 3659     case Op_MinV:
 3660       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3661         return false; // Implementation limitation
 3662       }
 3663       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3664         return false; // Implementation limitation
 3665       }
 3666       return true;
 3667     case Op_SaturatingAddV:
 3668     case Op_SaturatingSubV:
 3669       if (!is_subword_type(bt)) {
 3670         return false;
 3671       }
 3672       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3673         return false; // Implementation limitation
 3674       }
 3675       return true;
 3676 
 3677     case Op_VectorMaskCmp:
 3678       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3679         return false; // Implementation limitation
 3680       }
 3681       return true;
 3682 
 3683     case Op_VectorRearrange:
 3684       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3685         return false; // Implementation limitation
 3686       }
 3687       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3688         return false; // Implementation limitation
 3689       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3690         return false; // Implementation limitation
 3691       }
 3692       return true;
 3693 
 3694     // Binary Logical operations
 3695     case Op_AndVMask:
 3696     case Op_OrVMask:
 3697     case Op_XorVMask:
 3698       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3699         return false; // Implementation limitation
 3700       }
 3701       return true;
 3702 
 3703     case Op_PopCountVI:
 3704     case Op_PopCountVL:
 3705       if (!is_pop_count_instr_target(bt)) {
 3706         return false;
 3707       }
 3708       return true;
 3709 
 3710     case Op_MaskAll:
 3711       return true;
 3712 
 3713     case Op_CountLeadingZerosV:
      if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
        return true;
      }
      // fallthrough
 3717     default:
 3718       return false;
 3719   }
 3720 }
 3721 
 3722 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3723   return false;
 3724 }
 3725 
 3726 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3727 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3728   switch (elem_bt) {
 3729     case T_BYTE:  return false;
 3730     case T_SHORT: return !VM_Version::supports_avx512bw();
 3731     case T_INT:   return !VM_Version::supports_avx();
 3732     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3733     default:
 3734       ShouldNotReachHere();
 3735       return false;
 3736   }
 3737 }
 3738 
 3739 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3740   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3741   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3742   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3743       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3744     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3745     return new legVecZOper();
 3746   }
 3747   if (legacy) {
 3748     switch (ideal_reg) {
 3749       case Op_VecS: return new legVecSOper();
 3750       case Op_VecD: return new legVecDOper();
 3751       case Op_VecX: return new legVecXOper();
 3752       case Op_VecY: return new legVecYOper();
 3753       case Op_VecZ: return new legVecZOper();
 3754     }
 3755   } else {
 3756     switch (ideal_reg) {
 3757       case Op_VecS: return new vecSOper();
 3758       case Op_VecD: return new vecDOper();
 3759       case Op_VecX: return new vecXOper();
 3760       case Op_VecY: return new vecYOper();
 3761       case Op_VecZ: return new vecZOper();
 3762     }
 3763   }
 3764   ShouldNotReachHere();
 3765   return nullptr;
 3766 }
 3767 
 3768 bool Matcher::is_reg2reg_move(MachNode* m) {
 3769   switch (m->rule()) {
 3770     case MoveVec2Leg_rule:
 3771     case MoveLeg2Vec_rule:
 3772     case MoveF2VL_rule:
 3773     case MoveF2LEG_rule:
 3774     case MoveVL2F_rule:
 3775     case MoveLEG2F_rule:
 3776     case MoveD2VL_rule:
 3777     case MoveD2LEG_rule:
 3778     case MoveVL2D_rule:
 3779     case MoveLEG2D_rule:
 3780       return true;
 3781     default:
 3782       return false;
 3783   }
 3784 }
 3785 
 3786 bool Matcher::is_generic_vector(MachOper* opnd) {
 3787   switch (opnd->opcode()) {
 3788     case VEC:
 3789     case LEGVEC:
 3790       return true;
 3791     default:
 3792       return false;
 3793   }
 3794 }
 3795 
 3796 //------------------------------------------------------------------------
 3797 
 3798 const RegMask* Matcher::predicate_reg_mask(void) {
 3799   return &_VECTMASK_REG_mask;
 3800 }
 3801 
 3802 // Max vector size in bytes. 0 if not supported.
 3803 int Matcher::vector_width_in_bytes(BasicType bt) {
 3804   assert(is_java_primitive(bt), "only primitive type vectors");
 3805   // SSE2 supports 128bit vectors for all types.
 3806   // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types.
 3808   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
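  // For example, UseAVX == 2 gives (1 << 2) * 8 = 32 bytes and UseAVX == 3
  // gives (1 << 3) * 8 = 64 bytes; otherwise the SSE2 baseline is 16 bytes.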
 3809   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3810   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3811     size = (UseAVX > 2) ? 64 : 32;
 3812   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3813     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3814   // Use flag to limit vector size.
 3815   size = MIN2(size,(int)MaxVectorSize);
 3816   // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
  case T_CHAR:
  case T_BYTE:
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
 3841   return size;
 3842 }
 3843 
 3844 // Limits on vector size (number of elements) loaded into vector.
 3845 int Matcher::max_vector_size(const BasicType bt) {
 3846   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3847 }
 3848 int Matcher::min_vector_size(const BasicType bt) {
 3849   int max_size = max_vector_size(bt);
 3850   // Min size which can be loaded into vector is 4 bytes.
 3851   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
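  // For example, T_BYTE needs at least 4 elements to fill 4 bytes, while
  // T_SHORT reaches the 4-byte minimum with just 2 elements.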
 3852   // Support for calling svml double64 vectors
 3853   if (bt == T_DOUBLE) {
 3854     size = 1;
 3855   }
 3856   return MIN2(size,max_size);
 3857 }
 3858 
 3859 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3860   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3861   // by default on Cascade Lake
 3862   if (VM_Version::is_default_intel_cascade_lake()) {
 3863     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3864   }
 3865   return Matcher::max_vector_size(bt);
 3866 }
 3867 
 3868 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3869   return -1;
 3870 }
 3871 
 3872 // Vector ideal reg corresponding to specified size in bytes
 3873 uint Matcher::vector_ideal_reg(int size) {
 3874   assert(MaxVectorSize >= size, "");
  switch (size) {
 3876     case  4: return Op_VecS;
 3877     case  8: return Op_VecD;
 3878     case 16: return Op_VecX;
 3879     case 32: return Op_VecY;
 3880     case 64: return Op_VecZ;
 3881   }
 3882   ShouldNotReachHere();
 3883   return 0;
 3884 }
 3885 
 3886 // Check for shift by small constant as well
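// For example, on LP64 an address offset of the form
//   (LShiftX (ConvI2L idx) 3)
// can be subsumed into a [base + idx*8] addressing mode, provided the
// index is known to be non-negative.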
 3887 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3888   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3889       shift->in(2)->get_int() <= 3 &&
 3890       // Are there other uses besides address expressions?
 3891       !matcher->is_visited(shift)) {
 3892     address_visited.set(shift->_idx); // Flag as address_visited
 3893     mstack.push(shift->in(2), Matcher::Visit);
 3894     Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses the ConvI2L
    // operation for an array index on LP64 if the index value is positive.
 3898     if (conv->Opcode() == Op_ConvI2L &&
 3899         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3900         // Are there other uses besides address expressions?
 3901         !matcher->is_visited(conv)) {
 3902       address_visited.set(conv->_idx); // Flag as address_visited
 3903       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3904     } else {
 3905       mstack.push(conv, Matcher::Pre_Visit);
 3906     }
 3907     return true;
 3908   }
 3909   return false;
 3910 }
 3911 
// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, such that the sub-graph can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
 3918 //
 3919 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3920 // This is a temporary solution until we make DAGs expressible in ADL.
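//
// As a sketch of the accepted shapes (set up in is_bmi_pattern() below):
//   (AndI (SubI 0 LoadI*) LoadI*)           => blsi   r32, m32  (x & -x)
//   (AndI (AddI LoadI* (ConI -1)) LoadI*)   => blsr   r32, m32  (x & (x - 1))
//   (XorI (AddI LoadI* (ConI -1)) LoadI*)   => blsmsk r32, m32  (x ^ (x - 1))
// The analogous long forms are matched for LoadL.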
 3921 template<typename ConType>
 3922 class FusedPatternMatcher {
 3923   Node* _op1_node;
 3924   Node* _mop_node;
 3925   int _con_op;
 3926 
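  // Returns the input index (1 or 2) of 'n' whose opcode is 'next_op'.
  // A fixed 'next_op_idx' restricts the check to that input; -1 means
  // 'n' is commutative and both inputs are tried. Returns -1 on no match.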
 3927   static int match_next(Node* n, int next_op, int next_op_idx) {
 3928     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3929       return -1;
 3930     }
 3931 
 3932     if (next_op_idx == -1) { // n is commutative, try rotations
 3933       if (n->in(1)->Opcode() == next_op) {
 3934         return 1;
 3935       } else if (n->in(2)->Opcode() == next_op) {
 3936         return 2;
 3937       }
 3938     } else {
 3939       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 3940       if (n->in(next_op_idx)->Opcode() == next_op) {
 3941         return next_op_idx;
 3942       }
 3943     }
 3944     return -1;
 3945   }
 3946 
 3947  public:
 3948   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 3949     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 3950 
 3951   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 3952              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 3953              typename ConType::NativeType con_value) {
 3954     if (_op1_node->Opcode() != op1) {
 3955       return false;
 3956     }
 3957     if (_mop_node->outcnt() > 2) {
 3958       return false;
 3959     }
 3960     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 3961     if (op1_op2_idx == -1) {
 3962       return false;
 3963     }
 3964     // Memory operation must be the other edge
 3965     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 3966 
 3967     // Check that the mop node is really what we want
 3968     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 3969       Node* op2_node = _op1_node->in(op1_op2_idx);
 3970       if (op2_node->outcnt() > 1) {
 3971         return false;
 3972       }
 3973       assert(op2_node->Opcode() == op2, "Should be");
 3974       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 3975       if (op2_con_idx == -1) {
 3976         return false;
 3977       }
 3978       // Memory operation must be the other edge
 3979       int op2_mop_idx = (op2_con_idx & 1) + 1;
 3980       // Check that the memory operation is the same node
 3981       if (op2_node->in(op2_mop_idx) == _mop_node) {
 3982         // Now check the constant
 3983         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 3984         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 3985           return true;
 3986         }
 3987       }
 3988     }
 3989     return false;
 3990   }
 3991 };
 3992 
 3993 static bool is_bmi_pattern(Node* n, Node* m) {
 3994   assert(UseBMI1Instructions, "sanity");
 3995   if (n != nullptr && m != nullptr) {
 3996     if (m->Opcode() == Op_LoadI) {
 3997       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 3998       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 3999              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4000              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4001     } else if (m->Opcode() == Op_LoadL) {
 4002       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4003       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4004              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4005              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4006     }
 4007   }
 4008   return false;
 4009 }
 4010 
 4011 // Should the matcher clone input 'm' of node 'n'?
 4012 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4013   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4014   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
 4015     mstack.push(m, Visit);
 4016     return true;
 4017   }
 4018   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4019     mstack.push(m, Visit);           // m = ShiftCntV
 4020     return true;
 4021   }
 4022   if (is_encode_and_store_pattern(n, m)) {
 4023     mstack.push(m, Visit);
 4024     return true;
 4025   }
 4026   return false;
 4027 }
 4028 
 4029 // Should the Matcher clone shifts on addressing modes, expecting them
 4030 // to be subsumed into complex addressing expressions or compute them
 4031 // into registers?
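// For example, an AddP whose address input is itself an AddP carrying a
// shifted index, with a 32-bit constant offset on the outer node, can be
// folded into a single [base + idx << scale + disp] memory operand.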
 4032 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4033   Node *off = m->in(AddPNode::Offset);
 4034   if (off->is_Con()) {
 4035     address_visited.test_set(m->_idx); // Flag as address_visited
 4036     Node *adr = m->in(AddPNode::Address);
 4037 
 4038     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
    // AtomicAdd is not an addressing expression.
    // It is cheap to rule out by checking for its unusual (top) base.
 4041     if (adr->is_AddP() &&
 4042         !adr->in(AddPNode::Base)->is_top() &&
 4043         !adr->in(AddPNode::Offset)->is_Con() &&
 4044         off->get_long() == (int) (off->get_long()) && // immL32
 4045         // Are there other uses besides address expressions?
 4046         !is_visited(adr)) {
 4047       address_visited.set(adr->_idx); // Flag as address_visited
 4048       Node *shift = adr->in(AddPNode::Offset);
 4049       if (!clone_shift(shift, this, mstack, address_visited)) {
 4050         mstack.push(shift, Pre_Visit);
 4051       }
 4052       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4053       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4054     } else {
 4055       mstack.push(adr, Pre_Visit);
 4056     }
 4057 
 4058     // Clone X+offset as it also folds into most addressing expressions
 4059     mstack.push(off, Visit);
 4060     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4061     return true;
 4062   } else if (clone_shift(off, this, mstack, address_visited)) {
 4063     address_visited.test_set(m->_idx); // Flag as address_visited
 4064     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4065     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4066     return true;
 4067   }
 4068   return false;
 4069 }
 4070 
 4071 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4072   switch (bt) {
 4073     case BoolTest::eq:
 4074       return Assembler::eq;
 4075     case BoolTest::ne:
 4076       return Assembler::neq;
 4077     case BoolTest::le:
 4078     case BoolTest::ule:
 4079       return Assembler::le;
 4080     case BoolTest::ge:
 4081     case BoolTest::uge:
 4082       return Assembler::nlt;
 4083     case BoolTest::lt:
 4084     case BoolTest::ult:
 4085       return Assembler::lt;
 4086     case BoolTest::gt:
 4087     case BoolTest::ugt:
 4088       return Assembler::nle;
 4089     default : ShouldNotReachHere(); return Assembler::_false;
 4090   }
 4091 }
 4092 
 4093 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4094   switch (bt) {
 4095   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4096   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4097   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4098   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4099   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4100   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4101   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4102   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4103   }
 4104 }
 4105 
 4106 // Helper methods for MachSpillCopyNode::implementation().
 4107 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4108                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4109   assert(ireg == Op_VecS || // 32bit vector
 4110          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4111           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4112          "no non-adjacent vector moves" );
 4113   if (masm) {
 4114     switch (ireg) {
 4115     case Op_VecS: // copy whole register
 4116     case Op_VecD:
 4117     case Op_VecX:
 4118       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4119         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4120       } else {
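        // Without AVX512VL, a plain 128-bit move cannot encode the extended
        // xmm16-31 registers, so use an EVEX-encoded lane extract instead.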
 4121         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
 4123       break;
 4124     case Op_VecY:
 4125       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4126         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4127       } else {
 4128         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
 4130       break;
 4131     case Op_VecZ:
 4132       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4133       break;
 4134     default:
 4135       ShouldNotReachHere();
 4136     }
 4137 #ifndef PRODUCT
 4138   } else {
 4139     switch (ireg) {
 4140     case Op_VecS:
 4141     case Op_VecD:
 4142     case Op_VecX:
 4143       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4144       break;
 4145     case Op_VecY:
 4146     case Op_VecZ:
 4147       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4148       break;
 4149     default:
 4150       ShouldNotReachHere();
 4151     }
 4152 #endif
 4153   }
 4154 }
 4155 
 4156 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4157                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4158   if (masm) {
 4159     if (is_load) {
 4160       switch (ireg) {
 4161       case Op_VecS:
 4162         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4163         break;
 4164       case Op_VecD:
 4165         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4166         break;
 4167       case Op_VecX:
 4168         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4169           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4170         } else {
 4171           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4173         }
 4174         break;
 4175       case Op_VecY:
 4176         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4177           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4178         } else {
 4179           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4181         }
 4182         break;
 4183       case Op_VecZ:
 4184         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4185         break;
 4186       default:
 4187         ShouldNotReachHere();
 4188       }
 4189     } else { // store
 4190       switch (ireg) {
 4191       case Op_VecS:
 4192         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4193         break;
 4194       case Op_VecD:
 4195         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4196         break;
 4197       case Op_VecX:
 4198         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4199           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
 4202           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4203         }
 4204         break;
 4205       case Op_VecY:
 4206         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4207           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
 4210           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4211         }
 4212         break;
 4213       case Op_VecZ:
 4214         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4215         break;
 4216       default:
 4217         ShouldNotReachHere();
 4218       }
 4219     }
 4220 #ifndef PRODUCT
 4221   } else {
 4222     if (is_load) {
 4223       switch (ireg) {
 4224       case Op_VecS:
 4225         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4226         break;
 4227       case Op_VecD:
 4228         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4229         break;
      case Op_VecX:
 4231         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4232         break;
 4233       case Op_VecY:
 4234       case Op_VecZ:
 4235         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4236         break;
 4237       default:
 4238         ShouldNotReachHere();
 4239       }
 4240     } else { // store
 4241       switch (ireg) {
 4242       case Op_VecS:
 4243         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4244         break;
 4245       case Op_VecD:
 4246         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4247         break;
      case Op_VecX:
 4249         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4250         break;
 4251       case Op_VecY:
 4252       case Op_VecZ:
 4253         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4254         break;
 4255       default:
 4256         ShouldNotReachHere();
 4257       }
 4258     }
 4259 #endif
 4260   }
 4261 }
 4262 
 4263 template <class T>
 4264 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4265   int size = type2aelembytes(bt) * len;
 4266   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4267   for (int i = 0; i < len; i++) {
 4268     int offset = i * type2aelembytes(bt);
 4269     switch (bt) {
 4270       case T_BYTE: val->at(i) = con; break;
 4271       case T_SHORT: {
 4272         jshort c = con;
 4273         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4274         break;
 4275       }
 4276       case T_INT: {
 4277         jint c = con;
 4278         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4279         break;
 4280       }
 4281       case T_LONG: {
 4282         jlong c = con;
 4283         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4284         break;
 4285       }
 4286       case T_FLOAT: {
 4287         jfloat c = con;
 4288         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4289         break;
 4290       }
 4291       case T_DOUBLE: {
 4292         jdouble c = con;
 4293         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4294         break;
 4295       }
 4296       default: assert(false, "%s", type2name(bt));
 4297     }
 4298   }
 4299   return val;
 4300 }
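// For example, on this little-endian target, vreplicate_imm<jint>(T_INT, 0x01020304, 2)
// produces the byte sequence { 0x04, 0x03, 0x02, 0x01, 0x04, 0x03, 0x02, 0x01 }.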
 4301 
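// Returns a 64-bit pattern with the sign bit of every lane of the given
// element type set (e.g. 0x80 replicated in each byte for T_BYTE).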
 4302 static inline jlong high_bit_set(BasicType bt) {
 4303   switch (bt) {
 4304     case T_BYTE:  return 0x8080808080808080;
 4305     case T_SHORT: return 0x8000800080008000;
 4306     case T_INT:   return 0x8000000080000000;
 4307     case T_LONG:  return 0x8000000000000000;
 4308     default:
 4309       ShouldNotReachHere();
 4310       return 0;
 4311   }
 4312 }
 4313 
 4314 #ifndef PRODUCT
 4315   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4316     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4317   }
 4318 #endif
 4319 
 4320   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4321     __ nop(_count);
 4322   }
 4323 
 4324   uint MachNopNode::size(PhaseRegAlloc*) const {
 4325     return _count;
 4326   }
 4327 
 4328 #ifndef PRODUCT
 4329   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4330     st->print("# breakpoint");
 4331   }
 4332 #endif
 4333 
 4334   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4335     __ int3();
 4336   }
 4337 
 4338   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4339     return MachNode::size(ra_);
 4340   }
 4341 
 4342 %}
 4343 
 4344 //----------ENCODING BLOCK-----------------------------------------------------
 4345 // This block specifies the encoding classes used by the compiler to
 4346 // output byte streams.  Encoding classes are parameterized macros
 4347 // used by Machine Instruction Nodes in order to generate the bit
 4348 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.
// REG_INTER causes an operand to generate a function
 4352 // which returns its register number when queried.  CONST_INTER causes
 4353 // an operand to generate a function which returns the value of the
 4354 // constant when queried.  MEMORY_INTER causes an operand to generate
 4355 // four functions which return the Base Register, the Index Register,
 4356 // the Scale Value, and the Offset Value of the operand when queried.
 4357 // COND_INTER causes an operand to generate six functions which return
 4358 // the encoding code (ie - encoding bits for the instruction)
 4359 // associated with each basic boolean condition for a conditional
 4360 // instruction.
 4361 //
// Instructions specify two basic values for encoding.  A function is
// also available to check whether the constant displacement is an
// oop.  They use the ins_encode keyword to specify their encoding
 4365 // classes (which must be a sequence of enc_class names, and their
 4366 // parameters, specified in the encoding block), and they use the
 4367 // opcode keyword to specify, in order, their primary, secondary, and
 4368 // tertiary opcode.  Only the opcode sections which a particular
 4369 // instruction needs for encoding need to be specified.
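// For example, an instruction below may select one of these classes with
// something like:  ins_encode(cdql_enc(div));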
 4370 encode %{
 4371   enc_class cdql_enc(no_rax_rdx_RegI div)
 4372   %{
 4373     // Full implementation of Java idiv and irem; checks for
 4374     // special case as described in JVM spec., p.243 & p.271.
 4375     //
 4376     //         normal case                           special case
 4377     //
 4378     // input : rax: dividend                         min_int
 4379     //         reg: divisor                          -1
 4380     //
 4381     // output: rax: quotient  (= rax idiv reg)       min_int
 4382     //         rdx: remainder (= rax irem reg)       0
 4383     //
    //  Code sequence:
 4385     //
 4386     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4387     //    5:   75 07/08                jne    e <normal>
 4388     //    7:   33 d2                   xor    %edx,%edx
 4389     //  [div >= 8 -> offset + 1]
 4390     //  [REX_B]
 4391     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4392     //    c:   74 03/04                je     11 <done>
 4393     // 000000000000000e <normal>:
 4394     //    e:   99                      cltd
 4395     //  [div >= 8 -> offset + 1]
 4396     //  [REX_B]
 4397     //    f:   f7 f9                   idiv   $div
 4398     // 0000000000000011 <done>:
 4399     Label normal;
 4400     Label done;
 4401 
 4402     // cmp    $0x80000000,%eax
 4403     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4404 
 4405     // jne    e <normal>
 4406     __ jccb(Assembler::notEqual, normal);
 4407 
 4408     // xor    %edx,%edx
 4409     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4410 
    // cmp    $0xffffffffffffffff,$div
 4412     __ cmpl($div$$Register, -1);
 4413 
 4414     // je     11 <done>
 4415     __ jccb(Assembler::equal, done);
 4416 
 4417     // <normal>
 4418     // cltd
 4419     __ bind(normal);
 4420     __ cdql();
 4421 
 4422     // idivl
 4423     // <done>
 4424     __ idivl($div$$Register);
 4425     __ bind(done);
 4426   %}
 4427 
 4428   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4429   %{
 4430     // Full implementation of Java ldiv and lrem; checks for
 4431     // special case as described in JVM spec., p.243 & p.271.
 4432     //
 4433     //         normal case                           special case
 4434     //
 4435     // input : rax: dividend                         min_long
 4436     //         reg: divisor                          -1
 4437     //
 4438     // output: rax: quotient  (= rax idiv reg)       min_long
 4439     //         rdx: remainder (= rax irem reg)       0
 4440     //
    //  Code sequence:
 4442     //
 4443     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4444     //    7:   00 00 80
 4445     //    a:   48 39 d0                cmp    %rdx,%rax
 4446     //    d:   75 08                   jne    17 <normal>
 4447     //    f:   33 d2                   xor    %edx,%edx
 4448     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4449     //   15:   74 05                   je     1c <done>
 4450     // 0000000000000017 <normal>:
 4451     //   17:   48 99                   cqto
 4452     //   19:   48 f7 f9                idiv   $div
 4453     // 000000000000001c <done>:
 4454     Label normal;
 4455     Label done;
 4456 
 4457     // mov    $0x8000000000000000,%rdx
 4458     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4459 
 4460     // cmp    %rdx,%rax
 4461     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4462 
 4463     // jne    17 <normal>
 4464     __ jccb(Assembler::notEqual, normal);
 4465 
 4466     // xor    %edx,%edx
 4467     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4468 
 4469     // cmp    $0xffffffffffffffff,$div
 4470     __ cmpq($div$$Register, -1);
 4471 
    // je     1c <done>
 4473     __ jccb(Assembler::equal, done);
 4474 
 4475     // <normal>
 4476     // cqto
 4477     __ bind(normal);
 4478     __ cdqq();
 4479 
    // idivq
 4481     // <done>
 4482     __ idivq($div$$Register);
 4483     __ bind(done);
 4484   %}
 4485 
 4486   enc_class clear_avx %{
 4487     DEBUG_ONLY(int off0 = __ offset());
 4488     if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when the current compiled code
      // uses wide vectors, to avoid the AVX <-> SSE transition penalty
      // during calls.
 4492       __ vzeroupper();
 4493     }
 4494     DEBUG_ONLY(int off1 = __ offset());
 4495     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4496   %}
 4497 
 4498   enc_class Java_To_Runtime(method meth) %{
 4499     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4500     __ call(r10);
 4501     __ post_call_nop();
 4502   %}
 4503 
 4504   enc_class Java_Static_Call(method meth)
 4505   %{
 4506     // JAVA STATIC CALL
 4507     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4508     // determine who we intended to call.
 4509     if (!_method) {
 4510       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4511     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4512       // The NOP here is purely to ensure that eliding a call to
 4513       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4514       __ addr_nop_5();
 4515       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4516     } else {
 4517       int method_index = resolved_method_index(masm);
 4518       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4519                                                   : static_call_Relocation::spec(method_index);
 4520       address mark = __ pc();
 4521       int call_offset = __ offset();
 4522       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4523       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4524         // Calls of the same statically bound method can share
 4525         // a stub to the interpreter.
 4526         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4527       } else {
 4528         // Emit stubs for static call.
 4529         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4530         __ clear_inst_mark();
 4531         if (stub == nullptr) {
 4532           ciEnv::current()->record_failure("CodeCache is full");
 4533           return;
 4534         }
 4535       }
 4536     }
 4537     __ post_call_nop();
 4538   %}
 4539 
 4540   enc_class Java_Dynamic_Call(method meth) %{
 4541     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4542     __ post_call_nop();
 4543   %}
 4544 
 4545   enc_class call_epilog %{
 4546     if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find the magic cookie on the stack
 4548       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4549       Label L;
 4550       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4551       __ jccb(Assembler::equal, L);
 4552       // Die if stack mismatch
 4553       __ int3();
 4554       __ bind(L);
 4555     }
 4556   %}
 4557 
 4558 %}
 4559 
 4560 //----------FRAME--------------------------------------------------------------
 4561 // Definition of frame structure and management information.
 4562 //
 4563 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4564 //                             |   (to get allocators register number
 4565 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4566 //  r   CALLER     |        |
 4567 //  o     |        +--------+      pad to even-align allocators stack-slot
 4568 //  w     V        |  pad0  |        numbers; owned by CALLER
 4569 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4570 //  h     ^        |   in   |  5
 4571 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4572 //  |     |        |        |  3
 4573 //  |     |        +--------+
 4574 //  V     |        | old out|      Empty on Intel, window on Sparc
 4575 //        |    old |preserve|      Must be even aligned.
 4576 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4577 //        |        |   in   |  3   area for Intel ret address
 4578 //     Owned by    |preserve|      Empty on Sparc.
 4579 //       SELF      +--------+
 4580 //        |        |  pad2  |  2   pad to align old SP
 4581 //        |        +--------+  1
 4582 //        |        | locks  |  0
 4583 //        |        +--------+----> OptoReg::stack0(), even aligned
 4584 //        |        |  pad1  | 11   pad to align new SP
 4585 //        |        +--------+
 4586 //        |        |        | 10
 4587 //        |        | spills |  9   spills
 4588 //        V        |        |  8   (pad0 slot for callee)
 4589 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4590 //        ^        |  out   |  7
 4591 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4592 //     Owned by    +--------+
 4593 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4594 //        |    new |preserve|      Must be even-aligned.
 4595 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4596 //        |        |        |
 4597 //
 4598 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4599 //         known from SELF's arguments and the Java calling convention.
 4600 //         Region 6-7 is determined per call site.
 4601 // Note 2: If the calling convention leaves holes in the incoming argument
 4602 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4603 //         are owned by the CALLEE.  Holes should not be necessary in the
 4604 //         incoming area, as the Java calling convention is completely under
 4605 //         the control of the AD file.  Doubles can be sorted and packed to
 4606 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4607 //         varargs C calling conventions.
 4608 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4609 //         even aligned with pad0 as needed.
 4610 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4611 //         region 6-11 is even aligned; it may be padded out more so that
 4612 //         the region from SP to FP meets the minimum stack alignment.
 4613 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4614 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4615 //         SP meets the minimum alignment.
 4616 
 4617 frame
 4618 %{
 4619   // These three registers define part of the calling convention
 4620   // between compiled code and the interpreter.
 4621   inline_cache_reg(RAX);                // Inline Cache Register
 4622 
 4623   // Optional: name the operand used by cisc-spilling to access
 4624   // [stack_pointer + offset]
 4625   cisc_spilling_operand_name(indOffset32);
 4626 
 4627   // Number of stack slots consumed by locking an object
 4628   sync_stack_slots(2);
 4629 
 4630   // Compiled code's Frame Pointer
 4631   frame_pointer(RSP);
 4632 
 4633   // Interpreter stores its frame pointer in a register which is
 4634   // stored to the stack by I2CAdaptors.
 4635   // I2CAdaptors convert from interpreted java to compiled java.
 4636   interpreter_frame_pointer(RBP);
 4637 
 4638   // Stack alignment requirement
 4639   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4640 
 4641   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4642   // for calls to C.  Supports the var-args backing area for register parms.
 4643   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4644 
 4645   // The after-PROLOG location of the return address.  Location of
 4646   // return address specifies a type (REG or STACK) and a number
 4647   // representing the register number (i.e. - use a register name) or
 4648   // stack slot.
 4649   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4650   // Otherwise, it is above the locks and verification slot and alignment word
 4651   return_addr(STACK - 2 +
 4652               align_up((Compile::current()->in_preserve_stack_slots() +
 4653                         Compile::current()->fixed_slots()),
 4654                        stack_alignment_in_slots()));
 4655 
 4656   // Location of compiled Java return values.  Same as C for now.
 4657   return_value
 4658   %{
 4659     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4660            "only return normal values");
 4661 
 4662     static const int lo[Op_RegL + 1] = {
 4663       0,
 4664       0,
 4665       RAX_num,  // Op_RegN
 4666       RAX_num,  // Op_RegI
 4667       RAX_num,  // Op_RegP
 4668       XMM0_num, // Op_RegF
 4669       XMM0_num, // Op_RegD
 4670       RAX_num   // Op_RegL
 4671     };
 4672     static const int hi[Op_RegL + 1] = {
 4673       0,
 4674       0,
 4675       OptoReg::Bad, // Op_RegN
 4676       OptoReg::Bad, // Op_RegI
 4677       RAX_H_num,    // Op_RegP
 4678       OptoReg::Bad, // Op_RegF
 4679       XMM0b_num,    // Op_RegD
 4680       RAX_H_num     // Op_RegL
 4681     };
 4682     // Excluded flags and vector registers.
 4683     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4684     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4685   %}
 4686 %}
 4687 
 4688 //----------ATTRIBUTES---------------------------------------------------------
 4689 //----------Operand Attributes-------------------------------------------------
 4690 op_attrib op_cost(0);        // Required cost attribute
 4691 
 4692 //----------Instruction Attributes---------------------------------------------
 4693 ins_attrib ins_cost(100);       // Required cost attribute
 4694 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4695 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4696                                 // a non-matching short branch variant
 4697                                 // of some long branch?
 4698 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4699                                 // be a power of 2) specifies the
 4700                                 // alignment that some part of the
 4701                                 // instruction (not necessarily the
 4702                                 // start) requires.  If > 1, a
 4703                                 // compute_padding() function must be
 4704                                 // provided for the instruction
 4705 
 4706 // Whether this node is expanded during code emission into a sequence of
 4707 // instructions and the first instruction can perform an implicit null check.
 4708 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4709 
 4710 //----------OPERANDS-----------------------------------------------------------
 4711 // Operand definitions must precede instruction definitions for correct parsing
 4712 // in the ADLC because operands constitute user defined types which are used in
 4713 // instruction definitions.
 4714 
 4715 //----------Simple Operands----------------------------------------------------
 4716 // Immediate Operands
 4717 // Integer Immediate
 4718 operand immI()
 4719 %{
 4720   match(ConI);
 4721 
 4722   op_cost(10);
 4723   format %{ %}
 4724   interface(CONST_INTER);
 4725 %}
 4726 
 4727 // Constant for test vs zero
 4728 operand immI_0()
 4729 %{
 4730   predicate(n->get_int() == 0);
 4731   match(ConI);
 4732 
 4733   op_cost(0);
 4734   format %{ %}
 4735   interface(CONST_INTER);
 4736 %}
 4737 
 4738 // Constant for increment
 4739 operand immI_1()
 4740 %{
 4741   predicate(n->get_int() == 1);
 4742   match(ConI);
 4743 
 4744   op_cost(0);
 4745   format %{ %}
 4746   interface(CONST_INTER);
 4747 %}
 4748 
 4749 // Constant for decrement
 4750 operand immI_M1()
 4751 %{
 4752   predicate(n->get_int() == -1);
 4753   match(ConI);
 4754 
 4755   op_cost(0);
 4756   format %{ %}
 4757   interface(CONST_INTER);
 4758 %}
 4759 
 4760 operand immI_2()
 4761 %{
 4762   predicate(n->get_int() == 2);
 4763   match(ConI);
 4764 
 4765   op_cost(0);
 4766   format %{ %}
 4767   interface(CONST_INTER);
 4768 %}
 4769 
 4770 operand immI_4()
 4771 %{
 4772   predicate(n->get_int() == 4);
 4773   match(ConI);
 4774 
 4775   op_cost(0);
 4776   format %{ %}
 4777   interface(CONST_INTER);
 4778 %}
 4779 
 4780 operand immI_8()
 4781 %{
 4782   predicate(n->get_int() == 8);
 4783   match(ConI);
 4784 
 4785   op_cost(0);
 4786   format %{ %}
 4787   interface(CONST_INTER);
 4788 %}
 4789 
 4790 // Valid scale values for addressing modes
 4791 operand immI2()
 4792 %{
 4793   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4794   match(ConI);
 4795 
 4796   format %{ %}
 4797   interface(CONST_INTER);
 4798 %}
 4799 
 4800 operand immU7()
 4801 %{
 4802   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4803   match(ConI);
 4804 
 4805   op_cost(5);
 4806   format %{ %}
 4807   interface(CONST_INTER);
 4808 %}
 4809 
 4810 operand immI8()
 4811 %{
 4812   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4813   match(ConI);
 4814 
 4815   op_cost(5);
 4816   format %{ %}
 4817   interface(CONST_INTER);
 4818 %}
 4819 
 4820 operand immU8()
 4821 %{
 4822   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4823   match(ConI);
 4824 
 4825   op_cost(5);
 4826   format %{ %}
 4827   interface(CONST_INTER);
 4828 %}
 4829 
 4830 operand immI16()
 4831 %{
 4832   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4833   match(ConI);
 4834 
 4835   op_cost(10);
 4836   format %{ %}
 4837   interface(CONST_INTER);
 4838 %}
 4839 
 4840 // Int Immediate non-negative
 4841 operand immU31()
 4842 %{
 4843   predicate(n->get_int() >= 0);
 4844   match(ConI);
 4845 
 4846   op_cost(0);
 4847   format %{ %}
 4848   interface(CONST_INTER);
 4849 %}
 4850 
 4851 // Pointer Immediate
 4852 operand immP()
 4853 %{
 4854   match(ConP);
 4855 
 4856   op_cost(10);
 4857   format %{ %}
 4858   interface(CONST_INTER);
 4859 %}
 4860 
 4861 // Null Pointer Immediate
 4862 operand immP0()
 4863 %{
 4864   predicate(n->get_ptr() == 0);
 4865   match(ConP);
 4866 
 4867   op_cost(5);
 4868   format %{ %}
 4869   interface(CONST_INTER);
 4870 %}
 4871 
 4872 // Pointer Immediate
 4873 operand immN() %{
 4874   match(ConN);
 4875 
 4876   op_cost(10);
 4877   format %{ %}
 4878   interface(CONST_INTER);
 4879 %}
 4880 
 4881 operand immNKlass() %{
 4882   match(ConNKlass);
 4883 
 4884   op_cost(10);
 4885   format %{ %}
 4886   interface(CONST_INTER);
 4887 %}
 4888 
 4889 // Null Pointer Immediate
 4890 operand immN0() %{
 4891   predicate(n->get_narrowcon() == 0);
 4892   match(ConN);
 4893 
 4894   op_cost(5);
 4895   format %{ %}
 4896   interface(CONST_INTER);
 4897 %}
 4898 
 4899 operand immP31()
 4900 %{
 4901   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 4902             && (n->get_ptr() >> 31) == 0);
 4903   match(ConP);
 4904 
 4905   op_cost(5);
 4906   format %{ %}
 4907   interface(CONST_INTER);
 4908 %}
 4909 
 4910 
 4911 // Long Immediate
 4912 operand immL()
 4913 %{
 4914   match(ConL);
 4915 
 4916   op_cost(20);
 4917   format %{ %}
 4918   interface(CONST_INTER);
 4919 %}
 4920 
 4921 // Long Immediate 8-bit
 4922 operand immL8()
 4923 %{
 4924   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 4925   match(ConL);
 4926 
 4927   op_cost(5);
 4928   format %{ %}
 4929   interface(CONST_INTER);
 4930 %}
 4931 
 4932 // Long Immediate 32-bit unsigned
 4933 operand immUL32()
 4934 %{
 4935   predicate(n->get_long() == (unsigned int) (n->get_long()));
 4936   match(ConL);
 4937 
 4938   op_cost(10);
 4939   format %{ %}
 4940   interface(CONST_INTER);
 4941 %}
 4942 
 4943 // Long Immediate 32-bit signed
 4944 operand immL32()
 4945 %{
 4946   predicate(n->get_long() == (int) (n->get_long()));
 4947   match(ConL);
 4948 
 4949   op_cost(15);
 4950   format %{ %}
 4951   interface(CONST_INTER);
 4952 %}
 4953 
 4954 operand immL_Pow2()
 4955 %{
 4956   predicate(is_power_of_2((julong)n->get_long()));
 4957   match(ConL);
 4958 
 4959   op_cost(15);
 4960   format %{ %}
 4961   interface(CONST_INTER);
 4962 %}
 4963 
 4964 operand immL_NotPow2()
 4965 %{
 4966   predicate(is_power_of_2((julong)~n->get_long()));
 4967   match(ConL);
 4968 
 4969   op_cost(15);
 4970   format %{ %}
 4971   interface(CONST_INTER);
 4972 %}
 4973 
 4974 // Long Immediate zero
 4975 operand immL0()
 4976 %{
 4977   predicate(n->get_long() == 0L);
 4978   match(ConL);
 4979 
 4980   op_cost(10);
 4981   format %{ %}
 4982   interface(CONST_INTER);
 4983 %}
 4984 
 4985 // Constant for increment
 4986 operand immL1()
 4987 %{
 4988   predicate(n->get_long() == 1);
 4989   match(ConL);
 4990 
 4991   format %{ %}
 4992   interface(CONST_INTER);
 4993 %}
 4994 
 4995 // Constant for decrement
 4996 operand immL_M1()
 4997 %{
 4998   predicate(n->get_long() == -1);
 4999   match(ConL);
 5000 
 5001   format %{ %}
 5002   interface(CONST_INTER);
 5003 %}
 5004 
 5005 // Long Immediate: low 32-bit mask
 5006 operand immL_32bits()
 5007 %{
 5008   predicate(n->get_long() == 0xFFFFFFFFL);
 5009   match(ConL);
 5010   op_cost(20);
 5011 
 5012   format %{ %}
 5013   interface(CONST_INTER);
 5014 %}
 5015 
 5016 // Int Immediate: 2^n-1, positive
 5017 operand immI_Pow2M1()
 5018 %{
 5019   predicate((n->get_int() > 0)
 5020             && is_power_of_2((juint)n->get_int() + 1));
 5021   match(ConI);
 5022 
 5023   op_cost(20);
 5024   format %{ %}
 5025   interface(CONST_INTER);
 5026 %}
 5027 
 5028 // Float Immediate zero
 5029 operand immF0()
 5030 %{
 5031   predicate(jint_cast(n->getf()) == 0);
 5032   match(ConF);
 5033 
 5034   op_cost(5);
 5035   format %{ %}
 5036   interface(CONST_INTER);
 5037 %}
 5038 
 5039 // Float Immediate
 5040 operand immF()
 5041 %{
 5042   match(ConF);
 5043 
 5044   op_cost(15);
 5045   format %{ %}
 5046   interface(CONST_INTER);
 5047 %}
 5048 
 5049 // Half Float Immediate
 5050 operand immH()
 5051 %{
 5052   match(ConH);
 5053 
 5054   op_cost(15);
 5055   format %{ %}
 5056   interface(CONST_INTER);
 5057 %}
 5058 
 5059 // Double Immediate zero
 5060 operand immD0()
 5061 %{
 5062   predicate(jlong_cast(n->getd()) == 0);
 5063   match(ConD);
 5064 
 5065   op_cost(5);
 5066   format %{ %}
 5067   interface(CONST_INTER);
 5068 %}
 5069 
 5070 // Double Immediate
 5071 operand immD()
 5072 %{
 5073   match(ConD);
 5074 
 5075   op_cost(15);
 5076   format %{ %}
 5077   interface(CONST_INTER);
 5078 %}
 5079 
 5080 // Immediates for special shifts (sign extend)
 5081 
 5082 // Constants for increment
 5083 operand immI_16()
 5084 %{
 5085   predicate(n->get_int() == 16);
 5086   match(ConI);
 5087 
 5088   format %{ %}
 5089   interface(CONST_INTER);
 5090 %}
 5091 
 5092 operand immI_24()
 5093 %{
 5094   predicate(n->get_int() == 24);
 5095   match(ConI);
 5096 
 5097   format %{ %}
 5098   interface(CONST_INTER);
 5099 %}
 5100 
 5101 // Constant for byte-wide masking
 5102 operand immI_255()
 5103 %{
 5104   predicate(n->get_int() == 255);
 5105   match(ConI);
 5106 
 5107   format %{ %}
 5108   interface(CONST_INTER);
 5109 %}
 5110 
 5111 // Constant for short-wide masking
 5112 operand immI_65535()
 5113 %{
 5114   predicate(n->get_int() == 65535);
 5115   match(ConI);
 5116 
 5117   format %{ %}
 5118   interface(CONST_INTER);
 5119 %}
 5120 
 5121 // Constant for byte-wide masking
 5122 operand immL_255()
 5123 %{
 5124   predicate(n->get_long() == 255);
 5125   match(ConL);
 5126 
 5127   format %{ %}
 5128   interface(CONST_INTER);
 5129 %}
 5130 
 5131 // Constant for short-wide masking
 5132 operand immL_65535()
 5133 %{
 5134   predicate(n->get_long() == 65535);
 5135   match(ConL);
 5136 
 5137   format %{ %}
 5138   interface(CONST_INTER);
 5139 %}
 5140 
 5141 operand kReg()
 5142 %{
 5143   constraint(ALLOC_IN_RC(vectmask_reg));
 5144   match(RegVectMask);
  format %{ %}
 5146   interface(REG_INTER);
 5147 %}
 5148 
 5149 // Register Operands
 5150 // Integer Register
 5151 operand rRegI()
 5152 %{
 5153   constraint(ALLOC_IN_RC(int_reg));
 5154   match(RegI);
 5155 
 5156   match(rax_RegI);
 5157   match(rbx_RegI);
 5158   match(rcx_RegI);
 5159   match(rdx_RegI);
 5160   match(rdi_RegI);
 5161 
 5162   format %{ %}
 5163   interface(REG_INTER);
 5164 %}
 5165 
 5166 // Special Registers
 5167 operand rax_RegI()
 5168 %{
 5169   constraint(ALLOC_IN_RC(int_rax_reg));
 5170   match(RegI);
 5171   match(rRegI);
 5172 
 5173   format %{ "RAX" %}
 5174   interface(REG_INTER);
 5175 %}
 5176 
 5177 // Special Registers
 5178 operand rbx_RegI()
 5179 %{
 5180   constraint(ALLOC_IN_RC(int_rbx_reg));
 5181   match(RegI);
 5182   match(rRegI);
 5183 
 5184   format %{ "RBX" %}
 5185   interface(REG_INTER);
 5186 %}
 5187 
 5188 operand rcx_RegI()
 5189 %{
 5190   constraint(ALLOC_IN_RC(int_rcx_reg));
 5191   match(RegI);
 5192   match(rRegI);
 5193 
 5194   format %{ "RCX" %}
 5195   interface(REG_INTER);
 5196 %}
 5197 
 5198 operand rdx_RegI()
 5199 %{
 5200   constraint(ALLOC_IN_RC(int_rdx_reg));
 5201   match(RegI);
 5202   match(rRegI);
 5203 
 5204   format %{ "RDX" %}
 5205   interface(REG_INTER);
 5206 %}
 5207 
 5208 operand rdi_RegI()
 5209 %{
 5210   constraint(ALLOC_IN_RC(int_rdi_reg));
 5211   match(RegI);
 5212   match(rRegI);
 5213 
 5214   format %{ "RDI" %}
 5215   interface(REG_INTER);
 5216 %}
 5217 
 5218 operand no_rax_rdx_RegI()
 5219 %{
 5220   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5221   match(RegI);
 5222   match(rbx_RegI);
 5223   match(rcx_RegI);
 5224   match(rdi_RegI);
 5225 
 5226   format %{ %}
 5227   interface(REG_INTER);
 5228 %}
 5229 
 5230 operand no_rbp_r13_RegI()
 5231 %{
 5232   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5233   match(RegI);
 5234   match(rRegI);
 5235   match(rax_RegI);
 5236   match(rbx_RegI);
 5237   match(rcx_RegI);
 5238   match(rdx_RegI);
 5239   match(rdi_RegI);
 5240 
 5241   format %{ %}
 5242   interface(REG_INTER);
 5243 %}
 5244 
 5245 // Pointer Register
 5246 operand any_RegP()
 5247 %{
 5248   constraint(ALLOC_IN_RC(any_reg));
 5249   match(RegP);
 5250   match(rax_RegP);
 5251   match(rbx_RegP);
 5252   match(rdi_RegP);
 5253   match(rsi_RegP);
 5254   match(rbp_RegP);
 5255   match(r15_RegP);
 5256   match(rRegP);
 5257 
 5258   format %{ %}
 5259   interface(REG_INTER);
 5260 %}
 5261 
 5262 operand rRegP()
 5263 %{
 5264   constraint(ALLOC_IN_RC(ptr_reg));
 5265   match(RegP);
 5266   match(rax_RegP);
 5267   match(rbx_RegP);
 5268   match(rdi_RegP);
 5269   match(rsi_RegP);
 5270   match(rbp_RegP);  // See Q&A below about
 5271   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5272 
 5273   format %{ %}
 5274   interface(REG_INTER);
 5275 %}
 5276 
 5277 operand rRegN() %{
 5278   constraint(ALLOC_IN_RC(int_reg));
 5279   match(RegN);
 5280 
 5281   format %{ %}
 5282   interface(REG_INTER);
 5283 %}
 5284 
// Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
// Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be chosen by
// the allocator for the output.
// The same logic applies to rbp_RegP being a match for rRegP: if
// PreserveFramePointer is true, RBP serves as the proper frame pointer and is
// not included in ptr_reg, so RBP is never allocated for an instruction's
// output either.
 5295 
 5296 // This operand is not allowed to use RBP even if
 5297 // RBP is not used to hold the frame pointer.
 5298 operand no_rbp_RegP()
 5299 %{
 5300   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5301   match(RegP);
 5302   match(rbx_RegP);
 5303   match(rsi_RegP);
 5304   match(rdi_RegP);
 5305 
 5306   format %{ %}
 5307   interface(REG_INTER);
 5308 %}
 5309 
 5310 // Special Registers
 5311 // Return a pointer value
 5312 operand rax_RegP()
 5313 %{
 5314   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5315   match(RegP);
 5316   match(rRegP);
 5317 
 5318   format %{ %}
 5319   interface(REG_INTER);
 5320 %}
 5321 
 5322 // Special Registers
 5323 // Return a compressed pointer value
 5324 operand rax_RegN()
 5325 %{
 5326   constraint(ALLOC_IN_RC(int_rax_reg));
 5327   match(RegN);
 5328   match(rRegN);
 5329 
 5330   format %{ %}
 5331   interface(REG_INTER);
 5332 %}
 5333 
 5334 // Used in AtomicAdd
 5335 operand rbx_RegP()
 5336 %{
 5337   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5338   match(RegP);
 5339   match(rRegP);
 5340 
 5341   format %{ %}
 5342   interface(REG_INTER);
 5343 %}
 5344 
 5345 operand rsi_RegP()
 5346 %{
 5347   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5348   match(RegP);
 5349   match(rRegP);
 5350 
 5351   format %{ %}
 5352   interface(REG_INTER);
 5353 %}
 5354 
 5355 operand rbp_RegP()
 5356 %{
 5357   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5358   match(RegP);
 5359   match(rRegP);
 5360 
 5361   format %{ %}
 5362   interface(REG_INTER);
 5363 %}
 5364 
 5365 // Used in rep stosq
 5366 operand rdi_RegP()
 5367 %{
 5368   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5369   match(RegP);
 5370   match(rRegP);
 5371 
 5372   format %{ %}
 5373   interface(REG_INTER);
 5374 %}
 5375 
 5376 operand r15_RegP()
 5377 %{
 5378   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5379   match(RegP);
 5380   match(rRegP);
 5381 
 5382   format %{ %}
 5383   interface(REG_INTER);
 5384 %}
 5385 
 5386 operand rRegL()
 5387 %{
 5388   constraint(ALLOC_IN_RC(long_reg));
 5389   match(RegL);
 5390   match(rax_RegL);
 5391   match(rdx_RegL);
 5392 
 5393   format %{ %}
 5394   interface(REG_INTER);
 5395 %}
 5396 
 5397 // Special Registers
 5398 operand no_rax_rdx_RegL()
 5399 %{
 5400   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5401   match(RegL);
 5402   match(rRegL);
 5403 
 5404   format %{ %}
 5405   interface(REG_INTER);
 5406 %}
 5407 
 5408 operand rax_RegL()
 5409 %{
 5410   constraint(ALLOC_IN_RC(long_rax_reg));
 5411   match(RegL);
 5412   match(rRegL);
 5413 
 5414   format %{ "RAX" %}
 5415   interface(REG_INTER);
 5416 %}
 5417 
 5418 operand rcx_RegL()
 5419 %{
 5420   constraint(ALLOC_IN_RC(long_rcx_reg));
 5421   match(RegL);
 5422   match(rRegL);
 5423 
 5424   format %{ %}
 5425   interface(REG_INTER);
 5426 %}
 5427 
 5428 operand rdx_RegL()
 5429 %{
 5430   constraint(ALLOC_IN_RC(long_rdx_reg));
 5431   match(RegL);
 5432   match(rRegL);
 5433 
 5434   format %{ %}
 5435   interface(REG_INTER);
 5436 %}
 5437 
 5438 operand r11_RegL()
 5439 %{
 5440   constraint(ALLOC_IN_RC(long_r11_reg));
 5441   match(RegL);
 5442   match(rRegL);
 5443 
 5444   format %{ %}
 5445   interface(REG_INTER);
 5446 %}
 5447 
 5448 operand no_rbp_r13_RegL()
 5449 %{
 5450   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5451   match(RegL);
 5452   match(rRegL);
 5453   match(rax_RegL);
 5454   match(rcx_RegL);
 5455   match(rdx_RegL);
 5456 
 5457   format %{ %}
 5458   interface(REG_INTER);
 5459 %}
 5460 
 5461 // Flags register, used as output of compare instructions
 5462 operand rFlagsReg()
 5463 %{
 5464   constraint(ALLOC_IN_RC(int_flags));
 5465   match(RegFlags);
 5466 
 5467   format %{ "RFLAGS" %}
 5468   interface(REG_INTER);
 5469 %}
 5470 
 5471 // Flags register, used as output of FLOATING POINT compare instructions
 5472 operand rFlagsRegU()
 5473 %{
 5474   constraint(ALLOC_IN_RC(int_flags));
 5475   match(RegFlags);
 5476 
 5477   format %{ "RFLAGS_U" %}
 5478   interface(REG_INTER);
 5479 %}
 5480 
 5481 operand rFlagsRegUCF() %{
 5482   constraint(ALLOC_IN_RC(int_flags));
 5483   match(RegFlags);
 5484   predicate(false);
 5485 
 5486   format %{ "RFLAGS_U_CF" %}
 5487   interface(REG_INTER);
 5488 %}
 5489 
 5490 // Float register operands
 5491 operand regF() %{
 5492    constraint(ALLOC_IN_RC(float_reg));
 5493    match(RegF);
 5494 
 5495    format %{ %}
 5496    interface(REG_INTER);
 5497 %}
 5498 
 5499 // Float register operands
 5500 operand legRegF() %{
 5501    constraint(ALLOC_IN_RC(float_reg_legacy));
 5502    match(RegF);
 5503 
 5504    format %{ %}
 5505    interface(REG_INTER);
 5506 %}
 5507 
 5508 // Float register operands
 5509 operand vlRegF() %{
 5510    constraint(ALLOC_IN_RC(float_reg_vl));
 5511    match(RegF);
 5512 
 5513    format %{ %}
 5514    interface(REG_INTER);
 5515 %}
 5516 
 5517 // Double register operands
 5518 operand regD() %{
 5519    constraint(ALLOC_IN_RC(double_reg));
 5520    match(RegD);
 5521 
 5522    format %{ %}
 5523    interface(REG_INTER);
 5524 %}
 5525 
 5526 // Double register operands
 5527 operand legRegD() %{
 5528    constraint(ALLOC_IN_RC(double_reg_legacy));
 5529    match(RegD);
 5530 
 5531    format %{ %}
 5532    interface(REG_INTER);
 5533 %}
 5534 
 5535 // Double register operands
 5536 operand vlRegD() %{
 5537    constraint(ALLOC_IN_RC(double_reg_vl));
 5538    match(RegD);
 5539 
 5540    format %{ %}
 5541    interface(REG_INTER);
 5542 %}
 5543 
 5544 //----------Memory Operands----------------------------------------------------
 5545 // Direct Memory Operand
 5546 // operand direct(immP addr)
 5547 // %{
 5548 //   match(addr);
 5549 
 5550 //   format %{ "[$addr]" %}
 5551 //   interface(MEMORY_INTER) %{
 5552 //     base(0xFFFFFFFF);
 5553 //     index(0x4);
 5554 //     scale(0x0);
 5555 //     disp($addr);
 5556 //   %}
 5557 // %}
 5558 
 5559 // Indirect Memory Operand
 5560 operand indirect(any_RegP reg)
 5561 %{
 5562   constraint(ALLOC_IN_RC(ptr_reg));
 5563   match(reg);
 5564 
 5565   format %{ "[$reg]" %}
 5566   interface(MEMORY_INTER) %{
 5567     base($reg);
 5568     index(0x4);
 5569     scale(0x0);
 5570     disp(0x0);
 5571   %}
 5572 %}
 5573 
 5574 // Indirect Memory Plus Short Offset Operand
 5575 operand indOffset8(any_RegP reg, immL8 off)
 5576 %{
 5577   constraint(ALLOC_IN_RC(ptr_reg));
 5578   match(AddP reg off);
 5579 
 5580   format %{ "[$reg + $off (8-bit)]" %}
 5581   interface(MEMORY_INTER) %{
 5582     base($reg);
 5583     index(0x4);
 5584     scale(0x0);
 5585     disp($off);
 5586   %}
 5587 %}
 5588 
 5589 // Indirect Memory Plus Long Offset Operand
 5590 operand indOffset32(any_RegP reg, immL32 off)
 5591 %{
 5592   constraint(ALLOC_IN_RC(ptr_reg));
 5593   match(AddP reg off);
 5594 
 5595   format %{ "[$reg + $off (32-bit)]" %}
 5596   interface(MEMORY_INTER) %{
 5597     base($reg);
 5598     index(0x4);
 5599     scale(0x0);
 5600     disp($off);
 5601   %}
 5602 %}
 5603 
 5604 // Indirect Memory Plus Index Register Plus Offset Operand
 5605 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5606 %{
 5607   constraint(ALLOC_IN_RC(ptr_reg));
 5608   match(AddP (AddP reg lreg) off);
 5609 
 5610   op_cost(10);
 5611   format %{"[$reg + $off + $lreg]" %}
 5612   interface(MEMORY_INTER) %{
 5613     base($reg);
 5614     index($lreg);
 5615     scale(0x0);
 5616     disp($off);
 5617   %}
 5618 %}
 5619 
// Indirect Memory Plus Index Register Operand
 5621 operand indIndex(any_RegP reg, rRegL lreg)
 5622 %{
 5623   constraint(ALLOC_IN_RC(ptr_reg));
 5624   match(AddP reg lreg);
 5625 
 5626   op_cost(10);
 5627   format %{"[$reg + $lreg]" %}
 5628   interface(MEMORY_INTER) %{
 5629     base($reg);
 5630     index($lreg);
 5631     scale(0x0);
 5632     disp(0x0);
 5633   %}
 5634 %}
 5635 
 5636 // Indirect Memory Times Scale Plus Index Register
 5637 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5638 %{
 5639   constraint(ALLOC_IN_RC(ptr_reg));
 5640   match(AddP reg (LShiftL lreg scale));
 5641 
 5642   op_cost(10);
 5643   format %{"[$reg + $lreg << $scale]" %}
 5644   interface(MEMORY_INTER) %{
 5645     base($reg);
 5646     index($lreg);
 5647     scale($scale);
 5648     disp(0x0);
 5649   %}
 5650 %}
 5651 
 5652 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5653 %{
 5654   constraint(ALLOC_IN_RC(ptr_reg));
 5655   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5656   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5657 
 5658   op_cost(10);
 5659   format %{"[$reg + pos $idx << $scale]" %}
 5660   interface(MEMORY_INTER) %{
 5661     base($reg);
 5662     index($idx);
 5663     scale($scale);
 5664     disp(0x0);
 5665   %}
 5666 %}
 5667 
 5668 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5669 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5670 %{
 5671   constraint(ALLOC_IN_RC(ptr_reg));
 5672   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5673 
 5674   op_cost(10);
 5675   format %{"[$reg + $off + $lreg << $scale]" %}
 5676   interface(MEMORY_INTER) %{
 5677     base($reg);
 5678     index($lreg);
 5679     scale($scale);
 5680     disp($off);
 5681   %}
 5682 %}
 5683 
 5684 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5685 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5686 %{
 5687   constraint(ALLOC_IN_RC(ptr_reg));
 5688   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5689   match(AddP (AddP reg (ConvI2L idx)) off);
 5690 
 5691   op_cost(10);
 5692   format %{"[$reg + $off + $idx]" %}
 5693   interface(MEMORY_INTER) %{
 5694     base($reg);
 5695     index($idx);
 5696     scale(0x0);
 5697     disp($off);
 5698   %}
 5699 %}
 5700 
 5701 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5702 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5703 %{
 5704   constraint(ALLOC_IN_RC(ptr_reg));
 5705   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5706   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5707 
 5708   op_cost(10);
 5709   format %{"[$reg + $off + $idx << $scale]" %}
 5710   interface(MEMORY_INTER) %{
 5711     base($reg);
 5712     index($idx);
 5713     scale($scale);
 5714     disp($off);
 5715   %}
 5716 %}
 5717 
 5718 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base register, so we can't free r12 even with CompressedOops::base() == nullptr.
 5721 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5722   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5723   constraint(ALLOC_IN_RC(ptr_reg));
 5724   match(AddP (DecodeN reg) off);
 5725 
 5726   op_cost(10);
 5727   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5728   interface(MEMORY_INTER) %{
 5729     base(0xc); // R12
 5730     index($reg);
 5731     scale(0x3);
 5732     disp($off);
 5733   %}
 5734 %}
 5735 
 5736 // Indirect Memory Operand
 5737 operand indirectNarrow(rRegN reg)
 5738 %{
 5739   predicate(CompressedOops::shift() == 0);
 5740   constraint(ALLOC_IN_RC(ptr_reg));
 5741   match(DecodeN reg);
 5742 
 5743   format %{ "[$reg]" %}
 5744   interface(MEMORY_INTER) %{
 5745     base($reg);
 5746     index(0x4);
 5747     scale(0x0);
 5748     disp(0x0);
 5749   %}
 5750 %}
 5751 
 5752 // Indirect Memory Plus Short Offset Operand
 5753 operand indOffset8Narrow(rRegN reg, immL8 off)
 5754 %{
 5755   predicate(CompressedOops::shift() == 0);
 5756   constraint(ALLOC_IN_RC(ptr_reg));
 5757   match(AddP (DecodeN reg) off);
 5758 
 5759   format %{ "[$reg + $off (8-bit)]" %}
 5760   interface(MEMORY_INTER) %{
 5761     base($reg);
 5762     index(0x4);
 5763     scale(0x0);
 5764     disp($off);
 5765   %}
 5766 %}
 5767 
 5768 // Indirect Memory Plus Long Offset Operand
 5769 operand indOffset32Narrow(rRegN reg, immL32 off)
 5770 %{
 5771   predicate(CompressedOops::shift() == 0);
 5772   constraint(ALLOC_IN_RC(ptr_reg));
 5773   match(AddP (DecodeN reg) off);
 5774 
 5775   format %{ "[$reg + $off (32-bit)]" %}
 5776   interface(MEMORY_INTER) %{
 5777     base($reg);
 5778     index(0x4);
 5779     scale(0x0);
 5780     disp($off);
 5781   %}
 5782 %}
 5783 
 5784 // Indirect Memory Plus Index Register Plus Offset Operand
 5785 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5786 %{
 5787   predicate(CompressedOops::shift() == 0);
 5788   constraint(ALLOC_IN_RC(ptr_reg));
 5789   match(AddP (AddP (DecodeN reg) lreg) off);
 5790 
 5791   op_cost(10);
 5792   format %{"[$reg + $off + $lreg]" %}
 5793   interface(MEMORY_INTER) %{
 5794     base($reg);
 5795     index($lreg);
 5796     scale(0x0);
 5797     disp($off);
 5798   %}
 5799 %}
 5800 
// Indirect Memory Plus Index Register Operand
 5802 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5803 %{
 5804   predicate(CompressedOops::shift() == 0);
 5805   constraint(ALLOC_IN_RC(ptr_reg));
 5806   match(AddP (DecodeN reg) lreg);
 5807 
 5808   op_cost(10);
 5809   format %{"[$reg + $lreg]" %}
 5810   interface(MEMORY_INTER) %{
 5811     base($reg);
 5812     index($lreg);
 5813     scale(0x0);
 5814     disp(0x0);
 5815   %}
 5816 %}
 5817 
 5818 // Indirect Memory Times Scale Plus Index Register
 5819 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5820 %{
 5821   predicate(CompressedOops::shift() == 0);
 5822   constraint(ALLOC_IN_RC(ptr_reg));
 5823   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5824 
 5825   op_cost(10);
 5826   format %{"[$reg + $lreg << $scale]" %}
 5827   interface(MEMORY_INTER) %{
 5828     base($reg);
 5829     index($lreg);
 5830     scale($scale);
 5831     disp(0x0);
 5832   %}
 5833 %}
 5834 
 5835 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5836 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5837 %{
 5838   predicate(CompressedOops::shift() == 0);
 5839   constraint(ALLOC_IN_RC(ptr_reg));
 5840   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5841 
 5842   op_cost(10);
 5843   format %{"[$reg + $off + $lreg << $scale]" %}
 5844   interface(MEMORY_INTER) %{
 5845     base($reg);
 5846     index($lreg);
 5847     scale($scale);
 5848     disp($off);
 5849   %}
 5850 %}
 5851 
// Indirect Memory Plus Positive Index Register Plus Offset Operand
 5853 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5854 %{
 5855   constraint(ALLOC_IN_RC(ptr_reg));
 5856   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5857   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5858 
 5859   op_cost(10);
 5860   format %{"[$reg + $off + $idx]" %}
 5861   interface(MEMORY_INTER) %{
 5862     base($reg);
 5863     index($idx);
 5864     scale(0x0);
 5865     disp($off);
 5866   %}
 5867 %}
 5868 
 5869 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5870 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5871 %{
 5872   constraint(ALLOC_IN_RC(ptr_reg));
 5873   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5874   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 5875 
 5876   op_cost(10);
 5877   format %{"[$reg + $off + $idx << $scale]" %}
 5878   interface(MEMORY_INTER) %{
 5879     base($reg);
 5880     index($idx);
 5881     scale($scale);
 5882     disp($off);
 5883   %}
 5884 %}
 5885 
 5886 //----------Special Memory Operands--------------------------------------------
 5887 // Stack Slot Operand - This operand is used for loading and storing temporary
 5888 //                      values on the stack where a match requires a value to
 5889 //                      flow through memory.
 5890 operand stackSlotP(sRegP reg)
 5891 %{
 5892   constraint(ALLOC_IN_RC(stack_slots));
 5893   // No match rule because this operand is only generated in matching
 5894 
 5895   format %{ "[$reg]" %}
 5896   interface(MEMORY_INTER) %{
 5897     base(0x4);   // RSP
 5898     index(0x4);  // No Index
 5899     scale(0x0);  // No Scale
 5900     disp($reg);  // Stack Offset
 5901   %}
 5902 %}
 5903 
 5904 operand stackSlotI(sRegI reg)
 5905 %{
 5906   constraint(ALLOC_IN_RC(stack_slots));
 5907   // No match rule because this operand is only generated in matching
 5908 
 5909   format %{ "[$reg]" %}
 5910   interface(MEMORY_INTER) %{
 5911     base(0x4);   // RSP
 5912     index(0x4);  // No Index
 5913     scale(0x0);  // No Scale
 5914     disp($reg);  // Stack Offset
 5915   %}
 5916 %}
 5917 
 5918 operand stackSlotF(sRegF reg)
 5919 %{
 5920   constraint(ALLOC_IN_RC(stack_slots));
 5921   // No match rule because this operand is only generated in matching
 5922 
 5923   format %{ "[$reg]" %}
 5924   interface(MEMORY_INTER) %{
 5925     base(0x4);   // RSP
 5926     index(0x4);  // No Index
 5927     scale(0x0);  // No Scale
 5928     disp($reg);  // Stack Offset
 5929   %}
 5930 %}
 5931 
 5932 operand stackSlotD(sRegD reg)
 5933 %{
 5934   constraint(ALLOC_IN_RC(stack_slots));
 5935   // No match rule because this operand is only generated in matching
 5936 
 5937   format %{ "[$reg]" %}
 5938   interface(MEMORY_INTER) %{
 5939     base(0x4);   // RSP
 5940     index(0x4);  // No Index
 5941     scale(0x0);  // No Scale
 5942     disp($reg);  // Stack Offset
 5943   %}
 5944 %}

operand stackSlotL(sRegL reg)
 5946 %{
 5947   constraint(ALLOC_IN_RC(stack_slots));
 5948   // No match rule because this operand is only generated in matching
 5949 
 5950   format %{ "[$reg]" %}
 5951   interface(MEMORY_INTER) %{
 5952     base(0x4);   // RSP
 5953     index(0x4);  // No Index
 5954     scale(0x0);  // No Scale
 5955     disp($reg);  // Stack Offset
 5956   %}
 5957 %}
 5958 
 5959 //----------Conditional Branch Operands----------------------------------------
 5960 // Comparison Op  - This is the operation of the comparison, and is limited to
 5961 //                  the following set of codes:
 5962 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 5963 //
 5964 // Other attributes of the comparison, such as unsignedness, are specified
 5965 // by the comparison instruction that sets a condition code flags register.
 5966 // That result is represented by a flags operand whose subtype is appropriate
 5967 // to the unsignedness (etc.) of the comparison.
 5968 //
 5969 // Later, the instruction which matches both the Comparison Op (a Bool) and
 5970 // the flags (produced by the Cmp) specifies the coding of the comparison op
 5971 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 5972 
 5973 // Comparison Code
 5974 operand cmpOp()
 5975 %{
 5976   match(Bool);
 5977 
 5978   format %{ "" %}
 5979   interface(COND_INTER) %{
 5980     equal(0x4, "e");
 5981     not_equal(0x5, "ne");
 5982     less(0xC, "l");
 5983     greater_equal(0xD, "ge");
 5984     less_equal(0xE, "le");
 5985     greater(0xF, "g");
 5986     overflow(0x0, "o");
 5987     no_overflow(0x1, "no");
 5988   %}
 5989 %}
 5990 
 5991 // Comparison Code, unsigned compare.  Used by FP also, with
 5992 // C2 (unordered) turned into GT or LT already.  The other bits
 5993 // C0 and C3 are turned into Carry & Zero flags.
 5994 operand cmpOpU()
 5995 %{
 5996   match(Bool);
 5997 
 5998   format %{ "" %}
 5999   interface(COND_INTER) %{
 6000     equal(0x4, "e");
 6001     not_equal(0x5, "ne");
 6002     less(0x2, "b");
 6003     greater_equal(0x3, "ae");
 6004     less_equal(0x6, "be");
 6005     greater(0x7, "a");
 6006     overflow(0x0, "o");
 6007     no_overflow(0x1, "no");
 6008   %}
 6009 %}
 6010 
 6011 
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
 6015 operand cmpOpUCF() %{
 6016   match(Bool);
 6017   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 6018             n->as_Bool()->_test._test == BoolTest::ge ||
 6019             n->as_Bool()->_test._test == BoolTest::le ||
 6020             n->as_Bool()->_test._test == BoolTest::gt ||
 6021             n->in(1)->in(1) == n->in(1)->in(2));
 6022   format %{ "" %}
 6023   interface(COND_INTER) %{
 6024     equal(0xb, "np");
 6025     not_equal(0xa, "p");
 6026     less(0x2, "b");
 6027     greater_equal(0x3, "ae");
 6028     less_equal(0x6, "be");
 6029     greater(0x7, "a");
 6030     overflow(0x0, "o");
 6031     no_overflow(0x1, "no");
 6032   %}
 6033 %}
 6034 
 6035 
 6036 // Floating comparisons that can be fixed up with extra conditional jumps
 6037 operand cmpOpUCF2() %{
 6038   match(Bool);
 6039   predicate((n->as_Bool()->_test._test == BoolTest::ne ||
 6040              n->as_Bool()->_test._test == BoolTest::eq) &&
 6041             n->in(1)->in(1) != n->in(1)->in(2));
 6042   format %{ "" %}
 6043   interface(COND_INTER) %{
 6044     equal(0x4, "e");
 6045     not_equal(0x5, "ne");
 6046     less(0x2, "b");
 6047     greater_equal(0x3, "ae");
 6048     less_equal(0x6, "be");
 6049     greater(0x7, "a");
 6050     overflow(0x0, "o");
 6051     no_overflow(0x1, "no");
 6052   %}
 6053 %}
 6054 
// Operands for bound floating-point register arguments
 6056 operand rxmm0() %{
 6057   constraint(ALLOC_IN_RC(xmm0_reg));
 6058   match(VecX);
  format %{ %}
 6060   interface(REG_INTER);
 6061 %}
 6062 
 6063 // Vectors
 6064 
 6065 // Dummy generic vector class. Should be used for all vector operands.
 6066 // Replaced with vec[SDXYZ] during post-selection pass.
 6067 operand vec() %{
 6068   constraint(ALLOC_IN_RC(dynamic));
 6069   match(VecX);
 6070   match(VecY);
 6071   match(VecZ);
 6072   match(VecS);
 6073   match(VecD);
 6074 
 6075   format %{ %}
 6076   interface(REG_INTER);
 6077 %}
 6078 
 6079 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6080 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6081 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6082 // runtime code generation via reg_class_dynamic.
 6083 operand legVec() %{
 6084   constraint(ALLOC_IN_RC(dynamic));
 6085   match(VecX);
 6086   match(VecY);
 6087   match(VecZ);
 6088   match(VecS);
 6089   match(VecD);
 6090 
 6091   format %{ %}
 6092   interface(REG_INTER);
 6093 %}
 6094 
 6095 // Replaces vec during post-selection cleanup. See above.
 6096 operand vecS() %{
 6097   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6098   match(VecS);
 6099 
 6100   format %{ %}
 6101   interface(REG_INTER);
 6102 %}
 6103 
 6104 // Replaces legVec during post-selection cleanup. See above.
 6105 operand legVecS() %{
 6106   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6107   match(VecS);
 6108 
 6109   format %{ %}
 6110   interface(REG_INTER);
 6111 %}
 6112 
 6113 // Replaces vec during post-selection cleanup. See above.
 6114 operand vecD() %{
 6115   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6116   match(VecD);
 6117 
 6118   format %{ %}
 6119   interface(REG_INTER);
 6120 %}
 6121 
 6122 // Replaces legVec during post-selection cleanup. See above.
 6123 operand legVecD() %{
 6124   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6125   match(VecD);
 6126 
 6127   format %{ %}
 6128   interface(REG_INTER);
 6129 %}
 6130 
 6131 // Replaces vec during post-selection cleanup. See above.
 6132 operand vecX() %{
 6133   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6134   match(VecX);
 6135 
 6136   format %{ %}
 6137   interface(REG_INTER);
 6138 %}
 6139 
 6140 // Replaces legVec during post-selection cleanup. See above.
 6141 operand legVecX() %{
 6142   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6143   match(VecX);
 6144 
 6145   format %{ %}
 6146   interface(REG_INTER);
 6147 %}
 6148 
 6149 // Replaces vec during post-selection cleanup. See above.
 6150 operand vecY() %{
 6151   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6152   match(VecY);
 6153 
 6154   format %{ %}
 6155   interface(REG_INTER);
 6156 %}
 6157 
 6158 // Replaces legVec during post-selection cleanup. See above.
 6159 operand legVecY() %{
 6160   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6161   match(VecY);
 6162 
 6163   format %{ %}
 6164   interface(REG_INTER);
 6165 %}
 6166 
 6167 // Replaces vec during post-selection cleanup. See above.
 6168 operand vecZ() %{
 6169   constraint(ALLOC_IN_RC(vectorz_reg));
 6170   match(VecZ);
 6171 
 6172   format %{ %}
 6173   interface(REG_INTER);
 6174 %}
 6175 
 6176 // Replaces legVec during post-selection cleanup. See above.
 6177 operand legVecZ() %{
 6178   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6179   match(VecZ);
 6180 
 6181   format %{ %}
 6182   interface(REG_INTER);
 6183 %}
 6184 
 6185 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
 6187 // instruction definitions by not requiring the AD writer to specify separate
 6188 // instructions for every form of operand when the instruction accepts
 6189 // multiple operand types with the same basic encoding and format.  The classic
 6190 // case of this is memory operands.
 6191 
 6192 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6193                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6194                indCompressedOopOffset,
 6195                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6196                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6197                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
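
// Declaring an instruction with a "memory" operand lets a single instruct,
// such as loadB below, match any of the addressing forms listed above; ADLC
// generates the matcher cases for each member of the class.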
 6198 
 6199 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 6201 pipeline %{
 6202 
 6203 //----------ATTRIBUTES---------------------------------------------------------
 6204 attributes %{
  variable_size_instructions;        // Variable-sized instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 6208   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6209   instruction_fetch_units = 1;       // of 16 bytes
 6210 %}
 6211 
 6212 //----------RESOURCES----------------------------------------------------------
 6213 // Resources are the functional units available to the machine
 6214 
 6215 // Generic P2/P3 pipeline
// 3 decoders; only D0 handles big operands.  A "bundle" is the group of up
// to 3 instructions decoded in one cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops; only ALU0 handles mul instructions.
 6220 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6221            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6222            BR, FPU,
 6223            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6224 
 6225 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6226 // Pipeline Description specifies the stages in the machine's pipeline
 6227 
 6228 // Generic P2/P3 pipeline
 6229 pipe_desc(S0, S1, S2, S3, S4, S5);
 6230 
 6231 //----------PIPELINE CLASSES---------------------------------------------------
 6232 // Pipeline Classes describe the stages in which input and output are
 6233 // referenced by the hardware pipeline.
 6234 
 6235 // Naming convention: ialu or fpu
 6236 // Then: _reg
 6237 // Then: _reg if there is a 2nd register
 6238 // Then: _long if it's a pair of instructions implementing a long
 6239 // Then: _fat if it requires the big decoder
 6240 //   Or: _mem if it requires the big decoder and a memory unit.
 6241 
 6242 // Integer ALU reg operation
 6243 pipe_class ialu_reg(rRegI dst)
 6244 %{
 6245     single_instruction;
 6246     dst    : S4(write);
 6247     dst    : S3(read);
 6248     DECODE : S0;        // any decoder
 6249     ALU    : S3;        // any alu
 6250 %}
 6251 
 6252 // Long ALU reg operation
 6253 pipe_class ialu_reg_long(rRegL dst)
 6254 %{
 6255     instruction_count(2);
 6256     dst    : S4(write);
 6257     dst    : S3(read);
 6258     DECODE : S0(2);     // any 2 decoders
 6259     ALU    : S3(2);     // both alus
 6260 %}
 6261 
 6262 // Integer ALU reg operation using big decoder
 6263 pipe_class ialu_reg_fat(rRegI dst)
 6264 %{
 6265     single_instruction;
 6266     dst    : S4(write);
 6267     dst    : S3(read);
 6268     D0     : S0;        // big decoder only
 6269     ALU    : S3;        // any alu
 6270 %}
 6271 
 6272 // Integer ALU reg-reg operation
 6273 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6274 %{
 6275     single_instruction;
 6276     dst    : S4(write);
 6277     src    : S3(read);
 6278     DECODE : S0;        // any decoder
 6279     ALU    : S3;        // any alu
 6280 %}
 6281 
 6282 // Integer ALU reg-reg operation
 6283 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6284 %{
 6285     single_instruction;
 6286     dst    : S4(write);
 6287     src    : S3(read);
 6288     D0     : S0;        // big decoder only
 6289     ALU    : S3;        // any alu
 6290 %}
 6291 
 6292 // Integer ALU reg-mem operation
 6293 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6294 %{
 6295     single_instruction;
 6296     dst    : S5(write);
 6297     mem    : S3(read);
 6298     D0     : S0;        // big decoder only
 6299     ALU    : S4;        // any alu
 6300     MEM    : S3;        // any mem
 6301 %}
 6302 
 6303 // Integer mem operation (prefetch)
 6304 pipe_class ialu_mem(memory mem)
 6305 %{
 6306     single_instruction;
 6307     mem    : S3(read);
 6308     D0     : S0;        // big decoder only
 6309     MEM    : S3;        // any mem
 6310 %}
 6311 
 6312 // Integer Store to Memory
 6313 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6314 %{
 6315     single_instruction;
 6316     mem    : S3(read);
 6317     src    : S5(read);
 6318     D0     : S0;        // big decoder only
 6319     ALU    : S4;        // any alu
 6320     MEM    : S3;
 6321 %}
 6322 
 6323 // // Long Store to Memory
 6324 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6325 // %{
 6326 //     instruction_count(2);
 6327 //     mem    : S3(read);
 6328 //     src    : S5(read);
 6329 //     D0     : S0(2);          // big decoder only; twice
 6330 //     ALU    : S4(2);     // any 2 alus
 6331 //     MEM    : S3(2);  // Both mems
 6332 // %}
 6333 
 6334 // Integer Store to Memory
 6335 pipe_class ialu_mem_imm(memory mem)
 6336 %{
 6337     single_instruction;
 6338     mem    : S3(read);
 6339     D0     : S0;        // big decoder only
 6340     ALU    : S4;        // any alu
 6341     MEM    : S3;
 6342 %}
 6343 
 6344 // Integer ALU0 reg-reg operation
 6345 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6346 %{
 6347     single_instruction;
 6348     dst    : S4(write);
 6349     src    : S3(read);
 6350     D0     : S0;        // Big decoder only
 6351     ALU0   : S3;        // only alu0
 6352 %}
 6353 
 6354 // Integer ALU0 reg-mem operation
 6355 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6356 %{
 6357     single_instruction;
 6358     dst    : S5(write);
 6359     mem    : S3(read);
 6360     D0     : S0;        // big decoder only
 6361     ALU0   : S4;        // ALU0 only
 6362     MEM    : S3;        // any mem
 6363 %}
 6364 
 6365 // Integer ALU reg-reg operation
 6366 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6367 %{
 6368     single_instruction;
 6369     cr     : S4(write);
 6370     src1   : S3(read);
 6371     src2   : S3(read);
 6372     DECODE : S0;        // any decoder
 6373     ALU    : S3;        // any alu
 6374 %}
 6375 
 6376 // Integer ALU reg-imm operation
 6377 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6378 %{
 6379     single_instruction;
 6380     cr     : S4(write);
 6381     src1   : S3(read);
 6382     DECODE : S0;        // any decoder
 6383     ALU    : S3;        // any alu
 6384 %}
 6385 
 6386 // Integer ALU reg-mem operation
 6387 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6388 %{
 6389     single_instruction;
 6390     cr     : S4(write);
 6391     src1   : S3(read);
 6392     src2   : S3(read);
 6393     D0     : S0;        // big decoder only
 6394     ALU    : S4;        // any alu
 6395     MEM    : S3;
 6396 %}
 6397 
 6398 // Conditional move reg-reg
 6399 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6400 %{
 6401     instruction_count(4);
 6402     y      : S4(read);
 6403     q      : S3(read);
 6404     p      : S3(read);
 6405     DECODE : S0(4);     // any decoder
 6406 %}
 6407 
 6408 // Conditional move reg-reg
 6409 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6410 %{
 6411     single_instruction;
 6412     dst    : S4(write);
 6413     src    : S3(read);
 6414     cr     : S3(read);
 6415     DECODE : S0;        // any decoder
 6416 %}
 6417 
 6418 // Conditional move reg-mem
 6419 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6420 %{
 6421     single_instruction;
 6422     dst    : S4(write);
 6423     src    : S3(read);
 6424     cr     : S3(read);
 6425     DECODE : S0;        // any decoder
 6426     MEM    : S3;
 6427 %}
 6428 
 6429 // Conditional move reg-reg long
 6430 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6431 %{
 6432     single_instruction;
 6433     dst    : S4(write);
 6434     src    : S3(read);
 6435     cr     : S3(read);
 6436     DECODE : S0(2);     // any 2 decoders
 6437 %}
 6438 
 6439 // Float reg-reg operation
 6440 pipe_class fpu_reg(regD dst)
 6441 %{
 6442     instruction_count(2);
 6443     dst    : S3(read);
 6444     DECODE : S0(2);     // any 2 decoders
 6445     FPU    : S3;
 6446 %}
 6447 
 6448 // Float reg-reg operation
 6449 pipe_class fpu_reg_reg(regD dst, regD src)
 6450 %{
 6451     instruction_count(2);
 6452     dst    : S4(write);
 6453     src    : S3(read);
 6454     DECODE : S0(2);     // any 2 decoders
 6455     FPU    : S3;
 6456 %}
 6457 
 6458 // Float reg-reg operation
 6459 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6460 %{
 6461     instruction_count(3);
 6462     dst    : S4(write);
 6463     src1   : S3(read);
 6464     src2   : S3(read);
 6465     DECODE : S0(3);     // any 3 decoders
 6466     FPU    : S3(2);
 6467 %}
 6468 
 6469 // Float reg-reg operation
 6470 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6471 %{
 6472     instruction_count(4);
 6473     dst    : S4(write);
 6474     src1   : S3(read);
 6475     src2   : S3(read);
 6476     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
 6478     FPU    : S3(2);
 6479 %}
 6480 
 6481 // Float reg-reg operation
 6482 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6483 %{
 6484     instruction_count(4);
 6485     dst    : S4(write);
 6486     src1   : S3(read);
 6487     src2   : S3(read);
 6488     src3   : S3(read);
 6489     DECODE : S1(3);     // any 3 decoders
 6490     D0     : S0;        // Big decoder only
 6491     FPU    : S3(2);
 6492     MEM    : S3;
 6493 %}
 6494 
 6495 // Float reg-mem operation
 6496 pipe_class fpu_reg_mem(regD dst, memory mem)
 6497 %{
 6498     instruction_count(2);
 6499     dst    : S5(write);
 6500     mem    : S3(read);
 6501     D0     : S0;        // big decoder only
 6502     DECODE : S1;        // any decoder for FPU POP
 6503     FPU    : S4;
 6504     MEM    : S3;        // any mem
 6505 %}
 6506 
 6507 // Float reg-mem operation
 6508 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6509 %{
 6510     instruction_count(3);
 6511     dst    : S5(write);
 6512     src1   : S3(read);
 6513     mem    : S3(read);
 6514     D0     : S0;        // big decoder only
 6515     DECODE : S1(2);     // any decoder for FPU POP
 6516     FPU    : S4;
 6517     MEM    : S3;        // any mem
 6518 %}
 6519 
 6520 // Float mem-reg operation
 6521 pipe_class fpu_mem_reg(memory mem, regD src)
 6522 %{
 6523     instruction_count(2);
 6524     src    : S5(read);
 6525     mem    : S3(read);
 6526     DECODE : S0;        // any decoder for FPU PUSH
 6527     D0     : S1;        // big decoder only
 6528     FPU    : S4;
 6529     MEM    : S3;        // any mem
 6530 %}
 6531 
 6532 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6533 %{
 6534     instruction_count(3);
 6535     src1   : S3(read);
 6536     src2   : S3(read);
 6537     mem    : S3(read);
 6538     DECODE : S0(2);     // any decoder for FPU PUSH
 6539     D0     : S1;        // big decoder only
 6540     FPU    : S4;
 6541     MEM    : S3;        // any mem
 6542 %}
 6543 
 6544 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6545 %{
 6546     instruction_count(3);
 6547     src1   : S3(read);
 6548     src2   : S3(read);
 6549     mem    : S4(read);
 6550     DECODE : S0;        // any decoder for FPU PUSH
 6551     D0     : S0(2);     // big decoder only
 6552     FPU    : S4;
 6553     MEM    : S3(2);     // any mem
 6554 %}
 6555 
 6556 pipe_class fpu_mem_mem(memory dst, memory src1)
 6557 %{
 6558     instruction_count(2);
 6559     src1   : S3(read);
 6560     dst    : S4(read);
 6561     D0     : S0(2);     // big decoder only
 6562     MEM    : S3(2);     // any mem
 6563 %}
 6564 
 6565 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6566 %{
 6567     instruction_count(3);
 6568     src1   : S3(read);
 6569     src2   : S3(read);
 6570     dst    : S4(read);
 6571     D0     : S0(3);     // big decoder only
 6572     FPU    : S4;
 6573     MEM    : S3(3);     // any mem
 6574 %}
 6575 
 6576 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6577 %{
 6578     instruction_count(3);
 6579     src1   : S4(read);
 6580     mem    : S4(read);
 6581     DECODE : S0;        // any decoder for FPU PUSH
 6582     D0     : S0(2);     // big decoder only
 6583     FPU    : S4;
 6584     MEM    : S3(2);     // any mem
 6585 %}
 6586 
 6587 // Float load constant
 6588 pipe_class fpu_reg_con(regD dst)
 6589 %{
 6590     instruction_count(2);
 6591     dst    : S5(write);
 6592     D0     : S0;        // big decoder only for the load
 6593     DECODE : S1;        // any decoder for FPU POP
 6594     FPU    : S4;
 6595     MEM    : S3;        // any mem
 6596 %}
 6597 
 6598 // Float load constant
 6599 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6600 %{
 6601     instruction_count(3);
 6602     dst    : S5(write);
 6603     src    : S3(read);
 6604     D0     : S0;        // big decoder only for the load
 6605     DECODE : S1(2);     // any decoder for FPU POP
 6606     FPU    : S4;
 6607     MEM    : S3;        // any mem
 6608 %}
 6609 
// Unconditional branch
 6611 pipe_class pipe_jmp(label labl)
 6612 %{
 6613     single_instruction;
 6614     BR   : S3;
 6615 %}
 6616 
 6617 // Conditional branch
 6618 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6619 %{
 6620     single_instruction;
 6621     cr    : S1(read);
 6622     BR    : S3;
 6623 %}
 6624 
 6625 // Allocation idiom
 6626 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6627 %{
 6628     instruction_count(1); force_serialization;
 6629     fixed_latency(6);
 6630     heap_ptr : S3(read);
 6631     DECODE   : S0(3);
 6632     D0       : S2;
 6633     MEM      : S3;
 6634     ALU      : S3(2);
 6635     dst      : S5(write);
 6636     BR       : S5;
 6637 %}
 6638 
 6639 // Generic big/slow expanded idiom
 6640 pipe_class pipe_slow()
 6641 %{
 6642     instruction_count(10); multiple_bundles; force_serialization;
 6643     fixed_latency(100);
 6644     D0  : S0(2);
 6645     MEM : S3(2);
 6646 %}
 6647 
 6648 // The real do-nothing guy
 6649 pipe_class empty()
 6650 %{
 6651     instruction_count(0);
 6652 %}
 6653 
 6654 // Define the class for the Nop node
 6655 define
 6656 %{
 6657    MachNop = empty;
 6658 %}
 6659 
 6660 %}
 6661 
 6662 //----------INSTRUCTIONS-------------------------------------------------------
 6663 //
 6664 // match      -- States which machine-independent subtree may be replaced
 6665 //               by this instruction.
 6666 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6667 //               selection to identify a minimum cost tree of machine
 6668 //               instructions that matches a tree of machine-independent
 6669 //               instructions.
 6670 // format     -- A string providing the disassembly for this instruction.
 6671 //               The value of an instruction's operand may be inserted
 6672 //               by referring to it with a '$' prefix.
 6673 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6674 //               to within an encode class as $primary, $secondary, and $tertiary
 6675 //               rrspectively.  The primary opcode is commonly used to
 6676 //               indicate the type of machine instruction, while secondary
 6677 //               and tertiary are often used for prefix options or addressing
 6678 //               modes.
 6679 // ins_encode -- A list of encode classes with parameters. The encode class
 6680 //               name must have been defined in an 'enc_class' specification
 6681 //               in the encode section of the architecture description.
 6682 
 6683 // ============================================================================
 6684 
 6685 instruct ShouldNotReachHere() %{
 6686   match(Halt);
 6687   format %{ "stop\t# ShouldNotReachHere" %}
 6688   ins_encode %{
 6689     if (is_reachable()) {
 6690       const char* str = __ code_string(_halt_reason);
 6691       __ stop(str);
 6692     }
 6693   %}
 6694   ins_pipe(pipe_slow);
 6695 %}
 6696 
 6697 // ============================================================================
 6698 
 6699 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6700 // Load Float
 6701 instruct MoveF2VL(vlRegF dst, regF src) %{
 6702   match(Set dst src);
 6703   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6704   ins_encode %{
 6705     ShouldNotReachHere();
 6706   %}
 6707   ins_pipe( fpu_reg_reg );
 6708 %}
 6709 
 6710 // Load Float
 6711 instruct MoveF2LEG(legRegF dst, regF src) %{
 6712   match(Set dst src);
 6713   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6714   ins_encode %{
 6715     ShouldNotReachHere();
 6716   %}
 6717   ins_pipe( fpu_reg_reg );
 6718 %}
 6719 
 6720 // Load Float
 6721 instruct MoveVL2F(regF dst, vlRegF src) %{
 6722   match(Set dst src);
 6723   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6724   ins_encode %{
 6725     ShouldNotReachHere();
 6726   %}
 6727   ins_pipe( fpu_reg_reg );
 6728 %}
 6729 
 6730 // Load Float
 6731 instruct MoveLEG2F(regF dst, legRegF src) %{
 6732   match(Set dst src);
 6733   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6734   ins_encode %{
 6735     ShouldNotReachHere();
 6736   %}
 6737   ins_pipe( fpu_reg_reg );
 6738 %}
 6739 
 6740 // Load Double
 6741 instruct MoveD2VL(vlRegD dst, regD src) %{
 6742   match(Set dst src);
 6743   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6744   ins_encode %{
 6745     ShouldNotReachHere();
 6746   %}
 6747   ins_pipe( fpu_reg_reg );
 6748 %}
 6749 
 6750 // Load Double
 6751 instruct MoveD2LEG(legRegD dst, regD src) %{
 6752   match(Set dst src);
 6753   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6754   ins_encode %{
 6755     ShouldNotReachHere();
 6756   %}
 6757   ins_pipe( fpu_reg_reg );
 6758 %}
 6759 
 6760 // Load Double
 6761 instruct MoveVL2D(regD dst, vlRegD src) %{
 6762   match(Set dst src);
 6763   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6764   ins_encode %{
 6765     ShouldNotReachHere();
 6766   %}
 6767   ins_pipe( fpu_reg_reg );
 6768 %}
 6769 
 6770 // Load Double
 6771 instruct MoveLEG2D(regD dst, legRegD src) %{
 6772   match(Set dst src);
 6773   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6774   ins_encode %{
 6775     ShouldNotReachHere();
 6776   %}
 6777   ins_pipe( fpu_reg_reg );
 6778 %}
 6779 
 6780 //----------Load/Store/Move Instructions---------------------------------------
 6781 //----------Load Instructions--------------------------------------------------
 6782 
 6783 // Load Byte (8 bit signed)
 6784 instruct loadB(rRegI dst, memory mem)
 6785 %{
 6786   match(Set dst (LoadB mem));
 6787 
 6788   ins_cost(125);
 6789   format %{ "movsbl  $dst, $mem\t# byte" %}
 6790 
 6791   ins_encode %{
 6792     __ movsbl($dst$$Register, $mem$$Address);
 6793   %}
 6794 
 6795   ins_pipe(ialu_reg_mem);
 6796 %}
 6797 
 6798 // Load Byte (8 bit signed) into Long Register
 6799 instruct loadB2L(rRegL dst, memory mem)
 6800 %{
 6801   match(Set dst (ConvI2L (LoadB mem)));
 6802 
 6803   ins_cost(125);
 6804   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6805 
 6806   ins_encode %{
 6807     __ movsbq($dst$$Register, $mem$$Address);
 6808   %}
 6809 
 6810   ins_pipe(ialu_reg_mem);
 6811 %}
 6812 
 6813 // Load Unsigned Byte (8 bit UNsigned)
 6814 instruct loadUB(rRegI dst, memory mem)
 6815 %{
 6816   match(Set dst (LoadUB mem));
 6817 
 6818   ins_cost(125);
 6819   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6820 
 6821   ins_encode %{
 6822     __ movzbl($dst$$Register, $mem$$Address);
 6823   %}
 6824 
 6825   ins_pipe(ialu_reg_mem);
 6826 %}
 6827 
 6828 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6829 instruct loadUB2L(rRegL dst, memory mem)
 6830 %{
 6831   match(Set dst (ConvI2L (LoadUB mem)));
 6832 
 6833   ins_cost(125);
 6834   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6835 
 6836   ins_encode %{
 6837     __ movzbq($dst$$Register, $mem$$Address);
 6838   %}
 6839 
 6840   ins_pipe(ialu_reg_mem);
 6841 %}
 6842 
 6843 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 6844 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6845   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6846   effect(KILL cr);
 6847 
 6848   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6849             "andl    $dst, right_n_bits($mask, 8)" %}
 6850   ins_encode %{
 6851     Register Rdst = $dst$$Register;
 6852     __ movzbq(Rdst, $mem$$Address);
 6853     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 6854   %}
 6855   ins_pipe(ialu_reg_mem);
 6856 %}
 6857 
 6858 // Load Short (16 bit signed)
 6859 instruct loadS(rRegI dst, memory mem)
 6860 %{
 6861   match(Set dst (LoadS mem));
 6862 
 6863   ins_cost(125);
 6864   format %{ "movswl $dst, $mem\t# short" %}
 6865 
 6866   ins_encode %{
 6867     __ movswl($dst$$Register, $mem$$Address);
 6868   %}
 6869 
 6870   ins_pipe(ialu_reg_mem);
 6871 %}
 6872 
 6873 // Load Short (16 bit signed) to Byte (8 bit signed)
 6874 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6875   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 6876 
 6877   ins_cost(125);
 6878   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6879   ins_encode %{
 6880     __ movsbl($dst$$Register, $mem$$Address);
 6881   %}
 6882   ins_pipe(ialu_reg_mem);
 6883 %}
 6884 
 6885 // Load Short (16 bit signed) into Long Register
 6886 instruct loadS2L(rRegL dst, memory mem)
 6887 %{
 6888   match(Set dst (ConvI2L (LoadS mem)));
 6889 
 6890   ins_cost(125);
 6891   format %{ "movswq $dst, $mem\t# short -> long" %}
 6892 
 6893   ins_encode %{
 6894     __ movswq($dst$$Register, $mem$$Address);
 6895   %}
 6896 
 6897   ins_pipe(ialu_reg_mem);
 6898 %}
 6899 
 6900 // Load Unsigned Short/Char (16 bit UNsigned)
 6901 instruct loadUS(rRegI dst, memory mem)
 6902 %{
 6903   match(Set dst (LoadUS mem));
 6904 
 6905   ins_cost(125);
 6906   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 6907 
 6908   ins_encode %{
 6909     __ movzwl($dst$$Register, $mem$$Address);
 6910   %}
 6911 
 6912   ins_pipe(ialu_reg_mem);
 6913 %}
 6914 
 6915 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 6916 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6917   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 6918 
 6919   ins_cost(125);
 6920   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 6921   ins_encode %{
 6922     __ movsbl($dst$$Register, $mem$$Address);
 6923   %}
 6924   ins_pipe(ialu_reg_mem);
 6925 %}
 6926 
 6927 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 6928 instruct loadUS2L(rRegL dst, memory mem)
 6929 %{
 6930   match(Set dst (ConvI2L (LoadUS mem)));
 6931 
 6932   ins_cost(125);
 6933   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 6934 
 6935   ins_encode %{
 6936     __ movzwq($dst$$Register, $mem$$Address);
 6937   %}
 6938 
 6939   ins_pipe(ialu_reg_mem);
 6940 %}
 6941 
 6942 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 6943 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 6944   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 6945 
 6946   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 6947   ins_encode %{
 6948     __ movzbq($dst$$Register, $mem$$Address);
 6949   %}
 6950   ins_pipe(ialu_reg_mem);
 6951 %}
 6952 
 6953 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 6954 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6955   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 6956   effect(KILL cr);
 6957 
 6958   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 6959             "andl    $dst, right_n_bits($mask, 16)" %}
 6960   ins_encode %{
 6961     Register Rdst = $dst$$Register;
 6962     __ movzwq(Rdst, $mem$$Address);
 6963     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 6964   %}
 6965   ins_pipe(ialu_reg_mem);
 6966 %}
 6967 
 6968 // Load Integer
 6969 instruct loadI(rRegI dst, memory mem)
 6970 %{
 6971   match(Set dst (LoadI mem));
 6972 
 6973   ins_cost(125);
 6974   format %{ "movl    $dst, $mem\t# int" %}
 6975 
 6976   ins_encode %{
 6977     __ movl($dst$$Register, $mem$$Address);
 6978   %}
 6979 
 6980   ins_pipe(ialu_reg_mem);
 6981 %}
 6982 
 6983 // Load Integer (32 bit signed) to Byte (8 bit signed)
 6984 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6985   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 6986 
 6987   ins_cost(125);
 6988   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 6989   ins_encode %{
 6990     __ movsbl($dst$$Register, $mem$$Address);
 6991   %}
 6992   ins_pipe(ialu_reg_mem);
 6993 %}
 6994 
 6995 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 6996 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 6997   match(Set dst (AndI (LoadI mem) mask));
 6998 
 6999   ins_cost(125);
 7000   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7001   ins_encode %{
 7002     __ movzbl($dst$$Register, $mem$$Address);
 7003   %}
 7004   ins_pipe(ialu_reg_mem);
 7005 %}
 7006 
 7007 // Load Integer (32 bit signed) to Short (16 bit signed)
 7008 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7009   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7010 
 7011   ins_cost(125);
 7012   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7013   ins_encode %{
 7014     __ movswl($dst$$Register, $mem$$Address);
 7015   %}
 7016   ins_pipe(ialu_reg_mem);
 7017 %}
 7018 
 7019 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 7020 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7021   match(Set dst (AndI (LoadI mem) mask));
 7022 
 7023   ins_cost(125);
 7024   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7025   ins_encode %{
 7026     __ movzwl($dst$$Register, $mem$$Address);
 7027   %}
 7028   ins_pipe(ialu_reg_mem);
 7029 %}
 7030 
 7031 // Load Integer into Long Register
 7032 instruct loadI2L(rRegL dst, memory mem)
 7033 %{
 7034   match(Set dst (ConvI2L (LoadI mem)));
 7035 
 7036   ins_cost(125);
 7037   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7038 
 7039   ins_encode %{
 7040     __ movslq($dst$$Register, $mem$$Address);
 7041   %}
 7042 
 7043   ins_pipe(ialu_reg_mem);
 7044 %}
 7045 
 7046 // Load Integer with mask 0xFF into Long Register
 7047 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7048   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7049 
 7050   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7051   ins_encode %{
 7052     __ movzbq($dst$$Register, $mem$$Address);
 7053   %}
 7054   ins_pipe(ialu_reg_mem);
 7055 %}
 7056 
 7057 // Load Integer with mask 0xFFFF into Long Register
 7058 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7059   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7060 
 7061   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7062   ins_encode %{
 7063     __ movzwq($dst$$Register, $mem$$Address);
 7064   %}
 7065   ins_pipe(ialu_reg_mem);
 7066 %}
 7067 
 7068 // Load Integer with a 31-bit mask into Long Register
 7069 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7070   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7071   effect(KILL cr);
 7072 
 7073   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7074             "andl    $dst, $mask" %}
 7075   ins_encode %{
 7076     Register Rdst = $dst$$Register;
 7077     __ movl(Rdst, $mem$$Address);
 7078     __ andl(Rdst, $mask$$constant);
 7079   %}
 7080   ins_pipe(ialu_reg_mem);
 7081 %}
 7082 
 7083 // Load Unsigned Integer into Long Register
 7084 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7085 %{
 7086   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7087 
 7088   ins_cost(125);
 7089   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7090 
 7091   ins_encode %{
 7092     __ movl($dst$$Register, $mem$$Address);
 7093   %}
 7094 
 7095   ins_pipe(ialu_reg_mem);
 7096 %}
 7097 
 7098 // Load Long
 7099 instruct loadL(rRegL dst, memory mem)
 7100 %{
 7101   match(Set dst (LoadL mem));
 7102 
 7103   ins_cost(125);
 7104   format %{ "movq    $dst, $mem\t# long" %}
 7105 
 7106   ins_encode %{
 7107     __ movq($dst$$Register, $mem$$Address);
 7108   %}
 7109 
 7110   ins_pipe(ialu_reg_mem); // XXX
 7111 %}
 7112 
 7113 // Load Range
 7114 instruct loadRange(rRegI dst, memory mem)
 7115 %{
 7116   match(Set dst (LoadRange mem));
 7117 
 7118   ins_cost(125); // XXX
 7119   format %{ "movl    $dst, $mem\t# range" %}
 7120   ins_encode %{
 7121     __ movl($dst$$Register, $mem$$Address);
 7122   %}
 7123   ins_pipe(ialu_reg_mem);
 7124 %}
 7125 
 7126 // Load Pointer
 7127 instruct loadP(rRegP dst, memory mem)
 7128 %{
 7129   match(Set dst (LoadP mem));
 7130   predicate(n->as_Load()->barrier_data() == 0);
 7131 
 7132   ins_cost(125); // XXX
 7133   format %{ "movq    $dst, $mem\t# ptr" %}
 7134   ins_encode %{
 7135     __ movq($dst$$Register, $mem$$Address);
 7136   %}
 7137   ins_pipe(ialu_reg_mem); // XXX
 7138 %}
 7139 
 7140 // Load Compressed Pointer
 7141 instruct loadN(rRegN dst, memory mem)
 7142 %{
 7143    predicate(n->as_Load()->barrier_data() == 0);
 7144    match(Set dst (LoadN mem));
 7145 
 7146    ins_cost(125); // XXX
 7147    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7148    ins_encode %{
 7149      __ movl($dst$$Register, $mem$$Address);
 7150    %}
 7151    ins_pipe(ialu_reg_mem); // XXX
 7152 %}
 7153 
 7154 
 7155 // Load Klass Pointer
 7156 instruct loadKlass(rRegP dst, memory mem)
 7157 %{
 7158   match(Set dst (LoadKlass mem));
 7159 
 7160   ins_cost(125); // XXX
 7161   format %{ "movq    $dst, $mem\t# class" %}
 7162   ins_encode %{
 7163     __ movq($dst$$Register, $mem$$Address);
 7164   %}
 7165   ins_pipe(ialu_reg_mem); // XXX
 7166 %}
 7167 
 7168 // Load narrow Klass Pointer
 7169 instruct loadNKlass(rRegN dst, memory mem)
 7170 %{
 7171   predicate(!UseCompactObjectHeaders);
 7172   match(Set dst (LoadNKlass mem));
 7173 
 7174   ins_cost(125); // XXX
 7175   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7176   ins_encode %{
 7177     __ movl($dst$$Register, $mem$$Address);
 7178   %}
 7179   ins_pipe(ialu_reg_mem); // XXX
 7180 %}
 7181 
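// With compact object headers the narrow klass pointer is stored in the
// upper bits of the mark word, so loading it takes a load plus a shift; on
// APX-capable CPUs the eshrl form fuses both into a single instruction.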
 7182 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7183 %{
 7184   predicate(UseCompactObjectHeaders);
 7185   match(Set dst (LoadNKlass mem));
 7186   effect(KILL cr);
 7187   ins_cost(125);
 7188   format %{
 7189     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7190     "shrl    $dst, markWord::klass_shift_at_offset"
 7191   %}
 7192   ins_encode %{
 7193     if (UseAPX) {
 7194       __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
    } else {
 7197       __ movl($dst$$Register, $mem$$Address);
 7198       __ shrl($dst$$Register, markWord::klass_shift_at_offset);
 7199     }
 7200   %}
 7201   ins_pipe(ialu_reg_mem);
 7202 %}
 7203 
 7204 // Load Float
 7205 instruct loadF(regF dst, memory mem)
 7206 %{
 7207   match(Set dst (LoadF mem));
 7208 
 7209   ins_cost(145); // XXX
 7210   format %{ "movss   $dst, $mem\t# float" %}
 7211   ins_encode %{
 7212     __ movflt($dst$$XMMRegister, $mem$$Address);
 7213   %}
 7214   ins_pipe(pipe_slow); // XXX
 7215 %}
 7216 
 7217 // Load Double
 7218 instruct loadD_partial(regD dst, memory mem)
 7219 %{
 7220   predicate(!UseXmmLoadAndClearUpper);
 7221   match(Set dst (LoadD mem));
 7222 
 7223   ins_cost(145); // XXX
 7224   format %{ "movlpd  $dst, $mem\t# double" %}
 7225   ins_encode %{
 7226     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7227   %}
 7228   ins_pipe(pipe_slow); // XXX
 7229 %}
 7230 
 7231 instruct loadD(regD dst, memory mem)
 7232 %{
 7233   predicate(UseXmmLoadAndClearUpper);
 7234   match(Set dst (LoadD mem));
 7235 
 7236   ins_cost(145); // XXX
 7237   format %{ "movsd   $dst, $mem\t# double" %}
 7238   ins_encode %{
 7239     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7240   %}
 7241   ins_pipe(pipe_slow); // XXX
 7242 %}
 7243 
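// Java's Math.min/max must order -0.0 below +0.0 and propagate NaN, which
// plain minss/maxss do not, hence the multi-instruction expansions below.
// AVX10.2 provides min/max instructions with the required semantics directly.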
 7244 // max = java.lang.Math.max(float a, float b)
 7245 instruct maxF_avx10_reg(regF dst, regF a, regF b) %{
 7246   predicate(VM_Version::supports_avx10_2());
 7247   match(Set dst (MaxF a b));
 7248   format %{ "maxF $dst, $a, $b" %}
 7249   ins_encode %{
 7250     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
 7251   %}
 7252   ins_pipe( pipe_slow );
 7253 %}
 7254 
 7255 // max = java.lang.Math.max(float a, float b)
 7256 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7257   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7258   match(Set dst (MaxF a b));
 7259   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7260   format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7261   ins_encode %{
 7262     __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7263   %}
 7264   ins_pipe( pipe_slow );
 7265 %}
 7266 
 7267 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7268   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7269   match(Set dst (MaxF a b));
 7270   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7271 
  format %{ "maxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7273   ins_encode %{
 7274     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7275                     false /*min*/, true /*single*/);
 7276   %}
 7277   ins_pipe( pipe_slow );
 7278 %}
 7279 
 7280 // max = java.lang.Math.max(double a, double b)
 7281 instruct maxD_avx10_reg(regD dst, regD a, regD b) %{
 7282   predicate(VM_Version::supports_avx10_2());
 7283   match(Set dst (MaxD a b));
 7284   format %{ "maxD $dst, $a, $b" %}
 7285   ins_encode %{
 7286     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
 7287   %}
 7288   ins_pipe( pipe_slow );
 7289 %}
 7290 
 7291 // max = java.lang.Math.max(double a, double b)
 7292 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7293   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7294   match(Set dst (MaxD a b));
 7295   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7296   format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7297   ins_encode %{
 7298     __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7299   %}
 7300   ins_pipe( pipe_slow );
 7301 %}
 7302 
 7303 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7304   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7305   match(Set dst (MaxD a b));
 7306   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7307 
 7308   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7309   ins_encode %{
 7310     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7311                     false /*min*/, false /*single*/);
 7312   %}
 7313   ins_pipe( pipe_slow );
 7314 %}
 7315 
// min = java.lang.Math.min(float a, float b)
 7317 instruct minF_avx10_reg(regF dst, regF a, regF b) %{
 7318   predicate(VM_Version::supports_avx10_2());
 7319   match(Set dst (MinF a b));
 7320   format %{ "minF $dst, $a, $b" %}
 7321   ins_encode %{
 7322     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
 7323   %}
 7324   ins_pipe( pipe_slow );
 7325 %}
 7326 
 7327 // min = java.lang.Math.min(float a, float b)
 7328 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7329   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7330   match(Set dst (MinF a b));
 7331   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7332   format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7333   ins_encode %{
 7334     __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7335   %}
 7336   ins_pipe( pipe_slow );
 7337 %}
 7338 
 7339 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7340   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7341   match(Set dst (MinF a b));
 7342   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7343 
 7344   format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7345   ins_encode %{
 7346     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7347                     true /*min*/, true /*single*/);
 7348   %}
 7349   ins_pipe( pipe_slow );
 7350 %}
 7351 
// min = java.lang.Math.min(double a, double b)
 7353 instruct minD_avx10_reg(regD dst, regD a, regD b) %{
 7354   predicate(VM_Version::supports_avx10_2());
 7355   match(Set dst (MinD a b));
 7356   format %{ "minD $dst, $a, $b" %}
 7357   ins_encode %{
 7358     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
 7359   %}
 7360   ins_pipe( pipe_slow );
 7361 %}
 7362 
 7363 // min = java.lang.Math.min(double a, double b)
 7364 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7365   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7366   match(Set dst (MinD a b));
 7367   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7369   ins_encode %{
 7370     __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7371   %}
 7372   ins_pipe( pipe_slow );
 7373 %}
 7374 
 7375 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7376   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7377   match(Set dst (MinD a b));
 7378   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7379 
  format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7381   ins_encode %{
 7382     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7383                     true /*min*/, false /*single*/);
 7384   %}
 7385   ins_pipe( pipe_slow );
 7386 %}
 7387 
 7388 // Load Effective Address
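// These rules match a complete address expression (Set dst mem), letting the
// matcher fold base + index*scale + displacement into a single leaq.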
 7389 instruct leaP8(rRegP dst, indOffset8 mem)
 7390 %{
 7391   match(Set dst mem);
 7392 
 7393   ins_cost(110); // XXX
 7394   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7395   ins_encode %{
 7396     __ leaq($dst$$Register, $mem$$Address);
 7397   %}
 7398   ins_pipe(ialu_reg_reg_fat);
 7399 %}
 7400 
 7401 instruct leaP32(rRegP dst, indOffset32 mem)
 7402 %{
 7403   match(Set dst mem);
 7404 
 7405   ins_cost(110);
 7406   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7407   ins_encode %{
 7408     __ leaq($dst$$Register, $mem$$Address);
 7409   %}
 7410   ins_pipe(ialu_reg_reg_fat);
 7411 %}
 7412 
 7413 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7414 %{
 7415   match(Set dst mem);
 7416 
 7417   ins_cost(110);
 7418   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7419   ins_encode %{
 7420     __ leaq($dst$$Register, $mem$$Address);
 7421   %}
 7422   ins_pipe(ialu_reg_reg_fat);
 7423 %}
 7424 
 7425 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7426 %{
 7427   match(Set dst mem);
 7428 
 7429   ins_cost(110);
 7430   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7431   ins_encode %{
 7432     __ leaq($dst$$Register, $mem$$Address);
 7433   %}
 7434   ins_pipe(ialu_reg_reg_fat);
 7435 %}
 7436 
 7437 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7438 %{
 7439   match(Set dst mem);
 7440 
 7441   ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr posidxscale" %}
 7443   ins_encode %{
 7444     __ leaq($dst$$Register, $mem$$Address);
 7445   %}
 7446   ins_pipe(ialu_reg_reg_fat);
 7447 %}
 7448 
 7449 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7450 %{
 7451   match(Set dst mem);
 7452 
 7453   ins_cost(110);
 7454   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7455   ins_encode %{
 7456     __ leaq($dst$$Register, $mem$$Address);
 7457   %}
 7458   ins_pipe(ialu_reg_reg_fat);
 7459 %}
 7460 
 7461 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7462 %{
 7463   match(Set dst mem);
 7464 
 7465   ins_cost(110);
 7466   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7467   ins_encode %{
 7468     __ leaq($dst$$Register, $mem$$Address);
 7469   %}
 7470   ins_pipe(ialu_reg_reg_fat);
 7471 %}
 7472 
 7473 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7474 %{
 7475   match(Set dst mem);
 7476 
 7477   ins_cost(110);
 7478   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7479   ins_encode %{
 7480     __ leaq($dst$$Register, $mem$$Address);
 7481   %}
 7482   ins_pipe(ialu_reg_reg_fat);
 7483 %}
 7484 
// Load Effective Address which uses a narrow (32-bit) oop
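// leaPCompressedOopOffset handles a shifted narrow oop, folding the decode
// into the address mode; the *Narrow forms below require
// CompressedOops::shift() == 0, where the 32-bit oop serves as the address
// base directly.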
 7486 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7487 %{
 7488   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7489   match(Set dst mem);
 7490 
 7491   ins_cost(110);
 7492   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7493   ins_encode %{
 7494     __ leaq($dst$$Register, $mem$$Address);
 7495   %}
 7496   ins_pipe(ialu_reg_reg_fat);
 7497 %}
 7498 
 7499 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7500 %{
 7501   predicate(CompressedOops::shift() == 0);
 7502   match(Set dst mem);
 7503 
 7504   ins_cost(110); // XXX
 7505   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7506   ins_encode %{
 7507     __ leaq($dst$$Register, $mem$$Address);
 7508   %}
 7509   ins_pipe(ialu_reg_reg_fat);
 7510 %}
 7511 
 7512 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7513 %{
 7514   predicate(CompressedOops::shift() == 0);
 7515   match(Set dst mem);
 7516 
 7517   ins_cost(110);
 7518   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7519   ins_encode %{
 7520     __ leaq($dst$$Register, $mem$$Address);
 7521   %}
 7522   ins_pipe(ialu_reg_reg_fat);
 7523 %}
 7524 
 7525 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7526 %{
 7527   predicate(CompressedOops::shift() == 0);
 7528   match(Set dst mem);
 7529 
 7530   ins_cost(110);
 7531   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7532   ins_encode %{
 7533     __ leaq($dst$$Register, $mem$$Address);
 7534   %}
 7535   ins_pipe(ialu_reg_reg_fat);
 7536 %}
 7537 
 7538 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7539 %{
 7540   predicate(CompressedOops::shift() == 0);
 7541   match(Set dst mem);
 7542 
 7543   ins_cost(110);
 7544   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7545   ins_encode %{
 7546     __ leaq($dst$$Register, $mem$$Address);
 7547   %}
 7548   ins_pipe(ialu_reg_reg_fat);
 7549 %}
 7550 
 7551 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7552 %{
 7553   predicate(CompressedOops::shift() == 0);
 7554   match(Set dst mem);
 7555 
 7556   ins_cost(110);
 7557   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7558   ins_encode %{
 7559     __ leaq($dst$$Register, $mem$$Address);
 7560   %}
 7561   ins_pipe(ialu_reg_reg_fat);
 7562 %}
 7563 
 7564 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7565 %{
 7566   predicate(CompressedOops::shift() == 0);
 7567   match(Set dst mem);
 7568 
 7569   ins_cost(110);
 7570   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7571   ins_encode %{
 7572     __ leaq($dst$$Register, $mem$$Address);
 7573   %}
 7574   ins_pipe(ialu_reg_reg_fat);
 7575 %}
 7576 
 7577 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7578 %{
 7579   predicate(CompressedOops::shift() == 0);
 7580   match(Set dst mem);
 7581 
 7582   ins_cost(110);
 7583   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7584   ins_encode %{
 7585     __ leaq($dst$$Register, $mem$$Address);
 7586   %}
 7587   ins_pipe(ialu_reg_reg_fat);
 7588 %}
 7589 
 7590 instruct loadConI(rRegI dst, immI src)
 7591 %{
 7592   match(Set dst src);
 7593 
 7594   format %{ "movl    $dst, $src\t# int" %}
 7595   ins_encode %{
 7596     __ movl($dst$$Register, $src$$constant);
 7597   %}
 7598   ins_pipe(ialu_reg_fat); // XXX
 7599 %}
 7600 
 7601 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7602 %{
 7603   match(Set dst src);
 7604   effect(KILL cr);
 7605 
 7606   ins_cost(50);
 7607   format %{ "xorl    $dst, $dst\t# int" %}
 7608   ins_encode %{
 7609     __ xorl($dst$$Register, $dst$$Register);
 7610   %}
 7611   ins_pipe(ialu_reg);
 7612 %}
 7613 
 7614 instruct loadConL(rRegL dst, immL src)
 7615 %{
 7616   match(Set dst src);
 7617 
 7618   ins_cost(150);
 7619   format %{ "movq    $dst, $src\t# long" %}
 7620   ins_encode %{
 7621     __ mov64($dst$$Register, $src$$constant);
 7622   %}
 7623   ins_pipe(ialu_reg);
 7624 %}
 7625 
 7626 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7627 %{
 7628   match(Set dst src);
 7629   effect(KILL cr);
 7630 
 7631   ins_cost(50);
 7632   format %{ "xorl    $dst, $dst\t# long" %}
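  // xorl zero-extends into the upper 32 bits, so it clears the full 64-bit
  // register; it is a recognized zeroing idiom but writes flags (KILL cr).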
 7633   ins_encode %{
 7634     __ xorl($dst$$Register, $dst$$Register);
 7635   %}
 7636   ins_pipe(ialu_reg); // XXX
 7637 %}
 7638 
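// A 32-bit movl zero-extends, covering unsigned 32-bit long constants; movq
// with a 32-bit immediate sign-extends, covering negative ones.  Both are
// shorter than the full 10-byte mov64.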
 7639 instruct loadConUL32(rRegL dst, immUL32 src)
 7640 %{
 7641   match(Set dst src);
 7642 
 7643   ins_cost(60);
 7644   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7645   ins_encode %{
 7646     __ movl($dst$$Register, $src$$constant);
 7647   %}
 7648   ins_pipe(ialu_reg);
 7649 %}
 7650 
 7651 instruct loadConL32(rRegL dst, immL32 src)
 7652 %{
 7653   match(Set dst src);
 7654 
 7655   ins_cost(70);
 7656   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7657   ins_encode %{
 7658     __ movq($dst$$Register, $src$$constant);
 7659   %}
 7660   ins_pipe(ialu_reg);
 7661 %}
 7662 
 7663 instruct loadConP(rRegP dst, immP con) %{
 7664   match(Set dst con);
 7665 
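  // Pointer constants may be oops or metadata that must remain relocatable,
  // so emit a full 64-bit immediate carrying a relocation record.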
 7666   format %{ "movq    $dst, $con\t# ptr" %}
 7667   ins_encode %{
 7668     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7669   %}
 7670   ins_pipe(ialu_reg_fat); // XXX
 7671 %}
 7672 
 7673 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7674 %{
 7675   match(Set dst src);
 7676   effect(KILL cr);
 7677 
 7678   ins_cost(50);
 7679   format %{ "xorl    $dst, $dst\t# ptr" %}
 7680   ins_encode %{
 7681     __ xorl($dst$$Register, $dst$$Register);
 7682   %}
 7683   ins_pipe(ialu_reg);
 7684 %}
 7685 
 7686 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7687 %{
 7688   match(Set dst src);
 7689   effect(KILL cr);
 7690 
 7691   ins_cost(60);
 7692   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7693   ins_encode %{
 7694     __ movl($dst$$Register, $src$$constant);
 7695   %}
 7696   ins_pipe(ialu_reg);
 7697 %}
 7698 
 7699 instruct loadConF(regF dst, immF con) %{
 7700   match(Set dst con);
 7701   ins_cost(125);
 7702   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7703   ins_encode %{
 7704     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7705   %}
 7706   ins_pipe(pipe_slow);
 7707 %}
 7708 
 7709 instruct loadConH(regF dst, immH con) %{
 7710   match(Set dst con);
 7711   ins_cost(125);
 7712   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7713   ins_encode %{
 7714     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7715   %}
 7716   ins_pipe(pipe_slow);
 7717 %}
 7718 
 7719 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7720   match(Set dst src);
 7721   effect(KILL cr);
  format %{ "xorq    $dst, $dst\t# compressed null pointer" %}
 7723   ins_encode %{
 7724     __ xorq($dst$$Register, $dst$$Register);
 7725   %}
 7726   ins_pipe(ialu_reg);
 7727 %}
 7728 
 7729 instruct loadConN(rRegN dst, immN src) %{
 7730   match(Set dst src);
 7731 
 7732   ins_cost(125);
 7733   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7734   ins_encode %{
 7735     address con = (address)$src$$constant;
 7736     if (con == nullptr) {
 7737       ShouldNotReachHere();
 7738     } else {
 7739       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7740     }
 7741   %}
 7742   ins_pipe(ialu_reg_fat); // XXX
 7743 %}
 7744 
 7745 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7746   match(Set dst src);
 7747 
 7748   ins_cost(125);
 7749   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7750   ins_encode %{
 7751     address con = (address)$src$$constant;
 7752     if (con == nullptr) {
 7753       ShouldNotReachHere();
 7754     } else {
 7755       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7756     }
 7757   %}
 7758   ins_pipe(ialu_reg_fat); // XXX
 7759 %}
 7760 
 7761 instruct loadConF0(regF dst, immF0 src)
 7762 %{
 7763   match(Set dst src);
 7764   ins_cost(100);
 7765 
 7766   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7767   ins_encode %{
 7768     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7769   %}
 7770   ins_pipe(pipe_slow);
 7771 %}
 7772 
// Use the same format since predicate() cannot be used here.
 7774 instruct loadConD(regD dst, immD con) %{
 7775   match(Set dst con);
 7776   ins_cost(125);
 7777   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7778   ins_encode %{
 7779     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7780   %}
 7781   ins_pipe(pipe_slow);
 7782 %}
 7783 
 7784 instruct loadConD0(regD dst, immD0 src)
 7785 %{
 7786   match(Set dst src);
 7787   ins_cost(100);
 7788 
 7789   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7790   ins_encode %{
 7791     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7792   %}
 7793   ins_pipe(pipe_slow);
 7794 %}
 7795 
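// Stack-slot loads: reload spilled values from [rsp + disp] into registers.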
 7796 instruct loadSSI(rRegI dst, stackSlotI src)
 7797 %{
 7798   match(Set dst src);
 7799 
 7800   ins_cost(125);
 7801   format %{ "movl    $dst, $src\t# int stk" %}
 7802   ins_encode %{
 7803     __ movl($dst$$Register, $src$$Address);
 7804   %}
 7805   ins_pipe(ialu_reg_mem);
 7806 %}
 7807 
 7808 instruct loadSSL(rRegL dst, stackSlotL src)
 7809 %{
 7810   match(Set dst src);
 7811 
 7812   ins_cost(125);
 7813   format %{ "movq    $dst, $src\t# long stk" %}
 7814   ins_encode %{
 7815     __ movq($dst$$Register, $src$$Address);
 7816   %}
 7817   ins_pipe(ialu_reg_mem);
 7818 %}
 7819 
 7820 instruct loadSSP(rRegP dst, stackSlotP src)
 7821 %{
 7822   match(Set dst src);
 7823 
 7824   ins_cost(125);
 7825   format %{ "movq    $dst, $src\t# ptr stk" %}
 7826   ins_encode %{
 7827     __ movq($dst$$Register, $src$$Address);
 7828   %}
 7829   ins_pipe(ialu_reg_mem);
 7830 %}
 7831 
 7832 instruct loadSSF(regF dst, stackSlotF src)
 7833 %{
 7834   match(Set dst src);
 7835 
 7836   ins_cost(125);
 7837   format %{ "movss   $dst, $src\t# float stk" %}
 7838   ins_encode %{
 7839     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 7840   %}
 7841   ins_pipe(pipe_slow); // XXX
 7842 %}
 7843 
// Use the same format since predicate() cannot be used here.
 7845 instruct loadSSD(regD dst, stackSlotD src)
 7846 %{
 7847   match(Set dst src);
 7848 
 7849   ins_cost(125);
 7850   format %{ "movsd   $dst, $src\t# double stk" %}
 7851   ins_encode  %{
 7852     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 7853   %}
 7854   ins_pipe(pipe_slow); // XXX
 7855 %}
 7856 
 7857 // Prefetch instructions for allocation.
 7858 // Must be safe to execute with invalid address (cannot fault).
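// AllocatePrefetchInstr selects the flavor: 0 = prefetchnta, 1 = prefetcht0,
// 2 = prefetcht2, 3 = prefetchw.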
 7859 
 7860 instruct prefetchAlloc( memory mem ) %{
 7861   predicate(AllocatePrefetchInstr==3);
 7862   match(PrefetchAllocation mem);
 7863   ins_cost(125);
 7864 
 7865   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7866   ins_encode %{
 7867     __ prefetchw($mem$$Address);
 7868   %}
 7869   ins_pipe(ialu_mem);
 7870 %}
 7871 
 7872 instruct prefetchAllocNTA( memory mem ) %{
 7873   predicate(AllocatePrefetchInstr==0);
 7874   match(PrefetchAllocation mem);
 7875   ins_cost(125);
 7876 
 7877   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7878   ins_encode %{
 7879     __ prefetchnta($mem$$Address);
 7880   %}
 7881   ins_pipe(ialu_mem);
 7882 %}
 7883 
 7884 instruct prefetchAllocT0( memory mem ) %{
 7885   predicate(AllocatePrefetchInstr==1);
 7886   match(PrefetchAllocation mem);
 7887   ins_cost(125);
 7888 
 7889   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 7890   ins_encode %{
 7891     __ prefetcht0($mem$$Address);
 7892   %}
 7893   ins_pipe(ialu_mem);
 7894 %}
 7895 
 7896 instruct prefetchAllocT2( memory mem ) %{
 7897   predicate(AllocatePrefetchInstr==2);
 7898   match(PrefetchAllocation mem);
 7899   ins_cost(125);
 7900 
 7901   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 7902   ins_encode %{
 7903     __ prefetcht2($mem$$Address);
 7904   %}
 7905   ins_pipe(ialu_mem);
 7906 %}
 7907 
 7908 //----------Store Instructions-------------------------------------------------
 7909 
 7910 // Store Byte
 7911 instruct storeB(memory mem, rRegI src)
 7912 %{
 7913   match(Set mem (StoreB mem src));
 7914 
 7915   ins_cost(125); // XXX
 7916   format %{ "movb    $mem, $src\t# byte" %}
 7917   ins_encode %{
 7918     __ movb($mem$$Address, $src$$Register);
 7919   %}
 7920   ins_pipe(ialu_mem_reg);
 7921 %}
 7922 
 7923 // Store Char/Short
 7924 instruct storeC(memory mem, rRegI src)
 7925 %{
 7926   match(Set mem (StoreC mem src));
 7927 
 7928   ins_cost(125); // XXX
 7929   format %{ "movw    $mem, $src\t# char/short" %}
 7930   ins_encode %{
 7931     __ movw($mem$$Address, $src$$Register);
 7932   %}
 7933   ins_pipe(ialu_mem_reg);
 7934 %}
 7935 
 7936 // Store Integer
 7937 instruct storeI(memory mem, rRegI src)
 7938 %{
 7939   match(Set mem (StoreI mem src));
 7940 
 7941   ins_cost(125); // XXX
 7942   format %{ "movl    $mem, $src\t# int" %}
 7943   ins_encode %{
 7944     __ movl($mem$$Address, $src$$Register);
 7945   %}
 7946   ins_pipe(ialu_mem_reg);
 7947 %}
 7948 
 7949 // Store Long
 7950 instruct storeL(memory mem, rRegL src)
 7951 %{
 7952   match(Set mem (StoreL mem src));
 7953 
 7954   ins_cost(125); // XXX
 7955   format %{ "movq    $mem, $src\t# long" %}
 7956   ins_encode %{
 7957     __ movq($mem$$Address, $src$$Register);
 7958   %}
 7959   ins_pipe(ialu_mem_reg); // XXX
 7960 %}
 7961 
 7962 // Store Pointer
 7963 instruct storeP(memory mem, any_RegP src)
 7964 %{
 7965   predicate(n->as_Store()->barrier_data() == 0);
 7966   match(Set mem (StoreP mem src));
 7967 
 7968   ins_cost(125); // XXX
 7969   format %{ "movq    $mem, $src\t# ptr" %}
 7970   ins_encode %{
 7971     __ movq($mem$$Address, $src$$Register);
 7972   %}
 7973   ins_pipe(ialu_mem_reg);
 7974 %}
 7975 
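// With compressed oops and a null heap base, r12 (the heap-base register)
// holds zero, so it doubles as a zero register for the immediate-zero stores
// below: a register store encodes shorter than an immediate store.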
 7976 instruct storeImmP0(memory mem, immP0 zero)
 7977 %{
 7978   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 7979   match(Set mem (StoreP mem zero));
 7980 
 7981   ins_cost(125); // XXX
 7982   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 7983   ins_encode %{
 7984     __ movq($mem$$Address, r12);
 7985   %}
 7986   ins_pipe(ialu_mem_reg);
 7987 %}
 7988 
 7989 // Store Null Pointer, mark word, or other simple pointer constant.
 7990 instruct storeImmP(memory mem, immP31 src)
 7991 %{
 7992   predicate(n->as_Store()->barrier_data() == 0);
 7993   match(Set mem (StoreP mem src));
 7994 
 7995   ins_cost(150); // XXX
 7996   format %{ "movq    $mem, $src\t# ptr" %}
 7997   ins_encode %{
 7998     __ movq($mem$$Address, $src$$constant);
 7999   %}
 8000   ins_pipe(ialu_mem_imm);
 8001 %}
 8002 
 8003 // Store Compressed Pointer
 8004 instruct storeN(memory mem, rRegN src)
 8005 %{
 8006   predicate(n->as_Store()->barrier_data() == 0);
 8007   match(Set mem (StoreN mem src));
 8008 
 8009   ins_cost(125); // XXX
 8010   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8011   ins_encode %{
 8012     __ movl($mem$$Address, $src$$Register);
 8013   %}
 8014   ins_pipe(ialu_mem_reg);
 8015 %}
 8016 
 8017 instruct storeNKlass(memory mem, rRegN src)
 8018 %{
 8019   match(Set mem (StoreNKlass mem src));
 8020 
 8021   ins_cost(125); // XXX
 8022   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8023   ins_encode %{
 8024     __ movl($mem$$Address, $src$$Register);
 8025   %}
 8026   ins_pipe(ialu_mem_reg);
 8027 %}
 8028 
 8029 instruct storeImmN0(memory mem, immN0 zero)
 8030 %{
 8031   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8032   match(Set mem (StoreN mem zero));
 8033 
 8034   ins_cost(125); // XXX
 8035   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8036   ins_encode %{
 8037     __ movl($mem$$Address, r12);
 8038   %}
 8039   ins_pipe(ialu_mem_reg);
 8040 %}
 8041 
 8042 instruct storeImmN(memory mem, immN src)
 8043 %{
 8044   predicate(n->as_Store()->barrier_data() == 0);
 8045   match(Set mem (StoreN mem src));
 8046 
 8047   ins_cost(150); // XXX
 8048   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8049   ins_encode %{
 8050     address con = (address)$src$$constant;
 8051     if (con == nullptr) {
 8052       __ movl($mem$$Address, 0);
 8053     } else {
 8054       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8055     }
 8056   %}
 8057   ins_pipe(ialu_mem_imm);
 8058 %}
 8059 
 8060 instruct storeImmNKlass(memory mem, immNKlass src)
 8061 %{
 8062   match(Set mem (StoreNKlass mem src));
 8063 
 8064   ins_cost(150); // XXX
 8065   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8066   ins_encode %{
 8067     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8068   %}
 8069   ins_pipe(ialu_mem_imm);
 8070 %}
 8071 
 8072 // Store Integer Immediate
 8073 instruct storeImmI0(memory mem, immI_0 zero)
 8074 %{
 8075   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8076   match(Set mem (StoreI mem zero));
 8077 
 8078   ins_cost(125); // XXX
 8079   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8080   ins_encode %{
 8081     __ movl($mem$$Address, r12);
 8082   %}
 8083   ins_pipe(ialu_mem_reg);
 8084 %}
 8085 
 8086 instruct storeImmI(memory mem, immI src)
 8087 %{
 8088   match(Set mem (StoreI mem src));
 8089 
 8090   ins_cost(150);
 8091   format %{ "movl    $mem, $src\t# int" %}
 8092   ins_encode %{
 8093     __ movl($mem$$Address, $src$$constant);
 8094   %}
 8095   ins_pipe(ialu_mem_imm);
 8096 %}
 8097 
 8098 // Store Long Immediate
 8099 instruct storeImmL0(memory mem, immL0 zero)
 8100 %{
 8101   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8102   match(Set mem (StoreL mem zero));
 8103 
 8104   ins_cost(125); // XXX
 8105   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8106   ins_encode %{
 8107     __ movq($mem$$Address, r12);
 8108   %}
 8109   ins_pipe(ialu_mem_reg);
 8110 %}
 8111 
 8112 instruct storeImmL(memory mem, immL32 src)
 8113 %{
 8114   match(Set mem (StoreL mem src));
 8115 
 8116   ins_cost(150);
 8117   format %{ "movq    $mem, $src\t# long" %}
 8118   ins_encode %{
 8119     __ movq($mem$$Address, $src$$constant);
 8120   %}
 8121   ins_pipe(ialu_mem_imm);
 8122 %}
 8123 
 8124 // Store Short/Char Immediate
 8125 instruct storeImmC0(memory mem, immI_0 zero)
 8126 %{
 8127   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8128   match(Set mem (StoreC mem zero));
 8129 
 8130   ins_cost(125); // XXX
 8131   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8132   ins_encode %{
 8133     __ movw($mem$$Address, r12);
 8134   %}
 8135   ins_pipe(ialu_mem_reg);
 8136 %}
 8137 
 8138 instruct storeImmI16(memory mem, immI16 src)
 8139 %{
 8140   predicate(UseStoreImmI16);
 8141   match(Set mem (StoreC mem src));
 8142 
 8143   ins_cost(150);
 8144   format %{ "movw    $mem, $src\t# short/char" %}
 8145   ins_encode %{
 8146     __ movw($mem$$Address, $src$$constant);
 8147   %}
 8148   ins_pipe(ialu_mem_imm);
 8149 %}
 8150 
 8151 // Store Byte Immediate
 8152 instruct storeImmB0(memory mem, immI_0 zero)
 8153 %{
 8154   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8155   match(Set mem (StoreB mem zero));
 8156 
 8157   ins_cost(125); // XXX
  format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 8159   ins_encode %{
 8160     __ movb($mem$$Address, r12);
 8161   %}
 8162   ins_pipe(ialu_mem_reg);
 8163 %}
 8164 
 8165 instruct storeImmB(memory mem, immI8 src)
 8166 %{
 8167   match(Set mem (StoreB mem src));
 8168 
 8169   ins_cost(150); // XXX
 8170   format %{ "movb    $mem, $src\t# byte" %}
 8171   ins_encode %{
 8172     __ movb($mem$$Address, $src$$constant);
 8173   %}
 8174   ins_pipe(ialu_mem_imm);
 8175 %}
 8176 
 8177 // Store Float
 8178 instruct storeF(memory mem, regF src)
 8179 %{
 8180   match(Set mem (StoreF mem src));
 8181 
 8182   ins_cost(95); // XXX
 8183   format %{ "movss   $mem, $src\t# float" %}
 8184   ins_encode %{
 8185     __ movflt($mem$$Address, $src$$XMMRegister);
 8186   %}
 8187   ins_pipe(pipe_slow); // XXX
 8188 %}
 8189 
// Store immediate float 0.0 (it is faster than a store from an XMM register)
 8191 instruct storeF0(memory mem, immF0 zero)
 8192 %{
 8193   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8194   match(Set mem (StoreF mem zero));
 8195 
 8196   ins_cost(25); // XXX
 8197   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8198   ins_encode %{
 8199     __ movl($mem$$Address, r12);
 8200   %}
 8201   ins_pipe(ialu_mem_reg);
 8202 %}
 8203 
 8204 instruct storeF_imm(memory mem, immF src)
 8205 %{
 8206   match(Set mem (StoreF mem src));
 8207 
 8208   ins_cost(50);
 8209   format %{ "movl    $mem, $src\t# float" %}
 8210   ins_encode %{
 8211     __ movl($mem$$Address, jint_cast($src$$constant));
 8212   %}
 8213   ins_pipe(ialu_mem_imm);
 8214 %}
 8215 
 8216 // Store Double
 8217 instruct storeD(memory mem, regD src)
 8218 %{
 8219   match(Set mem (StoreD mem src));
 8220 
 8221   ins_cost(95); // XXX
 8222   format %{ "movsd   $mem, $src\t# double" %}
 8223   ins_encode %{
 8224     __ movdbl($mem$$Address, $src$$XMMRegister);
 8225   %}
 8226   ins_pipe(pipe_slow); // XXX
 8227 %}
 8228 
// Store immediate double 0.0 (it is faster than a store from an XMM register)
 8230 instruct storeD0_imm(memory mem, immD0 src)
 8231 %{
 8232   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8233   match(Set mem (StoreD mem src));
 8234 
 8235   ins_cost(50);
 8236   format %{ "movq    $mem, $src\t# double 0." %}
 8237   ins_encode %{
 8238     __ movq($mem$$Address, $src$$constant);
 8239   %}
 8240   ins_pipe(ialu_mem_imm);
 8241 %}
 8242 
 8243 instruct storeD0(memory mem, immD0 zero)
 8244 %{
 8245   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8246   match(Set mem (StoreD mem zero));
 8247 
 8248   ins_cost(25); // XXX
 8249   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8250   ins_encode %{
 8251     __ movq($mem$$Address, r12);
 8252   %}
 8253   ins_pipe(ialu_mem_reg);
 8254 %}
 8255 
 8256 instruct storeSSI(stackSlotI dst, rRegI src)
 8257 %{
 8258   match(Set dst src);
 8259 
 8260   ins_cost(100);
 8261   format %{ "movl    $dst, $src\t# int stk" %}
 8262   ins_encode %{
 8263     __ movl($dst$$Address, $src$$Register);
 8264   %}
 8265   ins_pipe( ialu_mem_reg );
 8266 %}
 8267 
 8268 instruct storeSSL(stackSlotL dst, rRegL src)
 8269 %{
 8270   match(Set dst src);
 8271 
 8272   ins_cost(100);
 8273   format %{ "movq    $dst, $src\t# long stk" %}
 8274   ins_encode %{
 8275     __ movq($dst$$Address, $src$$Register);
 8276   %}
 8277   ins_pipe(ialu_mem_reg);
 8278 %}
 8279 
 8280 instruct storeSSP(stackSlotP dst, rRegP src)
 8281 %{
 8282   match(Set dst src);
 8283 
 8284   ins_cost(100);
 8285   format %{ "movq    $dst, $src\t# ptr stk" %}
 8286   ins_encode %{
 8287     __ movq($dst$$Address, $src$$Register);
 8288   %}
 8289   ins_pipe(ialu_mem_reg);
 8290 %}
 8291 
 8292 instruct storeSSF(stackSlotF dst, regF src)
 8293 %{
 8294   match(Set dst src);
 8295 
 8296   ins_cost(95); // XXX
 8297   format %{ "movss   $dst, $src\t# float stk" %}
 8298   ins_encode %{
 8299     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8300   %}
 8301   ins_pipe(pipe_slow); // XXX
 8302 %}
 8303 
 8304 instruct storeSSD(stackSlotD dst, regD src)
 8305 %{
 8306   match(Set dst src);
 8307 
 8308   ins_cost(95); // XXX
 8309   format %{ "movsd   $dst, $src\t# double stk" %}
 8310   ins_encode %{
 8311     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8312   %}
 8313   ins_pipe(pipe_slow); // XXX
 8314 %}
 8315 
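// Cache-line writeback (used, e.g., for persistent-memory flushes); guarded
// by CPU support for a data cache line flush instruction.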
 8316 instruct cacheWB(indirect addr)
 8317 %{
 8318   predicate(VM_Version::supports_data_cache_line_flush());
 8319   match(CacheWB addr);
 8320 
 8321   ins_cost(100);
  format %{ "cache wb $addr" %}
 8323   ins_encode %{
 8324     assert($addr->index_position() < 0, "should be");
 8325     assert($addr$$disp == 0, "should be");
 8326     __ cache_wb(Address($addr$$base$$Register, 0));
 8327   %}
 8328   ins_pipe(pipe_slow); // XXX
 8329 %}
 8330 
 8331 instruct cacheWBPreSync()
 8332 %{
 8333   predicate(VM_Version::supports_data_cache_line_flush());
 8334   match(CacheWBPreSync);
 8335 
 8336   ins_cost(100);
  format %{ "cache wb presync" %}
 8338   ins_encode %{
 8339     __ cache_wbsync(true);
 8340   %}
 8341   ins_pipe(pipe_slow); // XXX
 8342 %}
 8343 
 8344 instruct cacheWBPostSync()
 8345 %{
 8346   predicate(VM_Version::supports_data_cache_line_flush());
 8347   match(CacheWBPostSync);
 8348 
 8349   ins_cost(100);
  format %{ "cache wb postsync" %}
 8351   ins_encode %{
 8352     __ cache_wbsync(false);
 8353   %}
 8354   ins_pipe(pipe_slow); // XXX
 8355 %}
 8356 
 8357 //----------BSWAP Instructions-------------------------------------------------
 8358 instruct bytes_reverse_int(rRegI dst) %{
 8359   match(Set dst (ReverseBytesI dst));
 8360 
 8361   format %{ "bswapl  $dst" %}
 8362   ins_encode %{
 8363     __ bswapl($dst$$Register);
 8364   %}
 8365   ins_pipe( ialu_reg );
 8366 %}
 8367 
 8368 instruct bytes_reverse_long(rRegL dst) %{
 8369   match(Set dst (ReverseBytesL dst));
 8370 
 8371   format %{ "bswapq  $dst" %}
 8372   ins_encode %{
 8373     __ bswapq($dst$$Register);
 8374   %}
 8375   ins_pipe( ialu_reg);
 8376 %}
 8377 
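// A 16-bit byte swap is a 32-bit bswap followed by a 16-bit shift: a logical
// shift zero-extends (unsigned short), an arithmetic shift sign-extends
// (short).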
 8378 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8379   match(Set dst (ReverseBytesUS dst));
 8380   effect(KILL cr);
 8381 
  format %{ "bswapl  $dst\n\t"
            "shrl    $dst,16" %}
 8384   ins_encode %{
 8385     __ bswapl($dst$$Register);
 8386     __ shrl($dst$$Register, 16);
 8387   %}
 8388   ins_pipe( ialu_reg );
 8389 %}
 8390 
 8391 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8392   match(Set dst (ReverseBytesS dst));
 8393   effect(KILL cr);
 8394 
  format %{ "bswapl  $dst\n\t"
            "sarl    $dst,16" %}
 8397   ins_encode %{
 8398     __ bswapl($dst$$Register);
 8399     __ sarl($dst$$Register, 16);
 8400   %}
 8401   ins_pipe( ialu_reg );
 8402 %}
 8403 
 8404 //---------- Zeros Count Instructions ------------------------------------------
 8405 
 8406 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8407   predicate(UseCountLeadingZerosInstruction);
 8408   match(Set dst (CountLeadingZerosI src));
 8409   effect(KILL cr);
 8410 
 8411   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8412   ins_encode %{
 8413     __ lzcntl($dst$$Register, $src$$Register);
 8414   %}
 8415   ins_pipe(ialu_reg);
 8416 %}
 8417 
 8418 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8419   predicate(UseCountLeadingZerosInstruction);
 8420   match(Set dst (CountLeadingZerosI (LoadI src)));
 8421   effect(KILL cr);
 8422   ins_cost(175);
 8423   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8424   ins_encode %{
 8425     __ lzcntl($dst$$Register, $src$$Address);
 8426   %}
 8427   ins_pipe(ialu_reg_mem);
 8428 %}
 8429 
 8430 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8431   predicate(!UseCountLeadingZerosInstruction);
 8432   match(Set dst (CountLeadingZerosI src));
 8433   effect(KILL cr);
 8434 
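  // Without lzcnt: lzcnt(x) == 31 - bsr(x) for x != 0.  bsr leaves the
  // destination undefined on zero input, so substitute -1 there, which the
  // neg/add fixup turns into 32.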
 8435   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8436             "jnz     skip\n\t"
 8437             "movl    $dst, -1\n"
 8438       "skip:\n\t"
 8439             "negl    $dst\n\t"
 8440             "addl    $dst, 31" %}
 8441   ins_encode %{
 8442     Register Rdst = $dst$$Register;
 8443     Register Rsrc = $src$$Register;
 8444     Label skip;
 8445     __ bsrl(Rdst, Rsrc);
 8446     __ jccb(Assembler::notZero, skip);
 8447     __ movl(Rdst, -1);
 8448     __ bind(skip);
 8449     __ negl(Rdst);
 8450     __ addl(Rdst, BitsPerInt - 1);
 8451   %}
 8452   ins_pipe(ialu_reg);
 8453 %}
 8454 
 8455 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8456   predicate(UseCountLeadingZerosInstruction);
 8457   match(Set dst (CountLeadingZerosL src));
 8458   effect(KILL cr);
 8459 
 8460   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8461   ins_encode %{
 8462     __ lzcntq($dst$$Register, $src$$Register);
 8463   %}
 8464   ins_pipe(ialu_reg);
 8465 %}
 8466 
 8467 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8468   predicate(UseCountLeadingZerosInstruction);
 8469   match(Set dst (CountLeadingZerosL (LoadL src)));
 8470   effect(KILL cr);
 8471   ins_cost(175);
 8472   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8473   ins_encode %{
 8474     __ lzcntq($dst$$Register, $src$$Address);
 8475   %}
 8476   ins_pipe(ialu_reg_mem);
 8477 %}
 8478 
 8479 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8480   predicate(!UseCountLeadingZerosInstruction);
 8481   match(Set dst (CountLeadingZerosL src));
 8482   effect(KILL cr);
 8483 
 8484   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8485             "jnz     skip\n\t"
 8486             "movl    $dst, -1\n"
 8487       "skip:\n\t"
 8488             "negl    $dst\n\t"
 8489             "addl    $dst, 63" %}
 8490   ins_encode %{
 8491     Register Rdst = $dst$$Register;
 8492     Register Rsrc = $src$$Register;
 8493     Label skip;
 8494     __ bsrq(Rdst, Rsrc);
 8495     __ jccb(Assembler::notZero, skip);
 8496     __ movl(Rdst, -1);
 8497     __ bind(skip);
 8498     __ negl(Rdst);
 8499     __ addl(Rdst, BitsPerLong - 1);
 8500   %}
 8501   ins_pipe(ialu_reg);
 8502 %}
 8503 
 8504 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8505   predicate(UseCountTrailingZerosInstruction);
 8506   match(Set dst (CountTrailingZerosI src));
 8507   effect(KILL cr);
 8508 
 8509   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8510   ins_encode %{
 8511     __ tzcntl($dst$$Register, $src$$Register);
 8512   %}
 8513   ins_pipe(ialu_reg);
 8514 %}
 8515 
 8516 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8517   predicate(UseCountTrailingZerosInstruction);
 8518   match(Set dst (CountTrailingZerosI (LoadI src)));
 8519   effect(KILL cr);
 8520   ins_cost(175);
 8521   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8522   ins_encode %{
 8523     __ tzcntl($dst$$Register, $src$$Address);
 8524   %}
 8525   ins_pipe(ialu_reg_mem);
 8526 %}
 8527 
 8528 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8529   predicate(!UseCountTrailingZerosInstruction);
 8530   match(Set dst (CountTrailingZerosI src));
 8531   effect(KILL cr);
 8532 
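  // bsf leaves the destination undefined on zero input, so branch and
  // substitute BitsPerInt (32) in that case.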
 8533   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8534             "jnz     done\n\t"
 8535             "movl    $dst, 32\n"
 8536       "done:" %}
 8537   ins_encode %{
 8538     Register Rdst = $dst$$Register;
 8539     Label done;
 8540     __ bsfl(Rdst, $src$$Register);
 8541     __ jccb(Assembler::notZero, done);
 8542     __ movl(Rdst, BitsPerInt);
 8543     __ bind(done);
 8544   %}
 8545   ins_pipe(ialu_reg);
 8546 %}
 8547 
 8548 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8549   predicate(UseCountTrailingZerosInstruction);
 8550   match(Set dst (CountTrailingZerosL src));
 8551   effect(KILL cr);
 8552 
 8553   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8554   ins_encode %{
 8555     __ tzcntq($dst$$Register, $src$$Register);
 8556   %}
 8557   ins_pipe(ialu_reg);
 8558 %}
 8559 
 8560 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8561   predicate(UseCountTrailingZerosInstruction);
 8562   match(Set dst (CountTrailingZerosL (LoadL src)));
 8563   effect(KILL cr);
 8564   ins_cost(175);
 8565   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8566   ins_encode %{
 8567     __ tzcntq($dst$$Register, $src$$Address);
 8568   %}
 8569   ins_pipe(ialu_reg_mem);
 8570 %}
 8571 
 8572 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8573   predicate(!UseCountTrailingZerosInstruction);
 8574   match(Set dst (CountTrailingZerosL src));
 8575   effect(KILL cr);
 8576 
 8577   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8578             "jnz     done\n\t"
 8579             "movl    $dst, 64\n"
 8580       "done:" %}
 8581   ins_encode %{
 8582     Register Rdst = $dst$$Register;
 8583     Label done;
 8584     __ bsfq(Rdst, $src$$Register);
 8585     __ jccb(Assembler::notZero, done);
 8586     __ movl(Rdst, BitsPerLong);
 8587     __ bind(done);
 8588   %}
 8589   ins_pipe(ialu_reg);
 8590 %}
 8591 
 8592 //--------------- Reverse Operation Instructions ----------------
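// Bit reversal: with GFNI the per-byte bit reversal is done with a
// Galois-field affine transform (plus a byte swap for the byte order);
// without it, a scalar shift-and-mask sequence is used.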
 8593 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8594   predicate(!VM_Version::supports_gfni());
 8595   match(Set dst (ReverseI src));
 8596   effect(TEMP dst, TEMP rtmp, KILL cr);
 8597   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8598   ins_encode %{
 8599     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8600   %}
 8601   ins_pipe( ialu_reg );
 8602 %}
 8603 
 8604 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8605   predicate(VM_Version::supports_gfni());
 8606   match(Set dst (ReverseI src));
 8607   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8608   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8609   ins_encode %{
 8610     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8611   %}
 8612   ins_pipe( ialu_reg );
 8613 %}
 8614 
 8615 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8616   predicate(!VM_Version::supports_gfni());
 8617   match(Set dst (ReverseL src));
 8618   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8619   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8620   ins_encode %{
 8621     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8622   %}
 8623   ins_pipe( ialu_reg );
 8624 %}
 8625 
 8626 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8627   predicate(VM_Version::supports_gfni());
 8628   match(Set dst (ReverseL src));
 8629   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8630   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8631   ins_encode %{
 8632     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8633   %}
 8634   ins_pipe( ialu_reg );
 8635 %}
 8636 
 8637 //---------- Population Count Instructions -------------------------------------
 8638 
 8639 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8640   predicate(UsePopCountInstruction);
 8641   match(Set dst (PopCountI src));
 8642   effect(KILL cr);
 8643 
 8644   format %{ "popcnt  $dst, $src" %}
 8645   ins_encode %{
 8646     __ popcntl($dst$$Register, $src$$Register);
 8647   %}
 8648   ins_pipe(ialu_reg);
 8649 %}
 8650 
 8651 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8652   predicate(UsePopCountInstruction);
 8653   match(Set dst (PopCountI (LoadI mem)));
 8654   effect(KILL cr);
 8655 
 8656   format %{ "popcnt  $dst, $mem" %}
 8657   ins_encode %{
 8658     __ popcntl($dst$$Register, $mem$$Address);
 8659   %}
 8660   ins_pipe(ialu_reg);
 8661 %}
 8662 
 8663 // Note: Long.bitCount(long) returns an int.
 8664 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8665   predicate(UsePopCountInstruction);
 8666   match(Set dst (PopCountL src));
 8667   effect(KILL cr);
 8668 
 8669   format %{ "popcnt  $dst, $src" %}
 8670   ins_encode %{
 8671     __ popcntq($dst$$Register, $src$$Register);
 8672   %}
 8673   ins_pipe(ialu_reg);
 8674 %}
 8675 
 8676 // Note: Long.bitCount(long) returns an int.
 8677 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8678   predicate(UsePopCountInstruction);
 8679   match(Set dst (PopCountL (LoadL mem)));
 8680   effect(KILL cr);
 8681 
 8682   format %{ "popcnt  $dst, $mem" %}
 8683   ins_encode %{
 8684     __ popcntq($dst$$Register, $mem$$Address);
 8685   %}
 8686   ins_pipe(ialu_reg);
 8687 %}
 8688 
 8689 
 8690 //----------MemBar Instructions-----------------------------------------------
 8691 // Memory barrier flavors
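// On x86's TSO memory model only StoreLoad needs code (a locked
// read-modify-write of the stack top); the acquire/release flavors are
// compiler-only barriers and emit nothing.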
 8692 
 8693 instruct membar_acquire()
 8694 %{
 8695   match(MemBarAcquire);
 8696   match(LoadFence);
 8697   ins_cost(0);
 8698 
 8699   size(0);
 8700   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8701   ins_encode();
 8702   ins_pipe(empty);
 8703 %}
 8704 
 8705 instruct membar_acquire_lock()
 8706 %{
 8707   match(MemBarAcquireLock);
 8708   ins_cost(0);
 8709 
 8710   size(0);
 8711   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8712   ins_encode();
 8713   ins_pipe(empty);
 8714 %}
 8715 
 8716 instruct membar_release()
 8717 %{
 8718   match(MemBarRelease);
 8719   match(StoreFence);
 8720   ins_cost(0);
 8721 
 8722   size(0);
 8723   format %{ "MEMBAR-release ! (empty encoding)" %}
 8724   ins_encode();
 8725   ins_pipe(empty);
 8726 %}
 8727 
 8728 instruct membar_release_lock()
 8729 %{
 8730   match(MemBarReleaseLock);
 8731   ins_cost(0);
 8732 
 8733   size(0);
 8734   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8735   ins_encode();
 8736   ins_pipe(empty);
 8737 %}
 8738 
 8739 instruct membar_volatile(rFlagsReg cr) %{
 8740   match(MemBarVolatile);
 8741   effect(KILL cr);
 8742   ins_cost(400);
 8743 
 8744   format %{
 8745     $$template
 8746     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8747   %}
 8748   ins_encode %{
 8749     __ membar(Assembler::StoreLoad);
 8750   %}
 8751   ins_pipe(pipe_slow);
 8752 %}
 8753 
 8754 instruct unnecessary_membar_volatile()
 8755 %{
 8756   match(MemBarVolatile);
 8757   predicate(Matcher::post_store_load_barrier(n));
 8758   ins_cost(0);
 8759 
 8760   size(0);
 8761   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8762   ins_encode();
 8763   ins_pipe(empty);
 8764 %}
 8765 
 8766 instruct membar_storestore() %{
 8767   match(MemBarStoreStore);
 8768   match(StoreStoreFence);
 8769   ins_cost(0);
 8770 
 8771   size(0);
 8772   format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
 8774   ins_pipe(empty);
 8775 %}
 8776 
 8777 //----------Move Instructions--------------------------------------------------
 8778 
 8779 instruct castX2P(rRegP dst, rRegL src)
 8780 %{
 8781   match(Set dst (CastX2P src));
 8782 
 8783   format %{ "movq    $dst, $src\t# long->ptr" %}
 8784   ins_encode %{
 8785     if ($dst$$reg != $src$$reg) {
 8786       __ movptr($dst$$Register, $src$$Register);
 8787     }
 8788   %}
 8789   ins_pipe(ialu_reg_reg); // XXX
 8790 %}
 8791 
 8792 instruct castP2X(rRegL dst, rRegP src)
 8793 %{
 8794   match(Set dst (CastP2X src));
 8795 
 8796   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8797   ins_encode %{
 8798     if ($dst$$reg != $src$$reg) {
 8799       __ movptr($dst$$Register, $src$$Register);
 8800     }
 8801   %}
 8802   ins_pipe(ialu_reg_reg); // XXX
 8803 %}
 8804 
// Convert an oop into an int for vector alignment masking
 8806 instruct convP2I(rRegI dst, rRegP src)
 8807 %{
 8808   match(Set dst (ConvL2I (CastP2X src)));
 8809 
 8810   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8811   ins_encode %{
 8812     __ movl($dst$$Register, $src$$Register);
 8813   %}
 8814   ins_pipe(ialu_reg_reg); // XXX
 8815 %}
 8816 
// Convert a compressed oop into an int for vector alignment masking
// when oops are 32-bit (heap < 4GB).
 8819 instruct convN2I(rRegI dst, rRegN src)
 8820 %{
 8821   predicate(CompressedOops::shift() == 0);
 8822   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8823 
 8824   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8825   ins_encode %{
 8826     __ movl($dst$$Register, $src$$Register);
 8827   %}
 8828   ins_pipe(ialu_reg_reg); // XXX
 8829 %}
 8830 
 8831 // Convert oop pointer into compressed form
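// encode: null maps to 0, otherwise (oop - base) >> shift.  The _not_null
// variants skip the null check when the type system has proved it away.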
 8832 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 8833   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8834   match(Set dst (EncodeP src));
 8835   effect(KILL cr);
 8836   format %{ "encode_heap_oop $dst,$src" %}
 8837   ins_encode %{
 8838     Register s = $src$$Register;
 8839     Register d = $dst$$Register;
 8840     if (s != d) {
 8841       __ movq(d, s);
 8842     }
 8843     __ encode_heap_oop(d);
 8844   %}
 8845   ins_pipe(ialu_reg_long);
 8846 %}
 8847 
 8848 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8849   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8850   match(Set dst (EncodeP src));
 8851   effect(KILL cr);
 8852   format %{ "encode_heap_oop_not_null $dst,$src" %}
 8853   ins_encode %{
 8854     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 8855   %}
 8856   ins_pipe(ialu_reg_long);
 8857 %}
 8858 
 8859 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 8860   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 8861             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 8862   match(Set dst (DecodeN src));
 8863   effect(KILL cr);
 8864   format %{ "decode_heap_oop $dst,$src" %}
 8865   ins_encode %{
 8866     Register s = $src$$Register;
 8867     Register d = $dst$$Register;
 8868     if (s != d) {
 8869       __ movq(d, s);
 8870     }
 8871     __ decode_heap_oop(d);
 8872   %}
 8873   ins_pipe(ialu_reg_long);
 8874 %}
 8875 
 8876 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 8877   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 8878             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 8879   match(Set dst (DecodeN src));
 8880   effect(KILL cr);
 8881   format %{ "decode_heap_oop_not_null $dst,$src" %}
 8882   ins_encode %{
 8883     Register s = $src$$Register;
 8884     Register d = $dst$$Register;
 8885     if (s != d) {
 8886       __ decode_heap_oop_not_null(d, s);
 8887     } else {
 8888       __ decode_heap_oop_not_null(d);
 8889     }
 8890   %}
 8891   ins_pipe(ialu_reg_long);
 8892 %}
 8893 
 8894 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8895   match(Set dst (EncodePKlass src));
 8896   effect(TEMP dst, KILL cr);
 8897   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 8898   ins_encode %{
 8899     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 8900   %}
 8901   ins_pipe(ialu_reg_long);
 8902 %}
 8903 
 8904 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 8905   match(Set dst (DecodeNKlass src));
 8906   effect(TEMP dst, KILL cr);
 8907   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 8908   ins_encode %{
 8909     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 8910   %}
 8911   ins_pipe(ialu_reg_long);
 8912 %}
 8913 
 8914 //----------Conditional Move---------------------------------------------------
 8915 // Jump
// Dummy instruction for generating temp registers; predicate(false) keeps it
// from being matched directly.
 8917 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 8918   match(Jump (LShiftL switch_val shift));
 8919   ins_cost(350);
 8920   predicate(false);
 8921   effect(TEMP dest);
 8922 
  format %{ "leaq    $dest, [$constantaddress]\n\t"
            "jmp     [$dest + $switch_val << $shift]" %}
 8925   ins_encode %{
 8926     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 8927     // to do that and the compiler is using that register as one it can allocate.
 8928     // So we build it all by hand.
 8929     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 8930     // ArrayAddress dispatch(table, index);
 8931     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 8932     __ lea($dest$$Register, $constantaddress);
 8933     __ jmp(dispatch);
 8934   %}
 8935   ins_pipe(pipe_jmp);
 8936 %}
 8937 
 8938 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 8939   match(Jump (AddL (LShiftL switch_val shift) offset));
 8940   ins_cost(350);
 8941   effect(TEMP dest);
 8942 
  format %{ "leaq    $dest, [$constantaddress]\n\t"
            "jmp     [$dest + $switch_val << $shift + $offset]" %}
 8945   ins_encode %{
 8946     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 8947     // to do that and the compiler is using that register as one it can allocate.
 8948     // So we build it all by hand.
 8949     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 8950     // ArrayAddress dispatch(table, index);
 8951     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 8952     __ lea($dest$$Register, $constantaddress);
 8953     __ jmp(dispatch);
 8954   %}
 8955   ins_pipe(pipe_jmp);
 8956 %}
 8957 
 8958 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 8959   match(Jump switch_val);
 8960   ins_cost(350);
 8961   effect(TEMP dest);
 8962 
 8963   format %{ "leaq    $dest, [$constantaddress]\n\t"
 8964             "jmp     [$dest + $switch_val]\n\t" %}
 8965   ins_encode %{
    // We could use jump(ArrayAddress), except that the macro assembler needs r10
    // to do that, and the compiler treats r10 as an allocatable register here.
    // So we build the address by hand.
 8969     // Address index(noreg, switch_reg, Address::times_1);
 8970     // ArrayAddress dispatch(table, index);
 8971     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 8972     __ lea($dest$$Register, $constantaddress);
 8973     __ jmp(dispatch);
 8974   %}
 8975   ins_pipe(pipe_jmp);
 8976 %}
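
// For reference: C2 lowers a dense switch into a Jump over a table in the
// constant section, so each pattern above materializes the table base with
// leaq from $constantaddress and dispatches through an indirect jmp indexed
// by the (possibly shifted and offset) selector value.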
 8977 
 8978 // Conditional move
 8979 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 8980 %{
 8981   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 8982   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 8983 
 8984   ins_cost(100); // XXX
 8985   format %{ "setbn$cop $dst\t# signed, int" %}
 8986   ins_encode %{
 8987     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 8988     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 8989   %}
 8990   ins_pipe(ialu_reg);
 8991 %}
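
// In effect the pattern above matches dst = $cop ? 0 : 1: the matcher binds
// the constant-0 input to $dst, so the register already holds zero and a
// single setb on the negated condition writes the low byte, with no cmov
// and no constant load.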
 8992 
 8993 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 8994 %{
 8995   predicate(!UseAPX);
 8996   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 8997 
 8998   ins_cost(200); // XXX
 8999   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9000   ins_encode %{
 9001     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9002   %}
 9003   ins_pipe(pipe_cmov_reg);
 9004 %}
 9005 
 9006 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9007 %{
 9008   predicate(UseAPX);
 9009   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9010 
 9011   ins_cost(200);
 9012   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9013   ins_encode %{
 9014     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9015   %}
 9016   ins_pipe(pipe_cmov_reg);
 9017 %}
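
// "ndd" is the APX new-data-destination form: the EVEX-encoded ecmovl writes
// its result to a third register, so $dst need not be tied to either source
// and the register allocator gains an extra degree of freedom.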
 9018 
 9019 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9020 %{
 9021   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9022   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9023 
 9024   ins_cost(100); // XXX
 9025   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9026   ins_encode %{
 9027     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9028     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9029   %}
 9030   ins_pipe(ialu_reg);
 9031 %}
 9032 
 9033 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9034   predicate(!UseAPX);
 9035   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9036 
 9037   ins_cost(200); // XXX
 9038   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9039   ins_encode %{
 9040     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9041   %}
 9042   ins_pipe(pipe_cmov_reg);
 9043 %}
 9044 
 9045 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9046   predicate(UseAPX);
 9047   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9048 
 9049   ins_cost(200);
 9050   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9051   ins_encode %{
 9052     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9053   %}
 9054   ins_pipe(pipe_cmov_reg);
 9055 %}
 9056 
 9057 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9058 %{
 9059   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9060   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9061 
 9062   ins_cost(100); // XXX
 9063   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9064   ins_encode %{
 9065     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9066     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9067   %}
 9068   ins_pipe(ialu_reg);
 9069 %}
 9070 
 9071 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9072   predicate(!UseAPX);
 9073   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9074   ins_cost(200);
 9075   expand %{
 9076     cmovI_regU(cop, cr, dst, src);
 9077   %}
 9078 %}
 9079 
 9080 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
 9081   predicate(UseAPX);
 9082   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9083   ins_cost(200);
 9084   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9085   ins_encode %{
 9086     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9087   %}
 9088   ins_pipe(pipe_cmov_reg);
 9089 %}
 9090 
 9091 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9092   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9093   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9094 
 9095   ins_cost(200); // XXX
 9096   format %{ "cmovpl  $dst, $src\n\t"
 9097             "cmovnel $dst, $src" %}
 9098   ins_encode %{
 9099     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9100     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9101   %}
 9102   ins_pipe(pipe_cmov_reg);
 9103 %}
 9104 
 9105 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9106   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9107   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9108   effect(TEMP dst);
 9109 
 9110   ins_cost(200);
 9111   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9112             "cmovnel  $dst, $src2" %}
 9113   ins_encode %{
 9114     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9115     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9116   %}
 9117   ins_pipe(pipe_cmov_reg);
 9118 %}
 9119 
 9120 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9121 // inputs of the CMove
 9122 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9123   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9124   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9125   effect(TEMP dst);
 9126 
 9127   ins_cost(200); // XXX
 9128   format %{ "cmovpl  $dst, $src\n\t"
 9129             "cmovnel $dst, $src" %}
 9130   ins_encode %{
 9131     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9132     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9133   %}
 9134   ins_pipe(pipe_cmov_reg);
 9135 %}
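
// Concretely: dst = (x == y) ? dst : src is the same as
// dst = (x != y) ? src : dst, so swapping the CMove's data inputs lets the
// eq case reuse the cmovp/cmovne sequence of the ne pattern.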
 9136 
// We need this special handling only for the eq / neq comparisons, since NaN == NaN
// is false and the parity flag is set if either operand is NaN.
 9139 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9140   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9141   match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
 9142   effect(TEMP dst);
 9143 
 9144   ins_cost(200);
 9145   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9146             "cmovnel  $dst, $src2" %}
 9147   ins_encode %{
 9148     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9149     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9150   %}
 9151   ins_pipe(pipe_cmov_reg);
 9152 %}
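
// Example: ucomiss/ucomisd leave ZF = PF = CF = 1 when either operand is
// NaN, so a bare cmove/cmovne would wrongly treat NaN == NaN as equal; the
// leading cmovp/ecmovp selects the "not equal" value for the unordered case
// before the cmovne handles the ordered one.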
 9153 
 9154 // Conditional move
 9155 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9156   predicate(!UseAPX);
 9157   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9158 
 9159   ins_cost(250); // XXX
 9160   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9161   ins_encode %{
 9162     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9163   %}
 9164   ins_pipe(pipe_cmov_mem);
 9165 %}
 9166 
 9167 // Conditional move
 9168 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9169 %{
 9170   predicate(UseAPX);
 9171   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9172 
 9173   ins_cost(250);
 9174   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9175   ins_encode %{
 9176     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9177   %}
 9178   ins_pipe(pipe_cmov_mem);
 9179 %}
 9180 
 9181 // Conditional move
 9182 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9183 %{
 9184   predicate(!UseAPX);
 9185   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9186 
 9187   ins_cost(250); // XXX
 9188   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9189   ins_encode %{
 9190     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9191   %}
 9192   ins_pipe(pipe_cmov_mem);
 9193 %}
 9194 
 9195 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9196   predicate(!UseAPX);
 9197   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9198   ins_cost(250);
 9199   expand %{
 9200     cmovI_memU(cop, cr, dst, src);
 9201   %}
 9202 %}
 9203 
 9204 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9205 %{
 9206   predicate(UseAPX);
 9207   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9208 
 9209   ins_cost(250);
 9210   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9211   ins_encode %{
 9212     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9213   %}
 9214   ins_pipe(pipe_cmov_mem);
 9215 %}
 9216 
 9217 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
 9218 %{
 9219   predicate(UseAPX);
 9220   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9221   ins_cost(250);
 9222   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9223   ins_encode %{
 9224     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9225   %}
 9226   ins_pipe(pipe_cmov_mem);
 9227 %}
 9228 
 9229 // Conditional move
 9230 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9231 %{
 9232   predicate(!UseAPX);
 9233   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9234 
 9235   ins_cost(200); // XXX
 9236   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9237   ins_encode %{
 9238     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9239   %}
 9240   ins_pipe(pipe_cmov_reg);
 9241 %}
 9242 
 9243 // Conditional move ndd
 9244 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9245 %{
 9246   predicate(UseAPX);
 9247   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9248 
 9249   ins_cost(200);
 9250   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9251   ins_encode %{
 9252     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9253   %}
 9254   ins_pipe(pipe_cmov_reg);
 9255 %}
 9256 
 9257 // Conditional move
 9258 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9259 %{
 9260   predicate(!UseAPX);
 9261   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9262 
 9263   ins_cost(200); // XXX
 9264   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9265   ins_encode %{
 9266     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9267   %}
 9268   ins_pipe(pipe_cmov_reg);
 9269 %}
 9270 
 9271 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9272   predicate(!UseAPX);
 9273   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9274   ins_cost(200);
 9275   expand %{
 9276     cmovN_regU(cop, cr, dst, src);
 9277   %}
 9278 %}
 9279 
 9280 // Conditional move ndd
 9281 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9282 %{
 9283   predicate(UseAPX);
 9284   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9285 
 9286   ins_cost(200);
 9287   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9288   ins_encode %{
 9289     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9290   %}
 9291   ins_pipe(pipe_cmov_reg);
 9292 %}
 9293 
 9294 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
 9295   predicate(UseAPX);
 9296   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9297   ins_cost(200);
 9298   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9299   ins_encode %{
 9300     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9301   %}
 9302   ins_pipe(pipe_cmov_reg);
 9303 %}
 9304 
 9305 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9306   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9307   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9308 
 9309   ins_cost(200); // XXX
 9310   format %{ "cmovpl  $dst, $src\n\t"
 9311             "cmovnel $dst, $src" %}
 9312   ins_encode %{
 9313     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9314     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9315   %}
 9316   ins_pipe(pipe_cmov_reg);
 9317 %}
 9318 
 9319 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9320 // inputs of the CMove
 9321 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9322   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9323   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9324 
 9325   ins_cost(200); // XXX
 9326   format %{ "cmovpl  $dst, $src\n\t"
 9327             "cmovnel $dst, $src" %}
 9328   ins_encode %{
 9329     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9330     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9331   %}
 9332   ins_pipe(pipe_cmov_reg);
 9333 %}
 9334 
 9335 // Conditional move
 9336 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9337 %{
 9338   predicate(!UseAPX);
 9339   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9340 
 9341   ins_cost(200); // XXX
 9342   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9343   ins_encode %{
 9344     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9345   %}
 9346   ins_pipe(pipe_cmov_reg);  // XXX
 9347 %}
 9348 
 9349 // Conditional move ndd
 9350 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9351 %{
 9352   predicate(UseAPX);
 9353   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9354 
 9355   ins_cost(200);
 9356   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9357   ins_encode %{
 9358     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9359   %}
 9360   ins_pipe(pipe_cmov_reg);
 9361 %}
 9362 
 9363 // Conditional move
 9364 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9365 %{
 9366   predicate(!UseAPX);
 9367   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9368 
 9369   ins_cost(200); // XXX
 9370   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9371   ins_encode %{
 9372     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9373   %}
 9374   ins_pipe(pipe_cmov_reg); // XXX
 9375 %}
 9376 
 9377 // Conditional move ndd
 9378 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9379 %{
 9380   predicate(UseAPX);
 9381   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9382 
 9383   ins_cost(200);
 9384   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9385   ins_encode %{
 9386     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9387   %}
 9388   ins_pipe(pipe_cmov_reg);
 9389 %}
 9390 
 9391 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9392   predicate(!UseAPX);
 9393   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9394   ins_cost(200);
 9395   expand %{
 9396     cmovP_regU(cop, cr, dst, src);
 9397   %}
 9398 %}
 9399 
 9400 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
 9401   predicate(UseAPX);
 9402   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9403   ins_cost(200);
 9404   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9405   ins_encode %{
 9406     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9407   %}
 9408   ins_pipe(pipe_cmov_reg);
 9409 %}
 9410 
 9411 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9412   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9413   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9414 
 9415   ins_cost(200); // XXX
 9416   format %{ "cmovpq  $dst, $src\n\t"
 9417             "cmovneq $dst, $src" %}
 9418   ins_encode %{
 9419     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9420     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9421   %}
 9422   ins_pipe(pipe_cmov_reg);
 9423 %}
 9424 
 9425 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9426   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9427   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9428   effect(TEMP dst);
 9429 
 9430   ins_cost(200);
 9431   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9432             "cmovneq  $dst, $src2" %}
 9433   ins_encode %{
 9434     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9435     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9436   %}
 9437   ins_pipe(pipe_cmov_reg);
 9438 %}
 9439 
 9440 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9441 // inputs of the CMove
 9442 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9443   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9444   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9445 
 9446   ins_cost(200); // XXX
 9447   format %{ "cmovpq  $dst, $src\n\t"
 9448             "cmovneq $dst, $src" %}
 9449   ins_encode %{
 9450     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9451     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9452   %}
 9453   ins_pipe(pipe_cmov_reg);
 9454 %}
 9455 
 9456 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9457   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9458   match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
 9459   effect(TEMP dst);
 9460 
 9461   ins_cost(200);
 9462   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9463             "cmovneq  $dst, $src2" %}
 9464   ins_encode %{
 9465     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9466     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9467   %}
 9468   ins_pipe(pipe_cmov_reg);
 9469 %}
 9470 
 9471 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9472 %{
 9473   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9474   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9475 
 9476   ins_cost(100); // XXX
 9477   format %{ "setbn$cop $dst\t# signed, long" %}
 9478   ins_encode %{
 9479     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9480     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9481   %}
 9482   ins_pipe(ialu_reg);
 9483 %}
 9484 
 9485 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9486 %{
 9487   predicate(!UseAPX);
 9488   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9489 
 9490   ins_cost(200); // XXX
 9491   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9492   ins_encode %{
 9493     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9494   %}
 9495   ins_pipe(pipe_cmov_reg);  // XXX
 9496 %}
 9497 
 9498 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9499 %{
 9500   predicate(UseAPX);
 9501   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9502 
 9503   ins_cost(200);
 9504   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9505   ins_encode %{
 9506     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9507   %}
 9508   ins_pipe(pipe_cmov_reg);
 9509 %}
 9510 
 9511 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9512 %{
 9513   predicate(!UseAPX);
 9514   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9515 
 9516   ins_cost(200); // XXX
 9517   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9518   ins_encode %{
 9519     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9520   %}
 9521   ins_pipe(pipe_cmov_mem);  // XXX
 9522 %}
 9523 
 9524 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9525 %{
 9526   predicate(UseAPX);
 9527   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9528 
 9529   ins_cost(200);
 9530   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9531   ins_encode %{
 9532     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9533   %}
 9534   ins_pipe(pipe_cmov_mem);
 9535 %}
 9536 
 9537 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9538 %{
 9539   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9540   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9541 
 9542   ins_cost(100); // XXX
 9543   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9544   ins_encode %{
 9545     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9546     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9547   %}
 9548   ins_pipe(ialu_reg);
 9549 %}
 9550 
 9551 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9552 %{
 9553   predicate(!UseAPX);
 9554   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9555 
 9556   ins_cost(200); // XXX
 9557   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9558   ins_encode %{
 9559     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9560   %}
 9561   ins_pipe(pipe_cmov_reg); // XXX
 9562 %}
 9563 
 9564 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9565 %{
 9566   predicate(UseAPX);
 9567   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9568 
 9569   ins_cost(200);
 9570   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9571   ins_encode %{
 9572     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9573   %}
 9574   ins_pipe(pipe_cmov_reg);
 9575 %}
 9576 
 9577 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9578 %{
 9579   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9580   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9581 
 9582   ins_cost(100); // XXX
 9583   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9584   ins_encode %{
 9585     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9586     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9587   %}
 9588   ins_pipe(ialu_reg);
 9589 %}
 9590 
 9591 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9592   predicate(!UseAPX);
 9593   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9594   ins_cost(200);
 9595   expand %{
 9596     cmovL_regU(cop, cr, dst, src);
 9597   %}
 9598 %}
 9599 
 9600 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
 9601 %{
 9602   predicate(UseAPX);
 9603   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9604   ins_cost(200);
 9605   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9606   ins_encode %{
 9607     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9608   %}
 9609   ins_pipe(pipe_cmov_reg);
 9610 %}
 9611 
 9612 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9613   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9614   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9615 
 9616   ins_cost(200); // XXX
 9617   format %{ "cmovpq  $dst, $src\n\t"
 9618             "cmovneq $dst, $src" %}
 9619   ins_encode %{
 9620     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9621     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9622   %}
 9623   ins_pipe(pipe_cmov_reg);
 9624 %}
 9625 
 9626 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9627   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9628   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9629   effect(TEMP dst);
 9630 
 9631   ins_cost(200);
 9632   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9633             "cmovneq  $dst, $src2" %}
 9634   ins_encode %{
 9635     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9636     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9637   %}
 9638   ins_pipe(pipe_cmov_reg);
 9639 %}
 9640 
 9641 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9642 // inputs of the CMove
 9643 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9644   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9645   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9646 
 9647   ins_cost(200); // XXX
 9648   format %{ "cmovpq  $dst, $src\n\t"
 9649             "cmovneq $dst, $src" %}
 9650   ins_encode %{
 9651     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9652     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9653   %}
 9654   ins_pipe(pipe_cmov_reg);
 9655 %}
 9656 
 9657 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9658   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9659   match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
 9660   effect(TEMP dst);
 9661 
 9662   ins_cost(200);
 9663   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
            "cmovneq  $dst, $src2" %}
 9665   ins_encode %{
 9666     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9667     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9668   %}
 9669   ins_pipe(pipe_cmov_reg);
 9670 %}
 9671 
 9672 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9673 %{
 9674   predicate(!UseAPX);
 9675   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9676 
 9677   ins_cost(200); // XXX
 9678   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9679   ins_encode %{
 9680     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9681   %}
 9682   ins_pipe(pipe_cmov_mem); // XXX
 9683 %}
 9684 
 9685 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9686   predicate(!UseAPX);
 9687   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9688   ins_cost(200);
 9689   expand %{
 9690     cmovL_memU(cop, cr, dst, src);
 9691   %}
 9692 %}
 9693 
 9694 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9695 %{
 9696   predicate(UseAPX);
 9697   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9698 
 9699   ins_cost(200);
 9700   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9701   ins_encode %{
 9702     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9703   %}
 9704   ins_pipe(pipe_cmov_mem);
 9705 %}
 9706 
 9707 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
 9708 %{
 9709   predicate(UseAPX);
 9710   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9711   ins_cost(200);
 9712   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9713   ins_encode %{
 9714     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9715   %}
 9716   ins_pipe(pipe_cmov_mem);
 9717 %}
 9718 
 9719 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9720 %{
 9721   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9722 
 9723   ins_cost(200); // XXX
 9724   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9725             "movss     $dst, $src\n"
 9726     "skip:" %}
 9727   ins_encode %{
 9728     Label Lskip;
 9729     // Invert sense of branch from sense of CMOV
 9730     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9731     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9732     __ bind(Lskip);
 9733   %}
 9734   ins_pipe(pipe_slow);
 9735 %}
 9736 
 9737 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9738 %{
 9739   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9740 
 9741   ins_cost(200); // XXX
 9742   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9743             "movss     $dst, $src\n"
 9744     "skip:" %}
 9745   ins_encode %{
 9746     Label Lskip;
 9747     // Invert sense of branch from sense of CMOV
 9748     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9749     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9750     __ bind(Lskip);
 9751   %}
 9752   ins_pipe(pipe_slow);
 9753 %}
 9754 
 9755 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9756   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9757   ins_cost(200);
 9758   expand %{
 9759     cmovF_regU(cop, cr, dst, src);
 9760   %}
 9761 %}
 9762 
 9763 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9764 %{
 9765   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9766 
 9767   ins_cost(200); // XXX
 9768   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9769             "movsd     $dst, $src\n"
 9770     "skip:" %}
 9771   ins_encode %{
 9772     Label Lskip;
 9773     // Invert sense of branch from sense of CMOV
 9774     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9775     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9776     __ bind(Lskip);
 9777   %}
 9778   ins_pipe(pipe_slow);
 9779 %}
 9780 
 9781 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9782 %{
 9783   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9784 
 9785   ins_cost(200); // XXX
 9786   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9787             "movsd     $dst, $src\n"
 9788     "skip:" %}
 9789   ins_encode %{
 9790     Label Lskip;
 9791     // Invert sense of branch from sense of CMOV
 9792     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9793     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9794     __ bind(Lskip);
 9795   %}
 9796   ins_pipe(pipe_slow);
 9797 %}
 9798 
 9799 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9800   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9801   ins_cost(200);
 9802   expand %{
 9803     cmovD_regU(cop, cr, dst, src);
 9804   %}
 9805 %}
 9806 
 9807 //----------Arithmetic Instructions--------------------------------------------
 9808 //----------Addition Instructions----------------------------------------------
 9809 
 9810 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9811 %{
 9812   predicate(!UseAPX);
 9813   match(Set dst (AddI dst src));
 9814   effect(KILL cr);
 9815   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9816   format %{ "addl    $dst, $src\t# int" %}
 9817   ins_encode %{
 9818     __ addl($dst$$Register, $src$$Register);
 9819   %}
 9820   ins_pipe(ialu_reg_reg);
 9821 %}
 9822 
 9823 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9824 %{
 9825   predicate(UseAPX);
 9826   match(Set dst (AddI src1 src2));
 9827   effect(KILL cr);
 9828   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9829 
 9830   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9831   ins_encode %{
 9832     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9833   %}
 9834   ins_pipe(ialu_reg_reg);
 9835 %}
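
// A note on the trailing 'false' argument: the APX forms can also be encoded
// as no-flags (NF) variants; passing false selects the flag-setting
// encoding, which these patterns require since they declare KILL cr and
// advertise their flag results via flag(...) above.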
 9836 
 9837 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9838 %{
 9839   predicate(!UseAPX);
 9840   match(Set dst (AddI dst src));
 9841   effect(KILL cr);
 9842   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9843 
 9844   format %{ "addl    $dst, $src\t# int" %}
 9845   ins_encode %{
 9846     __ addl($dst$$Register, $src$$constant);
 9847   %}
 9848   ins_pipe( ialu_reg );
 9849 %}
 9850 
 9851 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9852 %{
 9853   predicate(UseAPX);
 9854   match(Set dst (AddI src1 src2));
 9855   effect(KILL cr);
 9856   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9857 
 9858   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9859   ins_encode %{
 9860     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9861   %}
 9862   ins_pipe( ialu_reg );
 9863 %}
 9864 
 9865 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
 9866 %{
 9867   predicate(UseAPX);
 9868   match(Set dst (AddI (LoadI src1) src2));
 9869   effect(KILL cr);
 9870   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9871 
 9872   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9873   ins_encode %{
 9874     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
 9875   %}
 9876   ins_pipe( ialu_reg );
 9877 %}
 9878 
 9879 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9880 %{
 9881   predicate(!UseAPX);
 9882   match(Set dst (AddI dst (LoadI src)));
 9883   effect(KILL cr);
 9884   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9885 
 9886   ins_cost(150); // XXX
 9887   format %{ "addl    $dst, $src\t# int" %}
 9888   ins_encode %{
 9889     __ addl($dst$$Register, $src$$Address);
 9890   %}
 9891   ins_pipe(ialu_reg_mem);
 9892 %}
 9893 
 9894 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
 9895 %{
 9896   predicate(UseAPX);
 9897   match(Set dst (AddI src1 (LoadI src2)));
 9898   effect(KILL cr);
 9899   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9900 
 9901   ins_cost(150);
 9902   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9903   ins_encode %{
 9904     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
 9905   %}
 9906   ins_pipe(ialu_reg_mem);
 9907 %}
 9908 
 9909 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9910 %{
 9911   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9912   effect(KILL cr);
 9913   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9914 
 9915   ins_cost(150); // XXX
 9916   format %{ "addl    $dst, $src\t# int" %}
 9917   ins_encode %{
 9918     __ addl($dst$$Address, $src$$Register);
 9919   %}
 9920   ins_pipe(ialu_mem_reg);
 9921 %}
 9922 
 9923 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
 9924 %{
 9925   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9926   effect(KILL cr);
 9927   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9928 
 9930   ins_cost(125); // XXX
 9931   format %{ "addl    $dst, $src\t# int" %}
 9932   ins_encode %{
 9933     __ addl($dst$$Address, $src$$constant);
 9934   %}
 9935   ins_pipe(ialu_mem_imm);
 9936 %}
 9937 
 9938 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
 9939 %{
 9940   predicate(!UseAPX && UseIncDec);
 9941   match(Set dst (AddI dst src));
 9942   effect(KILL cr);
 9943 
 9944   format %{ "incl    $dst\t# int" %}
 9945   ins_encode %{
 9946     __ incrementl($dst$$Register);
 9947   %}
 9948   ins_pipe(ialu_reg);
 9949 %}
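
// Unlike addl/subl, incl/decl leave CF unchanged, which is why the inc/dec
// patterns carry no flag(...) annotation and are gated on UseIncDec (some
// cores pay a partial-flags penalty for them).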
 9950 
 9951 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
 9952 %{
 9953   predicate(UseAPX && UseIncDec);
 9954   match(Set dst (AddI src val));
 9955   effect(KILL cr);
 9956 
 9957   format %{ "eincl    $dst, $src\t# int ndd" %}
 9958   ins_encode %{
 9959     __ eincl($dst$$Register, $src$$Register, false);
 9960   %}
 9961   ins_pipe(ialu_reg);
 9962 %}
 9963 
 9964 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
 9965 %{
 9966   predicate(UseAPX && UseIncDec);
 9967   match(Set dst (AddI (LoadI src) val));
 9968   effect(KILL cr);
 9969 
 9970   format %{ "eincl    $dst, $src\t# int ndd" %}
 9971   ins_encode %{
 9972     __ eincl($dst$$Register, $src$$Address, false);
 9973   %}
 9974   ins_pipe(ialu_reg);
 9975 %}
 9976 
 9977 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
 9978 %{
 9979   predicate(UseIncDec);
 9980   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9981   effect(KILL cr);
 9982 
 9983   ins_cost(125); // XXX
 9984   format %{ "incl    $dst\t# int" %}
 9985   ins_encode %{
 9986     __ incrementl($dst$$Address);
 9987   %}
 9988   ins_pipe(ialu_mem_imm);
 9989 %}
 9990 
 9991 // XXX why does that use AddI
 9992 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
 9993 %{
 9994   predicate(!UseAPX && UseIncDec);
 9995   match(Set dst (AddI dst src));
 9996   effect(KILL cr);
 9997 
 9998   format %{ "decl    $dst\t# int" %}
 9999   ins_encode %{
10000     __ decrementl($dst$$Register);
10001   %}
10002   ins_pipe(ialu_reg);
10003 %}
10004 
10005 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10006 %{
10007   predicate(UseAPX && UseIncDec);
10008   match(Set dst (AddI src val));
10009   effect(KILL cr);
10010 
10011   format %{ "edecl    $dst, $src\t# int ndd" %}
10012   ins_encode %{
10013     __ edecl($dst$$Register, $src$$Register, false);
10014   %}
10015   ins_pipe(ialu_reg);
10016 %}
10017 
10018 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10019 %{
10020   predicate(UseAPX && UseIncDec);
10021   match(Set dst (AddI (LoadI src) val));
10022   effect(KILL cr);
10023 
10024   format %{ "edecl    $dst, $src\t# int ndd" %}
10025   ins_encode %{
10026     __ edecl($dst$$Register, $src$$Address, false);
10027   %}
10028   ins_pipe(ialu_reg);
10029 %}
10030 
10031 // XXX why does that use AddI
10032 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10033 %{
10034   predicate(UseIncDec);
10035   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10036   effect(KILL cr);
10037 
10038   ins_cost(125); // XXX
10039   format %{ "decl    $dst\t# int" %}
10040   ins_encode %{
10041     __ decrementl($dst$$Address);
10042   %}
10043   ins_pipe(ialu_mem_imm);
10044 %}
10045 
10046 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10047 %{
10048   predicate(VM_Version::supports_fast_2op_lea());
10049   match(Set dst (AddI (LShiftI index scale) disp));
10050 
10051   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10052   ins_encode %{
10053     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10054     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10055   %}
10056   ins_pipe(ialu_reg_reg);
10057 %}
10058 
10059 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10060 %{
10061   predicate(VM_Version::supports_fast_3op_lea());
10062   match(Set dst (AddI (AddI base index) disp));
10063 
10064   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10065   ins_encode %{
10066     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10067   %}
10068   ins_pipe(ialu_reg_reg);
10069 %}
10070 
10071 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10072 %{
10073   predicate(VM_Version::supports_fast_2op_lea());
10074   match(Set dst (AddI base (LShiftI index scale)));
10075 
10076   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10077   ins_encode %{
10078     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10079     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10080   %}
10081   ins_pipe(ialu_reg_reg);
10082 %}
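
// (rbp and r13 are excluded as the base above because their base encodings
// require a displacement byte, which would make the 2-operand lea longer for
// no gain.)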
10083 
10084 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10085 %{
10086   predicate(VM_Version::supports_fast_3op_lea());
10087   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10088 
10089   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10090   ins_encode %{
10091     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10092     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10093   %}
10094   ins_pipe(ialu_reg_reg);
10095 %}
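
// Example: with fast-lea support an expression such as base + (i << 2) + 8
// folds into the single instruction leal dst, [base + i<<2 + 8] via the last
// pattern above, instead of a shift followed by two adds.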
10096 
10097 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10098 %{
10099   predicate(!UseAPX);
10100   match(Set dst (AddL dst src));
10101   effect(KILL cr);
10102   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10103 
10104   format %{ "addq    $dst, $src\t# long" %}
10105   ins_encode %{
10106     __ addq($dst$$Register, $src$$Register);
10107   %}
10108   ins_pipe(ialu_reg_reg);
10109 %}
10110 
10111 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10112 %{
10113   predicate(UseAPX);
10114   match(Set dst (AddL src1 src2));
10115   effect(KILL cr);
10116   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10117 
10118   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10119   ins_encode %{
10120     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10121   %}
10122   ins_pipe(ialu_reg_reg);
10123 %}
10124 
10125 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10126 %{
10127   predicate(!UseAPX);
10128   match(Set dst (AddL dst src));
10129   effect(KILL cr);
10130   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10131 
10132   format %{ "addq    $dst, $src\t# long" %}
10133   ins_encode %{
10134     __ addq($dst$$Register, $src$$constant);
10135   %}
10136   ins_pipe( ialu_reg );
10137 %}
10138 
10139 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10140 %{
10141   predicate(UseAPX);
10142   match(Set dst (AddL src1 src2));
10143   effect(KILL cr);
10144   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10145 
10146   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10147   ins_encode %{
10148     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10149   %}
10150   ins_pipe( ialu_reg );
10151 %}
10152 
10153 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10154 %{
10155   predicate(UseAPX);
10156   match(Set dst (AddL (LoadL src1) src2));
10157   effect(KILL cr);
10158   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10159 
10160   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10161   ins_encode %{
10162     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10163   %}
10164   ins_pipe( ialu_reg );
10165 %}
10166 
10167 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10168 %{
10169   predicate(!UseAPX);
10170   match(Set dst (AddL dst (LoadL src)));
10171   effect(KILL cr);
10172   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10173 
10174   ins_cost(150); // XXX
10175   format %{ "addq    $dst, $src\t# long" %}
10176   ins_encode %{
10177     __ addq($dst$$Register, $src$$Address);
10178   %}
10179   ins_pipe(ialu_reg_mem);
10180 %}
10181 
10182 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10183 %{
10184   predicate(UseAPX);
10185   match(Set dst (AddL src1 (LoadL src2)));
10186   effect(KILL cr);
10187   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10188 
10189   ins_cost(150);
10190   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10191   ins_encode %{
10192     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10193   %}
10194   ins_pipe(ialu_reg_mem);
10195 %}
10196 
10197 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10198 %{
10199   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10200   effect(KILL cr);
10201   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10202 
10203   ins_cost(150); // XXX
10204   format %{ "addq    $dst, $src\t# long" %}
10205   ins_encode %{
10206     __ addq($dst$$Address, $src$$Register);
10207   %}
10208   ins_pipe(ialu_mem_reg);
10209 %}
10210 
10211 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10212 %{
10213   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10214   effect(KILL cr);
10215   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10216 
10217   ins_cost(125); // XXX
10218   format %{ "addq    $dst, $src\t# long" %}
10219   ins_encode %{
10220     __ addq($dst$$Address, $src$$constant);
10221   %}
10222   ins_pipe(ialu_mem_imm);
10223 %}
10224 
10225 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10226 %{
10227   predicate(!UseAPX && UseIncDec);
10228   match(Set dst (AddL dst src));
10229   effect(KILL cr);
10230 
10231   format %{ "incq    $dst\t# long" %}
10232   ins_encode %{
10233     __ incrementq($dst$$Register);
10234   %}
10235   ins_pipe(ialu_reg);
10236 %}
10237 
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10239 %{
10240   predicate(UseAPX && UseIncDec);
10241   match(Set dst (AddL src val));
10242   effect(KILL cr);
10243 
10244   format %{ "eincq    $dst, $src\t# long ndd" %}
10245   ins_encode %{
10246     __ eincq($dst$$Register, $src$$Register, false);
10247   %}
10248   ins_pipe(ialu_reg);
10249 %}
10250 
10251 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10252 %{
10253   predicate(UseAPX && UseIncDec);
10254   match(Set dst (AddL (LoadL src) val));
10255   effect(KILL cr);
10256 
10257   format %{ "eincq    $dst, $src\t# long ndd" %}
10258   ins_encode %{
10259     __ eincq($dst$$Register, $src$$Address, false);
10260   %}
10261   ins_pipe(ialu_reg);
10262 %}
10263 
10264 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10265 %{
10266   predicate(UseIncDec);
10267   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10268   effect(KILL cr);
10269 
10270   ins_cost(125); // XXX
10271   format %{ "incq    $dst\t# long" %}
10272   ins_encode %{
10273     __ incrementq($dst$$Address);
10274   %}
10275   ins_pipe(ialu_mem_imm);
10276 %}
10277 
10278 // XXX why does that use AddL
10279 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10280 %{
10281   predicate(!UseAPX && UseIncDec);
10282   match(Set dst (AddL dst src));
10283   effect(KILL cr);
10284 
10285   format %{ "decq    $dst\t# long" %}
10286   ins_encode %{
10287     __ decrementq($dst$$Register);
10288   %}
10289   ins_pipe(ialu_reg);
10290 %}
10291 
10292 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10293 %{
10294   predicate(UseAPX && UseIncDec);
10295   match(Set dst (AddL src val));
10296   effect(KILL cr);
10297 
10298   format %{ "edecq    $dst, $src\t# long ndd" %}
10299   ins_encode %{
10300     __ edecq($dst$$Register, $src$$Register, false);
10301   %}
10302   ins_pipe(ialu_reg);
10303 %}
10304 
10305 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10306 %{
10307   predicate(UseAPX && UseIncDec);
10308   match(Set dst (AddL (LoadL src) val));
10309   effect(KILL cr);
10310 
10311   format %{ "edecq    $dst, $src\t# long ndd" %}
10312   ins_encode %{
10313     __ edecq($dst$$Register, $src$$Address, false);
10314   %}
10315   ins_pipe(ialu_reg);
10316 %}
10317 
10318 // XXX why does that use AddL
10319 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10320 %{
10321   predicate(UseIncDec);
10322   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10323   effect(KILL cr);
10324 
10325   ins_cost(125); // XXX
10326   format %{ "decq    $dst\t# long" %}
10327   ins_encode %{
10328     __ decrementq($dst$$Address);
10329   %}
10330   ins_pipe(ialu_mem_imm);
10331 %}
10332 
10333 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10334 %{
10335   predicate(VM_Version::supports_fast_2op_lea());
10336   match(Set dst (AddL (LShiftL index scale) disp));
10337 
10338   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10339   ins_encode %{
10340     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10341     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10342   %}
10343   ins_pipe(ialu_reg_reg);
10344 %}
10345 
10346 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10347 %{
10348   predicate(VM_Version::supports_fast_3op_lea());
10349   match(Set dst (AddL (AddL base index) disp));
10350 
10351   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10352   ins_encode %{
10353     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10354   %}
10355   ins_pipe(ialu_reg_reg);
10356 %}
10357 
10358 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10359 %{
10360   predicate(VM_Version::supports_fast_2op_lea());
10361   match(Set dst (AddL base (LShiftL index scale)));
10362 
10363   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10364   ins_encode %{
10365     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10366     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10367   %}
10368   ins_pipe(ialu_reg_reg);
10369 %}
10370 
10371 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10372 %{
10373   predicate(VM_Version::supports_fast_3op_lea());
10374   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10375 
10376   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10377   ins_encode %{
10378     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10379     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10380   %}
10381   ins_pipe(ialu_reg_reg);
10382 %}
10383 
10384 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10385 %{
10386   match(Set dst (AddP dst src));
10387   effect(KILL cr);
10388   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10389 
10390   format %{ "addq    $dst, $src\t# ptr" %}
10391   ins_encode %{
10392     __ addq($dst$$Register, $src$$Register);
10393   %}
10394   ins_pipe(ialu_reg_reg);
10395 %}
10396 
10397 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10398 %{
10399   match(Set dst (AddP dst src));
10400   effect(KILL cr);
10401   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10402 
10403   format %{ "addq    $dst, $src\t# ptr" %}
10404   ins_encode %{
10405     __ addq($dst$$Register, $src$$constant);
10406   %}
10407   ins_pipe( ialu_reg );
10408 %}
10409 
10410 // XXX addP mem ops ????
10411 
10412 instruct checkCastPP(rRegP dst)
10413 %{
10414   match(Set dst (CheckCastPP dst));
10415 
10416   size(0);
10417   format %{ "# checkcastPP of $dst" %}
10418   ins_encode(/* empty encoding */);
10419   ins_pipe(empty);
10420 %}
10421 
10422 instruct castPP(rRegP dst)
10423 %{
10424   match(Set dst (CastPP dst));
10425 
10426   size(0);
10427   format %{ "# castPP of $dst" %}
10428   ins_encode(/* empty encoding */);
10429   ins_pipe(empty);
10430 %}
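
// checkCastPP/castPP (and the unchecked castII/castLL/castFF/... below) are
// size(0) no-ops at emission time; they exist only to pin a sharpened type
// on a value in the ideal graph.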
10431 
10432 instruct castII(rRegI dst)
10433 %{
10434   predicate(VerifyConstraintCasts == 0);
10435   match(Set dst (CastII dst));
10436 
10437   size(0);
10438   format %{ "# castII of $dst" %}
10439   ins_encode(/* empty encoding */);
10440   ins_cost(0);
10441   ins_pipe(empty);
10442 %}
10443 
10444 instruct castII_checked(rRegI dst, rFlagsReg cr)
10445 %{
10446   predicate(VerifyConstraintCasts > 0);
10447   match(Set dst (CastII dst));
10448 
10449   effect(KILL cr);
10450   format %{ "# cast_checked_II $dst" %}
10451   ins_encode %{
10452     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10453   %}
10454   ins_pipe(pipe_slow);
10455 %}
10456 
10457 instruct castLL(rRegL dst)
10458 %{
10459   predicate(VerifyConstraintCasts == 0);
10460   match(Set dst (CastLL dst));
10461 
10462   size(0);
10463   format %{ "# castLL of $dst" %}
10464   ins_encode(/* empty encoding */);
10465   ins_cost(0);
10466   ins_pipe(empty);
10467 %}
10468 
10469 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10470 %{
10471   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10472   match(Set dst (CastLL dst));
10473 
10474   effect(KILL cr);
10475   format %{ "# cast_checked_LL $dst" %}
10476   ins_encode %{
10477     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10478   %}
10479   ins_pipe(pipe_slow);
10480 %}
10481 
10482 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10483 %{
10484   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10485   match(Set dst (CastLL dst));
10486 
10487   effect(KILL cr, TEMP tmp);
10488   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10489   ins_encode %{
10490     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10491   %}
10492   ins_pipe(pipe_slow);
10493 %}
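
// The _L32 variant applies when both range bounds fit in 32-bit immediates
// (castLL_is_imm32), so verify_long_in_range can compare against them
// directly (noreg); otherwise a TEMP register is reserved to materialize a
// 64-bit bound.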
10494 
10495 instruct castFF(regF dst)
10496 %{
10497   match(Set dst (CastFF dst));
10498 
10499   size(0);
10500   format %{ "# castFF of $dst" %}
10501   ins_encode(/* empty encoding */);
10502   ins_cost(0);
10503   ins_pipe(empty);
10504 %}
10505 
10506 instruct castHH(regF dst)
10507 %{
10508   match(Set dst (CastHH dst));
10509 
10510   size(0);
10511   format %{ "# castHH of $dst" %}
10512   ins_encode(/* empty encoding */);
10513   ins_cost(0);
10514   ins_pipe(empty);
10515 %}
10516 
10517 instruct castDD(regD dst)
10518 %{
10519   match(Set dst (CastDD dst));
10520 
10521   size(0);
10522   format %{ "# castDD of $dst" %}
10523   ins_encode(/* empty encoding */);
10524   ins_cost(0);
10525   ins_pipe(empty);
10526 %}
10527 
10528 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
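//
// Every CompareAndSwap* rule below emits the same pattern: a lock-prefixed
// cmpxchg with the expected value pinned in rax, followed by setcc of the
// equality outcome into $res. A minimal C11 sketch of the semantics being
// implemented (illustrative only -- HotSpot does not go through <stdatomic.h>):
//
//   #include <stdatomic.h>
//   #include <stdbool.h>
//
//   bool cas_long(_Atomic long *mem, long oldval, long newval) {
//     // atomically: if (*mem == oldval) { *mem = newval; return true; }
//     return atomic_compare_exchange_strong(mem, &oldval, newval);
//   }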
10529 instruct compareAndSwapP(rRegI res,
10530                          memory mem_ptr,
10531                          rax_RegP oldval, rRegP newval,
10532                          rFlagsReg cr)
10533 %{
10534   predicate(n->as_LoadStore()->barrier_data() == 0);
10535   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10536   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10537   effect(KILL cr, KILL oldval);
10538 
10539   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10540             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10541             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10542   ins_encode %{
10543     __ lock();
10544     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10545     __ setcc(Assembler::equal, $res$$Register);
10546   %}
10547   ins_pipe( pipe_cmpxchg );
10548 %}
10549 
10550 instruct compareAndSwapL(rRegI res,
10551                          memory mem_ptr,
10552                          rax_RegL oldval, rRegL newval,
10553                          rFlagsReg cr)
10554 %{
10555   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10556   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10557   effect(KILL cr, KILL oldval);
10558 
10559   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10560             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10561             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10562   ins_encode %{
10563     __ lock();
10564     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10565     __ setcc(Assembler::equal, $res$$Register);
10566   %}
10567   ins_pipe( pipe_cmpxchg );
10568 %}
10569 
10570 instruct compareAndSwapI(rRegI res,
10571                          memory mem_ptr,
10572                          rax_RegI oldval, rRegI newval,
10573                          rFlagsReg cr)
10574 %{
10575   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10576   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10577   effect(KILL cr, KILL oldval);
10578 
10579   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10580             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10581             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10582   ins_encode %{
10583     __ lock();
10584     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10585     __ setcc(Assembler::equal, $res$$Register);
10586   %}
10587   ins_pipe( pipe_cmpxchg );
10588 %}
10589 
10590 instruct compareAndSwapB(rRegI res,
10591                          memory mem_ptr,
10592                          rax_RegI oldval, rRegI newval,
10593                          rFlagsReg cr)
10594 %{
10595   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10596   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10597   effect(KILL cr, KILL oldval);
10598 
10599   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10600             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10601             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10602   ins_encode %{
10603     __ lock();
10604     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10605     __ setcc(Assembler::equal, $res$$Register);
10606   %}
10607   ins_pipe( pipe_cmpxchg );
10608 %}
10609 
10610 instruct compareAndSwapS(rRegI res,
10611                          memory mem_ptr,
10612                          rax_RegI oldval, rRegI newval,
10613                          rFlagsReg cr)
10614 %{
10615   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10616   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10617   effect(KILL cr, KILL oldval);
10618 
10619   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10620             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10621             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10622   ins_encode %{
10623     __ lock();
10624     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10625     __ setcc(Assembler::equal, $res$$Register);
10626   %}
10627   ins_pipe( pipe_cmpxchg );
10628 %}
10629 
10630 instruct compareAndSwapN(rRegI res,
10631                           memory mem_ptr,
10632                           rax_RegN oldval, rRegN newval,
10633                           rFlagsReg cr) %{
10634   predicate(n->as_LoadStore()->barrier_data() == 0);
10635   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10636   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10637   effect(KILL cr, KILL oldval);
10638 
10639   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10640             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10641             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10642   ins_encode %{
10643     __ lock();
10644     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10645     __ setcc(Assembler::equal, $res$$Register);
10646   %}
10647   ins_pipe( pipe_cmpxchg );
10648 %}
10649 
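// CompareAndExchange* differs from CompareAndSwap* only in its result: cmpxchg
// already leaves the prior memory value in rax on failure (and on success rax
// still holds the expected value, which equals the prior memory value), so the
// rax-bound oldval operand is the node's result and no setcc is needed.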
10650 instruct compareAndExchangeB(
10651                          memory mem_ptr,
10652                          rax_RegI oldval, rRegI newval,
10653                          rFlagsReg cr)
10654 %{
10655   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10656   effect(KILL cr);
10657 
10658   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10659             "If rax == $mem_ptr then store $newval into $mem_ptr" %}
10660   ins_encode %{
10661     __ lock();
10662     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10663   %}
10664   ins_pipe( pipe_cmpxchg );
10665 %}
10666 
10667 instruct compareAndExchangeS(
10668                          memory mem_ptr,
10669                          rax_RegI oldval, rRegI newval,
10670                          rFlagsReg cr)
10671 %{
10672   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10673   effect(KILL cr);
10674 
10675   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10676             "If rax == $mem_ptr then store $newval into $mem_ptr" %}
10677   ins_encode %{
10678     __ lock();
10679     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10680   %}
10681   ins_pipe( pipe_cmpxchg );
10682 %}
10683 
10684 instruct compareAndExchangeI(
10685                          memory mem_ptr,
10686                          rax_RegI oldval, rRegI newval,
10687                          rFlagsReg cr)
10688 %{
10689   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10690   effect(KILL cr);
10691 
10692   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10693             "If rax == $mem_ptr then store $newval into $mem_ptr" %}
10694   ins_encode %{
10695     __ lock();
10696     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10697   %}
10698   ins_pipe( pipe_cmpxchg );
10699 %}
10700 
10701 instruct compareAndExchangeL(
10702                          memory mem_ptr,
10703                          rax_RegL oldval, rRegL newval,
10704                          rFlagsReg cr)
10705 %{
10706   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10707   effect(KILL cr);
10708 
10709   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10710             "If rax == $mem_ptr then store $newval into $mem_ptr" %}
10711   ins_encode %{
10712     __ lock();
10713     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10714   %}
10715   ins_pipe( pipe_cmpxchg );
10716 %}
10717 
10718 instruct compareAndExchangeN(
10719                           memory mem_ptr,
10720                           rax_RegN oldval, rRegN newval,
10721                           rFlagsReg cr) %{
10722   predicate(n->as_LoadStore()->barrier_data() == 0);
10723   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10724   effect(KILL cr);
10725 
10726   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10727             "If rax == $mem_ptr then store $newval into $mem_ptr" %}
10728   ins_encode %{
10729     __ lock();
10730     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10731   %}
10732   ins_pipe( pipe_cmpxchg );
10733 %}
10734 
10735 instruct compareAndExchangeP(
10736                          memory mem_ptr,
10737                          rax_RegP oldval, rRegP newval,
10738                          rFlagsReg cr)
10739 %{
10740   predicate(n->as_LoadStore()->barrier_data() == 0);
10741   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10742   effect(KILL cr);
10743 
10744   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10745             "If rax == $mem_ptr then store $newval into $mem_ptr" %}
10746   ins_encode %{
10747     __ lock();
10748     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10749   %}
10750   ins_pipe( pipe_cmpxchg );
10751 %}
10752 
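// GetAndAdd* has two lowerings. When the old value is not consumed
// (result_not_used()) a plain lock-prefixed add on memory suffices; otherwise
// lock xadd is used, since it writes the previous memory value back into the
// register operand. A C11 sketch of the two shapes (illustrative only):
//
//   #include <stdatomic.h>
//
//   int  fetch_add(_Atomic int *mem, int add) { return atomic_fetch_add(mem, add); } // lock xadd
//   void add_only (_Atomic int *mem, int add) { atomic_fetch_add(mem, add); }        // lock add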
10753 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10754   predicate(n->as_LoadStore()->result_not_used());
10755   match(Set dummy (GetAndAddB mem add));
10756   effect(KILL cr);
10757   format %{ "addb_lock   $mem, $add" %}
10758   ins_encode %{
10759     __ lock();
10760     __ addb($mem$$Address, $add$$Register);
10761   %}
10762   ins_pipe(pipe_cmpxchg);
10763 %}
10764 
10765 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10766   predicate(n->as_LoadStore()->result_not_used());
10767   match(Set dummy (GetAndAddB mem add));
10768   effect(KILL cr);
10769   format %{ "addb_lock   $mem, $add" %}
10770   ins_encode %{
10771     __ lock();
10772     __ addb($mem$$Address, $add$$constant);
10773   %}
10774   ins_pipe(pipe_cmpxchg);
10775 %}
10776 
10777 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10778   predicate(!n->as_LoadStore()->result_not_used());
10779   match(Set newval (GetAndAddB mem newval));
10780   effect(KILL cr);
10781   format %{ "xaddb_lock  $mem, $newval" %}
10782   ins_encode %{
10783     __ lock();
10784     __ xaddb($mem$$Address, $newval$$Register);
10785   %}
10786   ins_pipe(pipe_cmpxchg);
10787 %}
10788 
10789 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10790   predicate(n->as_LoadStore()->result_not_used());
10791   match(Set dummy (GetAndAddS mem add));
10792   effect(KILL cr);
10793   format %{ "addw_lock   $mem, $add" %}
10794   ins_encode %{
10795     __ lock();
10796     __ addw($mem$$Address, $add$$Register);
10797   %}
10798   ins_pipe(pipe_cmpxchg);
10799 %}
10800 
10801 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10802   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10803   match(Set dummy (GetAndAddS mem add));
10804   effect(KILL cr);
10805   format %{ "addw_lock   $mem, $add" %}
10806   ins_encode %{
10807     __ lock();
10808     __ addw($mem$$Address, $add$$constant);
10809   %}
10810   ins_pipe(pipe_cmpxchg);
10811 %}
10812 
10813 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10814   predicate(!n->as_LoadStore()->result_not_used());
10815   match(Set newval (GetAndAddS mem newval));
10816   effect(KILL cr);
10817   format %{ "xaddw_lock  $mem, $newval" %}
10818   ins_encode %{
10819     __ lock();
10820     __ xaddw($mem$$Address, $newval$$Register);
10821   %}
10822   ins_pipe(pipe_cmpxchg);
10823 %}
10824 
10825 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10826   predicate(n->as_LoadStore()->result_not_used());
10827   match(Set dummy (GetAndAddI mem add));
10828   effect(KILL cr);
10829   format %{ "addl_lock   $mem, $add" %}
10830   ins_encode %{
10831     __ lock();
10832     __ addl($mem$$Address, $add$$Register);
10833   %}
10834   ins_pipe(pipe_cmpxchg);
10835 %}
10836 
10837 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10838   predicate(n->as_LoadStore()->result_not_used());
10839   match(Set dummy (GetAndAddI mem add));
10840   effect(KILL cr);
10841   format %{ "addl_lock   $mem, $add" %}
10842   ins_encode %{
10843     __ lock();
10844     __ addl($mem$$Address, $add$$constant);
10845   %}
10846   ins_pipe(pipe_cmpxchg);
10847 %}
10848 
10849 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10850   predicate(!n->as_LoadStore()->result_not_used());
10851   match(Set newval (GetAndAddI mem newval));
10852   effect(KILL cr);
10853   format %{ "xaddl_lock  $mem, $newval" %}
10854   ins_encode %{
10855     __ lock();
10856     __ xaddl($mem$$Address, $newval$$Register);
10857   %}
10858   ins_pipe(pipe_cmpxchg);
10859 %}
10860 
10861 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10862   predicate(n->as_LoadStore()->result_not_used());
10863   match(Set dummy (GetAndAddL mem add));
10864   effect(KILL cr);
10865   format %{ "addq_lock   $mem, $add" %}
10866   ins_encode %{
10867     __ lock();
10868     __ addq($mem$$Address, $add$$Register);
10869   %}
10870   ins_pipe(pipe_cmpxchg);
10871 %}
10872 
10873 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10874   predicate(n->as_LoadStore()->result_not_used());
10875   match(Set dummy (GetAndAddL mem add));
10876   effect(KILL cr);
10877   format %{ "addq_lock   $mem, $add" %}
10878   ins_encode %{
10879     __ lock();
10880     __ addq($mem$$Address, $add$$constant);
10881   %}
10882   ins_pipe(pipe_cmpxchg);
10883 %}
10884 
10885 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10886   predicate(!n->as_LoadStore()->result_not_used());
10887   match(Set newval (GetAndAddL mem newval));
10888   effect(KILL cr);
10889   format %{ "xaddq_lock  $mem, $newval" %}
10890   ins_encode %{
10891     __ lock();
10892     __ xaddq($mem$$Address, $newval$$Register);
10893   %}
10894   ins_pipe(pipe_cmpxchg);
10895 %}
10896 
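// GetAndSet* maps to xchg, which is implicitly locked by the hardware whenever
// it has a memory operand, so the encodings below emit no explicit lock().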
10897 instruct xchgB( memory mem, rRegI newval) %{
10898   match(Set newval (GetAndSetB mem newval));
10899   format %{ "XCHGB  $newval,[$mem]" %}
10900   ins_encode %{
10901     __ xchgb($newval$$Register, $mem$$Address);
10902   %}
10903   ins_pipe( pipe_cmpxchg );
10904 %}
10905 
10906 instruct xchgS( memory mem, rRegI newval) %{
10907   match(Set newval (GetAndSetS mem newval));
10908   format %{ "XCHGW  $newval,[$mem]" %}
10909   ins_encode %{
10910     __ xchgw($newval$$Register, $mem$$Address);
10911   %}
10912   ins_pipe( pipe_cmpxchg );
10913 %}
10914 
10915 instruct xchgI( memory mem, rRegI newval) %{
10916   match(Set newval (GetAndSetI mem newval));
10917   format %{ "XCHGL  $newval,[$mem]" %}
10918   ins_encode %{
10919     __ xchgl($newval$$Register, $mem$$Address);
10920   %}
10921   ins_pipe( pipe_cmpxchg );
10922 %}
10923 
10924 instruct xchgL( memory mem, rRegL newval) %{
10925   match(Set newval (GetAndSetL mem newval));
10926   format %{ "XCHGQ  $newval,[$mem]" %}
10927   ins_encode %{
10928     __ xchgq($newval$$Register, $mem$$Address);
10929   %}
10930   ins_pipe( pipe_cmpxchg );
10931 %}
10932 
10933 instruct xchgP( memory mem, rRegP newval) %{
10934   match(Set newval (GetAndSetP mem newval));
10935   predicate(n->as_LoadStore()->barrier_data() == 0);
10936   format %{ "XCHGQ  $newval,[$mem]" %}
10937   ins_encode %{
10938     __ xchgq($newval$$Register, $mem$$Address);
10939   %}
10940   ins_pipe( pipe_cmpxchg );
10941 %}
10942 
10943 instruct xchgN( memory mem, rRegN newval) %{
10944   predicate(n->as_LoadStore()->barrier_data() == 0);
10945   match(Set newval (GetAndSetN mem newval));
10946   format %{ "XCHGL  $newval,[$mem]" %}
10947   ins_encode %{
10948     __ xchgl($newval$$Register, $mem$$Address);
10949   %}
10950   ins_pipe( pipe_cmpxchg );
10951 %}
10952 
10953 //----------Abs Instructions-------------------------------------------
10954 
10955 // Integer Absolute Instructions
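// Branchless abs: materialize -src, then conditionally replace it with src when
// the negation came out negative (i.e. src was positive). A C sketch of the
// xor/sub/cmov sequence (illustrative; uses unsigned arithmetic for the
// two's-complement wraparound the hardware performs):
//
//   int abs_branchless(int src) {
//     int dst = (int)(0u - (unsigned)src); // xorl + subl: dst = -src, sets SF
//     if (dst < 0) dst = src;              // cmovl(less): taken when src > 0
//     return dst;                          // INT_MIN maps to itself, as in Java
//   }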
10956 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10957 %{
10958   match(Set dst (AbsI src));
10959   effect(TEMP dst, KILL cr);
10960   format %{ "xorl    $dst, $dst\t# abs int\n\t"
10961             "subl    $dst, $src\n\t"
10962             "cmovll  $dst, $src" %}
10963   ins_encode %{
10964     __ xorl($dst$$Register, $dst$$Register);
10965     __ subl($dst$$Register, $src$$Register);
10966     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
10967   %}
10968 
10969   ins_pipe(ialu_reg_reg);
10970 %}
10971 
10972 // Long Absolute Instructions
10973 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10974 %{
10975   match(Set dst (AbsL src));
10976   effect(TEMP dst, KILL cr);
10977   format %{ "xorl    $dst, $dst\t# abs long\n\t"
10978             "subq    $dst, $src\n\t"
10979             "cmovlq  $dst, $src" %}
10980   ins_encode %{
10981     __ xorl($dst$$Register, $dst$$Register);
10982     __ subq($dst$$Register, $src$$Register);
10983     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
10984   %}
10985 
10986   ins_pipe(ialu_reg_reg);
10987 %}
10988 
10989 //----------Subtraction Instructions-------------------------------------------
10990 
10991 // Integer Subtraction Instructions
10992 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10993 %{
10994   predicate(!UseAPX);
10995   match(Set dst (SubI dst src));
10996   effect(KILL cr);
10997   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10998 
10999   format %{ "subl    $dst, $src\t# int" %}
11000   ins_encode %{
11001     __ subl($dst$$Register, $src$$Register);
11002   %}
11003   ins_pipe(ialu_reg_reg);
11004 %}
11005 
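// The *_ndd rules below are selected when UseAPX is on: Intel APX "new data
// destination" encodings take a third operand, so dst = src1 - src2 can be
// emitted without first copying src1 into dst. Roughly:
//
//   legacy two-operand lowering      APX NDD three-operand form
//     movl  dst, src1                  esubl dst, src1, src2
//     subl  dst, src2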
11006 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11007 %{
11008   predicate(UseAPX);
11009   match(Set dst (SubI src1 src2));
11010   effect(KILL cr);
11011   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11012 
11013   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11014   ins_encode %{
11015     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11016   %}
11017   ins_pipe(ialu_reg_reg);
11018 %}
11019 
11020 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11021 %{
11022   predicate(UseAPX);
11023   match(Set dst (SubI src1 src2));
11024   effect(KILL cr);
11025   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11026 
11027   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11028   ins_encode %{
11029     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11030   %}
11031   ins_pipe(ialu_reg_reg);
11032 %}
11033 
11034 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11035 %{
11036   predicate(UseAPX);
11037   match(Set dst (SubI (LoadI src1) src2));
11038   effect(KILL cr);
11039   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11040 
11041   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11042   ins_encode %{
11043     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11044   %}
11045   ins_pipe(ialu_reg_reg);
11046 %}
11047 
11048 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11049 %{
11050   predicate(!UseAPX);
11051   match(Set dst (SubI dst (LoadI src)));
11052   effect(KILL cr);
11053   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11054 
11055   ins_cost(150);
11056   format %{ "subl    $dst, $src\t# int" %}
11057   ins_encode %{
11058     __ subl($dst$$Register, $src$$Address);
11059   %}
11060   ins_pipe(ialu_reg_mem);
11061 %}
11062 
11063 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11064 %{
11065   predicate(UseAPX);
11066   match(Set dst (SubI src1 (LoadI src2)));
11067   effect(KILL cr);
11068   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11069 
11070   ins_cost(150);
11071   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11072   ins_encode %{
11073     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11074   %}
11075   ins_pipe(ialu_reg_mem);
11076 %}
11077 
11078 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11079 %{
11080   predicate(UseAPX);
11081   match(Set dst (SubI (LoadI src1) src2));
11082   effect(KILL cr);
11083   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11084 
11085   ins_cost(150);
11086   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11087   ins_encode %{
11088     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11089   %}
11090   ins_pipe(ialu_reg_mem);
11091 %}
11092 
11093 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11094 %{
11095   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11096   effect(KILL cr);
11097   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11098 
11099   ins_cost(150);
11100   format %{ "subl    $dst, $src\t# int" %}
11101   ins_encode %{
11102     __ subl($dst$$Address, $src$$Register);
11103   %}
11104   ins_pipe(ialu_mem_reg);
11105 %}
11106 
11107 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11108 %{
11109   predicate(!UseAPX);
11110   match(Set dst (SubL dst src));
11111   effect(KILL cr);
11112   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11113 
11114   format %{ "subq    $dst, $src\t# long" %}
11115   ins_encode %{
11116     __ subq($dst$$Register, $src$$Register);
11117   %}
11118   ins_pipe(ialu_reg_reg);
11119 %}
11120 
11121 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11122 %{
11123   predicate(UseAPX);
11124   match(Set dst (SubL src1 src2));
11125   effect(KILL cr);
11126   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11127 
11128   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11129   ins_encode %{
11130     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11131   %}
11132   ins_pipe(ialu_reg_reg);
11133 %}
11134 
11135 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11136 %{
11137   predicate(UseAPX);
11138   match(Set dst (SubL src1 src2));
11139   effect(KILL cr);
11140   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11141 
11142   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11143   ins_encode %{
11144     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11145   %}
11146   ins_pipe(ialu_reg_reg);
11147 %}
11148 
11149 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11150 %{
11151   predicate(UseAPX);
11152   match(Set dst (SubL (LoadL src1) src2));
11153   effect(KILL cr);
11154   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11155 
11156   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11157   ins_encode %{
11158     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11159   %}
11160   ins_pipe(ialu_reg_reg);
11161 %}
11162 
11163 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11164 %{
11165   predicate(!UseAPX);
11166   match(Set dst (SubL dst (LoadL src)));
11167   effect(KILL cr);
11168   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11169 
11170   ins_cost(150);
11171   format %{ "subq    $dst, $src\t# long" %}
11172   ins_encode %{
11173     __ subq($dst$$Register, $src$$Address);
11174   %}
11175   ins_pipe(ialu_reg_mem);
11176 %}
11177 
11178 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11179 %{
11180   predicate(UseAPX);
11181   match(Set dst (SubL src1 (LoadL src2)));
11182   effect(KILL cr);
11183   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11184 
11185   ins_cost(150);
11186   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11187   ins_encode %{
11188     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11189   %}
11190   ins_pipe(ialu_reg_mem);
11191 %}
11192 
11193 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11194 %{
11195   predicate(UseAPX);
11196   match(Set dst (SubL (LoadL src1) src2));
11197   effect(KILL cr);
11198   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11199 
11200   ins_cost(150);
11201   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11202   ins_encode %{
11203     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11204   %}
11205   ins_pipe(ialu_reg_mem);
11206 %}
11207 
11208 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11209 %{
11210   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11211   effect(KILL cr);
11212   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11213 
11214   ins_cost(150);
11215   format %{ "subq    $dst, $src\t# long" %}
11216   ins_encode %{
11217     __ subq($dst$$Address, $src$$Register);
11218   %}
11219   ins_pipe(ialu_mem_reg);
11220 %}
11221 
11222 // Subtract from a pointer
11223 // XXX hmpf???
11224 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11225 %{
11226   match(Set dst (AddP dst (SubI zero src)));
11227   effect(KILL cr);
11228 
11229   format %{ "subq    $dst, $src\t# ptr - int" %}
11230   ins_encode %{
11231     __ subq($dst$$Register, $src$$Register);
11232   %}
11233   ins_pipe(ialu_reg_reg);
11234 %}
11235 
11236 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11237 %{
11238   predicate(!UseAPX);
11239   match(Set dst (SubI zero dst));
11240   effect(KILL cr);
11241   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11242 
11243   format %{ "negl    $dst\t# int" %}
11244   ins_encode %{
11245     __ negl($dst$$Register);
11246   %}
11247   ins_pipe(ialu_reg);
11248 %}
11249 
11250 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11251 %{
11252   predicate(UseAPX);
11253   match(Set dst (SubI zero src));
11254   effect(KILL cr);
11255   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11256 
11257   format %{ "enegl    $dst, $src\t# int ndd" %}
11258   ins_encode %{
11259     __ enegl($dst$$Register, $src$$Register, false);
11260   %}
11261   ins_pipe(ialu_reg);
11262 %}
11263 
11264 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11265 %{
11266   predicate(!UseAPX);
11267   match(Set dst (NegI dst));
11268   effect(KILL cr);
11269   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11270 
11271   format %{ "negl    $dst\t# int" %}
11272   ins_encode %{
11273     __ negl($dst$$Register);
11274   %}
11275   ins_pipe(ialu_reg);
11276 %}
11277 
11278 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11279 %{
11280   predicate(UseAPX);
11281   match(Set dst (NegI src));
11282   effect(KILL cr);
11283   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11284 
11285   format %{ "enegl    $dst, $src\t# int ndd" %}
11286   ins_encode %{
11287     __ enegl($dst$$Register, $src$$Register, false);
11288   %}
11289   ins_pipe(ialu_reg);
11290 %}
11291 
11292 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11293 %{
11294   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11295   effect(KILL cr);
11296   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11297 
11298   format %{ "negl    $dst\t# int" %}
11299   ins_encode %{
11300     __ negl($dst$$Address);
11301   %}
11302   ins_pipe(ialu_reg);
11303 %}
11304 
11305 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11306 %{
11307   predicate(!UseAPX);
11308   match(Set dst (SubL zero dst));
11309   effect(KILL cr);
11310   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11311 
11312   format %{ "negq    $dst\t# long" %}
11313   ins_encode %{
11314     __ negq($dst$$Register);
11315   %}
11316   ins_pipe(ialu_reg);
11317 %}
11318 
11319 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11320 %{
11321   predicate(UseAPX);
11322   match(Set dst (SubL zero src));
11323   effect(KILL cr);
11324   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11325 
11326   format %{ "enegq    $dst, $src\t# long ndd" %}
11327   ins_encode %{
11328     __ enegq($dst$$Register, $src$$Register, false);
11329   %}
11330   ins_pipe(ialu_reg);
11331 %}
11332 
11333 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11334 %{
11335   predicate(!UseAPX);
11336   match(Set dst (NegL dst));
11337   effect(KILL cr);
11338   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11339 
11340   format %{ "negq    $dst\t# long" %}
11341   ins_encode %{
11342     __ negq($dst$$Register);
11343   %}
11344   ins_pipe(ialu_reg);
11345 %}
11346 
11347 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11348 %{
11349   predicate(UseAPX);
11350   match(Set dst (NegL src));
11351   effect(KILL cr);
11352   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11353 
11354   format %{ "enegq    $dst, $src\t# long ndd" %}
11355   ins_encode %{
11356     __ enegq($dst$$Register, $src$$Register, false);
11357   %}
11358   ins_pipe(ialu_reg);
11359 %}
11360 
11361 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11362 %{
11363   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11364   effect(KILL cr);
11365   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11366 
11367   format %{ "negq    $dst\t# long" %}
11368   ins_encode %{
11369     __ negq($dst$$Address);
11370   %}
11371   ins_pipe(ialu_reg);
11372 %}
11373 
11374 //----------Multiplication/Division Instructions-------------------------------
11375 // Integer Multiplication Instructions
11376 // Multiply Register
11377 
11378 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11379 %{
11380   predicate(!UseAPX);
11381   match(Set dst (MulI dst src));
11382   effect(KILL cr);
11383 
11384   ins_cost(300);
11385   format %{ "imull   $dst, $src\t# int" %}
11386   ins_encode %{
11387     __ imull($dst$$Register, $src$$Register);
11388   %}
11389   ins_pipe(ialu_reg_reg_alu0);
11390 %}
11391 
11392 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11393 %{
11394   predicate(UseAPX);
11395   match(Set dst (MulI src1 src2));
11396   effect(KILL cr);
11397 
11398   ins_cost(300);
11399   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11400   ins_encode %{
11401     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11402   %}
11403   ins_pipe(ialu_reg_reg_alu0);
11404 %}
11405 
11406 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11407 %{
11408   match(Set dst (MulI src imm));
11409   effect(KILL cr);
11410 
11411   ins_cost(300);
11412   format %{ "imull   $dst, $src, $imm\t# int" %}
11413   ins_encode %{
11414     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11415   %}
11416   ins_pipe(ialu_reg_reg_alu0);
11417 %}
11418 
11419 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11420 %{
11421   predicate(!UseAPX);
11422   match(Set dst (MulI dst (LoadI src)));
11423   effect(KILL cr);
11424 
11425   ins_cost(350);
11426   format %{ "imull   $dst, $src\t# int" %}
11427   ins_encode %{
11428     __ imull($dst$$Register, $src$$Address);
11429   %}
11430   ins_pipe(ialu_reg_mem_alu0);
11431 %}
11432 
11433 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11434 %{
11435   predicate(UseAPX);
11436   match(Set dst (MulI src1 (LoadI src2)));
11437   effect(KILL cr);
11438 
11439   ins_cost(350);
11440   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11441   ins_encode %{
11442     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11443   %}
11444   ins_pipe(ialu_reg_mem_alu0);
11445 %}
11446 
11447 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11448 %{
11449   match(Set dst (MulI (LoadI src) imm));
11450   effect(KILL cr);
11451 
11452   ins_cost(300);
11453   format %{ "imull   $dst, $src, $imm\t# int" %}
11454   ins_encode %{
11455     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11456   %}
11457   ins_pipe(ialu_reg_mem_alu0);
11458 %}
11459 
11460 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11461 %{
11462   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11463   effect(KILL cr, KILL src2);
11464 
11465   expand %{ mulI_rReg(dst, src1, cr);
11466             mulI_rReg(src2, src3, cr);
11467             addI_rReg(dst, src2, cr); %}
11468 %}
11469 
11470 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11471 %{
11472   predicate(!UseAPX);
11473   match(Set dst (MulL dst src));
11474   effect(KILL cr);
11475 
11476   ins_cost(300);
11477   format %{ "imulq   $dst, $src\t# long" %}
11478   ins_encode %{
11479     __ imulq($dst$$Register, $src$$Register);
11480   %}
11481   ins_pipe(ialu_reg_reg_alu0);
11482 %}
11483 
11484 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11485 %{
11486   predicate(UseAPX);
11487   match(Set dst (MulL src1 src2));
11488   effect(KILL cr);
11489 
11490   ins_cost(300);
11491   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11492   ins_encode %{
11493     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11494   %}
11495   ins_pipe(ialu_reg_reg_alu0);
11496 %}
11497 
11498 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11499 %{
11500   match(Set dst (MulL src imm));
11501   effect(KILL cr);
11502 
11503   ins_cost(300);
11504   format %{ "imulq   $dst, $src, $imm\t# long" %}
11505   ins_encode %{
11506     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11507   %}
11508   ins_pipe(ialu_reg_reg_alu0);
11509 %}
11510 
11511 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11512 %{
11513   predicate(!UseAPX);
11514   match(Set dst (MulL dst (LoadL src)));
11515   effect(KILL cr);
11516 
11517   ins_cost(350);
11518   format %{ "imulq   $dst, $src\t# long" %}
11519   ins_encode %{
11520     __ imulq($dst$$Register, $src$$Address);
11521   %}
11522   ins_pipe(ialu_reg_mem_alu0);
11523 %}
11524 
11525 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11526 %{
11527   predicate(UseAPX);
11528   match(Set dst (MulL src1 (LoadL src2)));
11529   effect(KILL cr);
11530 
11531   ins_cost(350);
11532   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11533   ins_encode %{
11534     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11535   %}
11536   ins_pipe(ialu_reg_mem_alu0);
11537 %}
11538 
11539 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11540 %{
11541   match(Set dst (MulL (LoadL src) imm));
11542   effect(KILL cr);
11543 
11544   ins_cost(300);
11545   format %{ "imulq   $dst, $src, $imm\t# long" %}
11546   ins_encode %{
11547     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11548   %}
11549   ins_pipe(ialu_reg_mem_alu0);
11550 %}
11551 
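// MulHiL/UMulHiL use the one-operand imulq/mulq forms, which leave the full
// 128-bit product in RDX:RAX; only the high half (rdx) is the node's result.
// A C sketch (assumes the non-standard __int128 of 64-bit gcc/clang):
//
//   long mulhi(long a, long b) {
//     return (long)(((__int128)a * (__int128)b) >> 64);
//   }
//   unsigned long umulhi(unsigned long a, unsigned long b) {
//     return (unsigned long)(((unsigned __int128)a * (unsigned __int128)b) >> 64);
//   }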
11552 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11553 %{
11554   match(Set dst (MulHiL src rax));
11555   effect(USE_KILL rax, KILL cr);
11556 
11557   ins_cost(300);
11558   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11559   ins_encode %{
11560     __ imulq($src$$Register);
11561   %}
11562   ins_pipe(ialu_reg_reg_alu0);
11563 %}
11564 
11565 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11566 %{
11567   match(Set dst (UMulHiL src rax));
11568   effect(USE_KILL rax, KILL cr);
11569 
11570   ins_cost(300);
11571   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11572   ins_encode %{
11573     __ mulq($src$$Register);
11574   %}
11575   ins_pipe(ialu_reg_reg_alu0);
11576 %}
11577 
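// The signed div/mod sequences below special-case the one overflowing input:
// x86 idiv raises #DE when the quotient does not fit (INT_MIN / -1, or
// LONG_MIN / -1 for the quadword forms), so the emitted code detects that case
// and produces the Java result (quotient = dividend, remainder = 0) without
// dividing. In C terms:
//
//   #include <limits.h>
//
//   int java_idiv(int x, int y) {
//     if (x == INT_MIN && y == -1) return INT_MIN; // hardware idivl would trap
//     return x / y;                                // normal path: cdq + idivl
//   }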
11578 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11579                    rFlagsReg cr)
11580 %{
11581   match(Set rax (DivI rax div));
11582   effect(KILL rdx, KILL cr);
11583 
11584   ins_cost(30*100+10*100); // XXX
11585   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11586             "jne,s   normal\n\t"
11587             "xorl    rdx, rdx\n\t"
11588             "cmpl    $div, -1\n\t"
11589             "je,s    done\n"
11590     "normal: cdql\n\t"
11591             "idivl   $div\n"
11592     "done:"        %}
11593   ins_encode(cdql_enc(div));
11594   ins_pipe(ialu_reg_reg_alu0);
11595 %}
11596 
11597 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11598                    rFlagsReg cr)
11599 %{
11600   match(Set rax (DivL rax div));
11601   effect(KILL rdx, KILL cr);
11602 
11603   ins_cost(30*100+10*100); // XXX
11604   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11605             "cmpq    rax, rdx\n\t"
11606             "jne,s   normal\n\t"
11607             "xorl    rdx, rdx\n\t"
11608             "cmpq    $div, -1\n\t"
11609             "je,s    done\n"
11610     "normal: cdqq\n\t"
11611             "idivq   $div\n"
11612     "done:"        %}
11613   ins_encode(cdqq_enc(div));
11614   ins_pipe(ialu_reg_reg_alu0);
11615 %}
11616 
11617 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11618 %{
11619   match(Set rax (UDivI rax div));
11620   effect(KILL rdx, KILL cr);
11621 
11622   ins_cost(300);
11623   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11624   ins_encode %{
11625     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11626   %}
11627   ins_pipe(ialu_reg_reg_alu0);
11628 %}
11629 
11630 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11631 %{
11632   match(Set rax (UDivL rax div));
11633   effect(KILL rdx, KILL cr);
11634 
11635   ins_cost(300);
11636   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11637   ins_encode %{
11638      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11639   %}
11640   ins_pipe(ialu_reg_reg_alu0);
11641 %}
11642 
11643 // Integer DIVMOD with Register, both quotient and mod results
11644 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11645                              rFlagsReg cr)
11646 %{
11647   match(DivModI rax div);
11648   effect(KILL cr);
11649 
11650   ins_cost(30*100+10*100); // XXX
11651   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11652             "jne,s   normal\n\t"
11653             "xorl    rdx, rdx\n\t"
11654             "cmpl    $div, -1\n\t"
11655             "je,s    done\n"
11656     "normal: cdql\n\t"
11657             "idivl   $div\n"
11658     "done:"        %}
11659   ins_encode(cdql_enc(div));
11660   ins_pipe(pipe_slow);
11661 %}
11662 
11663 // Long DIVMOD with Register, both quotient and mod results
11664 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11665                              rFlagsReg cr)
11666 %{
11667   match(DivModL rax div);
11668   effect(KILL cr);
11669 
11670   ins_cost(30*100+10*100); // XXX
11671   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11672             "cmpq    rax, rdx\n\t"
11673             "jne,s   normal\n\t"
11674             "xorl    rdx, rdx\n\t"
11675             "cmpq    $div, -1\n\t"
11676             "je,s    done\n"
11677     "normal: cdqq\n\t"
11678             "idivq   $div\n"
11679     "done:"        %}
11680   ins_encode(cdqq_enc(div));
11681   ins_pipe(pipe_slow);
11682 %}
11683 
11684 // Unsigned integer DIVMOD with Register, both quotient and mod results
11685 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11686                               no_rax_rdx_RegI div, rFlagsReg cr)
11687 %{
11688   match(UDivModI rax div);
11689   effect(TEMP tmp, KILL cr);
11690 
11691   ins_cost(300);
11692   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11693             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11694           %}
11695   ins_encode %{
11696     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11697   %}
11698   ins_pipe(pipe_slow);
11699 %}
11700 
11701 // Unsigned long DIVMOD with Register, both quotient and mod results
11702 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11703                               no_rax_rdx_RegL div, rFlagsReg cr)
11704 %{
11705   match(UDivModL rax div);
11706   effect(TEMP tmp, KILL cr);
11707 
11708   ins_cost(300);
11709   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11710             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11711           %}
11712   ins_encode %{
11713     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11714   %}
11715   ins_pipe(pipe_slow);
11716 %}
11717 
11718 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11719                    rFlagsReg cr)
11720 %{
11721   match(Set rdx (ModI rax div));
11722   effect(KILL rax, KILL cr);
11723 
11724   ins_cost(300); // XXX
11725   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11726             "jne,s   normal\n\t"
11727             "xorl    rdx, rdx\n\t"
11728             "cmpl    $div, -1\n\t"
11729             "je,s    done\n"
11730     "normal: cdql\n\t"
11731             "idivl   $div\n"
11732     "done:"        %}
11733   ins_encode(cdql_enc(div));
11734   ins_pipe(ialu_reg_reg_alu0);
11735 %}
11736 
11737 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11738                    rFlagsReg cr)
11739 %{
11740   match(Set rdx (ModL rax div));
11741   effect(KILL rax, KILL cr);
11742 
11743   ins_cost(300); // XXX
11744   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11745             "cmpq    rax, rdx\n\t"
11746             "jne,s   normal\n\t"
11747             "xorl    rdx, rdx\n\t"
11748             "cmpq    $div, -1\n\t"
11749             "je,s    done\n"
11750     "normal: cdqq\n\t"
11751             "idivq   $div\n"
11752     "done:"        %}
11753   ins_encode(cdqq_enc(div));
11754   ins_pipe(ialu_reg_reg_alu0);
11755 %}
11756 
11757 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11758 %{
11759   match(Set rdx (UModI rax div));
11760   effect(KILL rax, KILL cr);
11761 
11762   ins_cost(300);
11763   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11764   ins_encode %{
11765     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11766   %}
11767   ins_pipe(ialu_reg_reg_alu0);
11768 %}
11769 
11770 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11771 %{
11772   match(Set rdx (UModL rax div));
11773   effect(KILL rax, KILL cr);
11774 
11775   ins_cost(300);
11776   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11777   ins_encode %{
11778     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11779   %}
11780   ins_pipe(ialu_reg_reg_alu0);
11781 %}
11782 
11783 // Integer Shift Instructions
11784 // Shift Left by one, two, three
11785 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11786 %{
11787   predicate(!UseAPX);
11788   match(Set dst (LShiftI dst shift));
11789   effect(KILL cr);
11790 
11791   format %{ "sall    $dst, $shift" %}
11792   ins_encode %{
11793     __ sall($dst$$Register, $shift$$constant);
11794   %}
11795   ins_pipe(ialu_reg);
11796 %}
11797 
11798 // Shift Left by one, two, three
11799 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11800 %{
11801   predicate(UseAPX);
11802   match(Set dst (LShiftI src shift));
11803   effect(KILL cr);
11804 
11805   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11806   ins_encode %{
11807     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11808   %}
11809   ins_pipe(ialu_reg);
11810 %}
11811 
11812 // Shift Left by 8-bit immediate
11813 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11814 %{
11815   predicate(!UseAPX);
11816   match(Set dst (LShiftI dst shift));
11817   effect(KILL cr);
11818 
11819   format %{ "sall    $dst, $shift" %}
11820   ins_encode %{
11821     __ sall($dst$$Register, $shift$$constant);
11822   %}
11823   ins_pipe(ialu_reg);
11824 %}
11825 
11826 // Shift Left by 8-bit immediate
11827 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11828 %{
11829   predicate(UseAPX);
11830   match(Set dst (LShiftI src shift));
11831   effect(KILL cr);
11832 
11833   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11834   ins_encode %{
11835     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11836   %}
11837   ins_pipe(ialu_reg);
11838 %}
11839 
11840 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11841 %{
11842   predicate(UseAPX);
11843   match(Set dst (LShiftI (LoadI src) shift));
11844   effect(KILL cr);
11845 
11846   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11847   ins_encode %{
11848     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11849   %}
11850   ins_pipe(ialu_reg);
11851 %}
11852 
11853 // Shift Left by 8-bit immediate
11854 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11855 %{
11856   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11857   effect(KILL cr);
11858 
11859   format %{ "sall    $dst, $shift" %}
11860   ins_encode %{
11861     __ sall($dst$$Address, $shift$$constant);
11862   %}
11863   ins_pipe(ialu_mem_imm);
11864 %}
11865 
11866 // Shift Left by variable
11867 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11868 %{
11869   predicate(!VM_Version::supports_bmi2());
11870   match(Set dst (LShiftI dst shift));
11871   effect(KILL cr);
11872 
11873   format %{ "sall    $dst, $shift" %}
11874   ins_encode %{
11875     __ sall($dst$$Register);
11876   %}
11877   ins_pipe(ialu_reg_reg);
11878 %}
11879 
11880 // Shift Left by variable
11881 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11882 %{
11883   predicate(!VM_Version::supports_bmi2());
11884   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11885   effect(KILL cr);
11886 
11887   format %{ "sall    $dst, $shift" %}
11888   ins_encode %{
11889     __ sall($dst$$Address);
11890   %}
11891   ins_pipe(ialu_mem_reg);
11892 %}
11893 
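// With BMI2, variable shifts use the shlx/sarx/shrx family instead: the count
// may live in any register rather than being pinned to CL, and EFLAGS is not
// written, which is why these rules carry no KILL cr effect. As with the
// legacy shifts, the hardware masks the count (mod 32 for int, mod 64 for
// long), matching Java's semantics: x << n behaves as x << (n & 31).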
11894 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11895 %{
11896   predicate(VM_Version::supports_bmi2());
11897   match(Set dst (LShiftI src shift));
11898 
11899   format %{ "shlxl   $dst, $src, $shift" %}
11900   ins_encode %{
11901     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11902   %}
11903   ins_pipe(ialu_reg_reg);
11904 %}
11905 
11906 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11907 %{
11908   predicate(VM_Version::supports_bmi2());
11909   match(Set dst (LShiftI (LoadI src) shift));
11910   ins_cost(175);
11911   format %{ "shlxl   $dst, $src, $shift" %}
11912   ins_encode %{
11913     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11914   %}
11915   ins_pipe(ialu_reg_mem);
11916 %}
11917 
11918 // Arithmetic Shift Right by 8-bit immediate
11919 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11920 %{
11921   predicate(!UseAPX);
11922   match(Set dst (RShiftI dst shift));
11923   effect(KILL cr);
11924 
11925   format %{ "sarl    $dst, $shift" %}
11926   ins_encode %{
11927     __ sarl($dst$$Register, $shift$$constant);
11928   %}
11929   ins_pipe(ialu_mem_imm);
11930 %}
11931 
11932 // Arithmetic Shift Right by 8-bit immediate
11933 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11934 %{
11935   predicate(UseAPX);
11936   match(Set dst (RShiftI src shift));
11937   effect(KILL cr);
11938 
11939   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
11940   ins_encode %{
11941     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
11942   %}
11943   ins_pipe(ialu_mem_imm);
11944 %}
11945 
11946 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11947 %{
11948   predicate(UseAPX);
11949   match(Set dst (RShiftI (LoadI src) shift));
11950   effect(KILL cr);
11951 
11952   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
11953   ins_encode %{
11954     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
11955   %}
11956   ins_pipe(ialu_mem_imm);
11957 %}
11958 
11959 // Arithmetic Shift Right by 8-bit immediate
11960 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11961 %{
11962   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11963   effect(KILL cr);
11964 
11965   format %{ "sarl    $dst, $shift" %}
11966   ins_encode %{
11967     __ sarl($dst$$Address, $shift$$constant);
11968   %}
11969   ins_pipe(ialu_mem_imm);
11970 %}
11971 
11972 // Arithmetic Shift Right by variable
11973 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11974 %{
11975   predicate(!VM_Version::supports_bmi2());
11976   match(Set dst (RShiftI dst shift));
11977   effect(KILL cr);
11978 
11979   format %{ "sarl    $dst, $shift" %}
11980   ins_encode %{
11981     __ sarl($dst$$Register);
11982   %}
11983   ins_pipe(ialu_reg_reg);
11984 %}
11985 
11986 // Arithmetic Shift Right by variable
11987 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11988 %{
11989   predicate(!VM_Version::supports_bmi2());
11990   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11991   effect(KILL cr);
11992 
11993   format %{ "sarl    $dst, $shift" %}
11994   ins_encode %{
11995     __ sarl($dst$$Address);
11996   %}
11997   ins_pipe(ialu_mem_reg);
11998 %}
11999 
12000 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12001 %{
12002   predicate(VM_Version::supports_bmi2());
12003   match(Set dst (RShiftI src shift));
12004 
12005   format %{ "sarxl   $dst, $src, $shift" %}
12006   ins_encode %{
12007     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12008   %}
12009   ins_pipe(ialu_reg_reg);
12010 %}
12011 
12012 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12013 %{
12014   predicate(VM_Version::supports_bmi2());
12015   match(Set dst (RShiftI (LoadI src) shift));
12016   ins_cost(175);
12017   format %{ "sarxl   $dst, $src, $shift" %}
12018   ins_encode %{
12019     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12020   %}
12021   ins_pipe(ialu_reg_mem);
12022 %}
12023 
12024 // Logical Shift Right by 8-bit immediate
12025 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12026 %{
12027   predicate(!UseAPX);
12028   match(Set dst (URShiftI dst shift));
12029   effect(KILL cr);
12030 
12031   format %{ "shrl    $dst, $shift" %}
12032   ins_encode %{
12033     __ shrl($dst$$Register, $shift$$constant);
12034   %}
12035   ins_pipe(ialu_reg);
12036 %}
12037 
12038 // Logical Shift Right by 8-bit immediate
12039 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12040 %{
12041   predicate(UseAPX);
12042   match(Set dst (URShiftI src shift));
12043   effect(KILL cr);
12044 
12045   format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12046   ins_encode %{
12047     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12048   %}
12049   ins_pipe(ialu_reg);
12050 %}
12051 
12052 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12053 %{
12054   predicate(UseAPX);
12055   match(Set dst (URShiftI (LoadI src) shift));
12056   effect(KILL cr);
12057 
12058   format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12059   ins_encode %{
12060     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12061   %}
12062   ins_pipe(ialu_reg);
12063 %}
12064 
12065 // Logical Shift Right by 8-bit immediate
12066 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12067 %{
12068   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12069   effect(KILL cr);
12070 
12071   format %{ "shrl    $dst, $shift" %}
12072   ins_encode %{
12073     __ shrl($dst$$Address, $shift$$constant);
12074   %}
12075   ins_pipe(ialu_mem_imm);
12076 %}
12077 
12078 // Logical Shift Right by variable
12079 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12080 %{
12081   predicate(!VM_Version::supports_bmi2());
12082   match(Set dst (URShiftI dst shift));
12083   effect(KILL cr);
12084 
12085   format %{ "shrl    $dst, $shift" %}
12086   ins_encode %{
12087     __ shrl($dst$$Register);
12088   %}
12089   ins_pipe(ialu_reg_reg);
12090 %}
12091 
12092 // Logical Shift Right by variable
12093 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12094 %{
12095   predicate(!VM_Version::supports_bmi2());
12096   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12097   effect(KILL cr);
12098 
12099   format %{ "shrl    $dst, $shift" %}
12100   ins_encode %{
12101     __ shrl($dst$$Address);
12102   %}
12103   ins_pipe(ialu_mem_reg);
12104 %}
12105 
12106 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12107 %{
12108   predicate(VM_Version::supports_bmi2());
12109   match(Set dst (URShiftI src shift));
12110 
12111   format %{ "shrxl   $dst, $src, $shift" %}
12112   ins_encode %{
12113     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12114   %}
12115   ins_pipe(ialu_reg_reg);
12116 %}
12117 
12118 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12119 %{
12120   predicate(VM_Version::supports_bmi2());
12121   match(Set dst (URShiftI (LoadI src) shift));
12122   ins_cost(175);
12123   format %{ "shrxl   $dst, $src, $shift" %}
12124   ins_encode %{
12125     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12126   %}
12127   ins_pipe(ialu_reg_mem);
12128 %}
12129 
12130 // Long Shift Instructions
12131 // Shift Left by one, two, three
12132 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12133 %{
12134   predicate(!UseAPX);
12135   match(Set dst (LShiftL dst shift));
12136   effect(KILL cr);
12137 
12138   format %{ "salq    $dst, $shift" %}
12139   ins_encode %{
12140     __ salq($dst$$Register, $shift$$constant);
12141   %}
12142   ins_pipe(ialu_reg);
12143 %}
12144 
12145 // Shift Left by one, two, three
12146 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12147 %{
12148   predicate(UseAPX);
12149   match(Set dst (LShiftL src shift));
12150   effect(KILL cr);
12151 
12152   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12153   ins_encode %{
12154     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12155   %}
12156   ins_pipe(ialu_reg);
12157 %}
12158 
12159 // Shift Left by 8-bit immediate
12160 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12161 %{
12162   predicate(!UseAPX);
12163   match(Set dst (LShiftL dst shift));
12164   effect(KILL cr);
12165 
12166   format %{ "salq    $dst, $shift" %}
12167   ins_encode %{
12168     __ salq($dst$$Register, $shift$$constant);
12169   %}
12170   ins_pipe(ialu_reg);
12171 %}
12172 
12173 // Shift Left by 8-bit immediate
12174 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12175 %{
12176   predicate(UseAPX);
12177   match(Set dst (LShiftL src shift));
12178   effect(KILL cr);
12179 
12180   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12181   ins_encode %{
12182     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12183   %}
12184   ins_pipe(ialu_reg);
12185 %}
12186 
12187 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12188 %{
12189   predicate(UseAPX);
12190   match(Set dst (LShiftL (LoadL src) shift));
12191   effect(KILL cr);
12192 
12193   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12194   ins_encode %{
12195     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12196   %}
12197   ins_pipe(ialu_reg);
12198 %}
12199 
12200 // Shift Left by 8-bit immediate
12201 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12202 %{
12203   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12204   effect(KILL cr);
12205 
12206   format %{ "salq    $dst, $shift" %}
12207   ins_encode %{
12208     __ salq($dst$$Address, $shift$$constant);
12209   %}
12210   ins_pipe(ialu_mem_imm);
12211 %}
12212 
12213 // Shift Left by variable
12214 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12215 %{
12216   predicate(!VM_Version::supports_bmi2());
12217   match(Set dst (LShiftL dst shift));
12218   effect(KILL cr);
12219 
12220   format %{ "salq    $dst, $shift" %}
12221   ins_encode %{
12222     __ salq($dst$$Register);
12223   %}
12224   ins_pipe(ialu_reg_reg);
12225 %}
12226 
12227 // Shift Left by variable
12228 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12229 %{
12230   predicate(!VM_Version::supports_bmi2());
12231   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12232   effect(KILL cr);
12233 
12234   format %{ "salq    $dst, $shift" %}
12235   ins_encode %{
12236     __ salq($dst$$Address);
12237   %}
12238   ins_pipe(ialu_mem_reg);
12239 %}
12240 
12241 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12242 %{
12243   predicate(VM_Version::supports_bmi2());
12244   match(Set dst (LShiftL src shift));
12245 
12246   format %{ "shlxq   $dst, $src, $shift" %}
12247   ins_encode %{
12248     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12249   %}
12250   ins_pipe(ialu_reg_reg);
12251 %}
12252 
12253 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12254 %{
12255   predicate(VM_Version::supports_bmi2());
12256   match(Set dst (LShiftL (LoadL src) shift));
12257   ins_cost(175);
12258   format %{ "shlxq   $dst, $src, $shift" %}
12259   ins_encode %{
12260     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12261   %}
12262   ins_pipe(ialu_reg_mem);
12263 %}
12264 
12265 // Arithmetic Shift Right by 8-bit immediate
12266 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12267 %{
12268   predicate(!UseAPX);
12269   match(Set dst (RShiftL dst shift));
12270   effect(KILL cr);
12271 
12272   format %{ "sarq    $dst, $shift" %}
12273   ins_encode %{
12274     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12275   %}
  ins_pipe(ialu_reg);
12277 %}
12278 
12279 // Arithmetic Shift Right by 8-bit immediate
12280 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12281 %{
12282   predicate(UseAPX);
12283   match(Set dst (RShiftL src shift));
12284   effect(KILL cr);
12285 
12286   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12287   ins_encode %{
12288     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12289   %}
  ins_pipe(ialu_reg);
12291 %}
12292 
12293 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12294 %{
12295   predicate(UseAPX);
12296   match(Set dst (RShiftL (LoadL src) shift));
12297   effect(KILL cr);
12298 
12299   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12300   ins_encode %{
12301     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12302   %}
12303   ins_pipe(ialu_mem_imm);
12304 %}
12305 
12306 // Arithmetic Shift Right by 8-bit immediate
12307 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12308 %{
12309   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12310   effect(KILL cr);
12311 
12312   format %{ "sarq    $dst, $shift" %}
12313   ins_encode %{
12314     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12315   %}
12316   ins_pipe(ialu_mem_imm);
12317 %}
12318 
12319 // Arithmetic Shift Right by variable
12320 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12321 %{
12322   predicate(!VM_Version::supports_bmi2());
12323   match(Set dst (RShiftL dst shift));
12324   effect(KILL cr);
12325 
12326   format %{ "sarq    $dst, $shift" %}
12327   ins_encode %{
12328     __ sarq($dst$$Register);
12329   %}
12330   ins_pipe(ialu_reg_reg);
12331 %}
12332 
12333 // Arithmetic Shift Right by variable
12334 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12335 %{
12336   predicate(!VM_Version::supports_bmi2());
12337   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12338   effect(KILL cr);
12339 
12340   format %{ "sarq    $dst, $shift" %}
12341   ins_encode %{
12342     __ sarq($dst$$Address);
12343   %}
12344   ins_pipe(ialu_mem_reg);
12345 %}
12346 
12347 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12348 %{
12349   predicate(VM_Version::supports_bmi2());
12350   match(Set dst (RShiftL src shift));
12351 
12352   format %{ "sarxq   $dst, $src, $shift" %}
12353   ins_encode %{
12354     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12355   %}
12356   ins_pipe(ialu_reg_reg);
12357 %}
12358 
12359 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12360 %{
12361   predicate(VM_Version::supports_bmi2());
12362   match(Set dst (RShiftL (LoadL src) shift));
12363   ins_cost(175);
12364   format %{ "sarxq   $dst, $src, $shift" %}
12365   ins_encode %{
12366     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12367   %}
12368   ins_pipe(ialu_reg_mem);
12369 %}
12370 
12371 // Logical Shift Right by 8-bit immediate
12372 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12373 %{
12374   predicate(!UseAPX);
12375   match(Set dst (URShiftL dst shift));
12376   effect(KILL cr);
12377 
12378   format %{ "shrq    $dst, $shift" %}
12379   ins_encode %{
12380     __ shrq($dst$$Register, $shift$$constant);
12381   %}
12382   ins_pipe(ialu_reg);
12383 %}
12384 
12385 // Logical Shift Right by 8-bit immediate
12386 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12387 %{
12388   predicate(UseAPX);
12389   match(Set dst (URShiftL src shift));
12390   effect(KILL cr);
12391 
12392   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12393   ins_encode %{
12394     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12395   %}
12396   ins_pipe(ialu_reg);
12397 %}
12398 
12399 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12400 %{
12401   predicate(UseAPX);
12402   match(Set dst (URShiftL (LoadL src) shift));
12403   effect(KILL cr);
12404 
12405   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12406   ins_encode %{
12407     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12408   %}
12409   ins_pipe(ialu_reg);
12410 %}
12411 
12412 // Logical Shift Right by 8-bit immediate
12413 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12414 %{
12415   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12416   effect(KILL cr);
12417 
12418   format %{ "shrq    $dst, $shift" %}
12419   ins_encode %{
12420     __ shrq($dst$$Address, $shift$$constant);
12421   %}
12422   ins_pipe(ialu_mem_imm);
12423 %}
12424 
12425 // Logical Shift Right by variable
12426 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12427 %{
12428   predicate(!VM_Version::supports_bmi2());
12429   match(Set dst (URShiftL dst shift));
12430   effect(KILL cr);
12431 
12432   format %{ "shrq    $dst, $shift" %}
12433   ins_encode %{
12434     __ shrq($dst$$Register);
12435   %}
12436   ins_pipe(ialu_reg_reg);
12437 %}
12438 
12439 // Logical Shift Right by variable
12440 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12441 %{
12442   predicate(!VM_Version::supports_bmi2());
12443   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12444   effect(KILL cr);
12445 
12446   format %{ "shrq    $dst, $shift" %}
12447   ins_encode %{
12448     __ shrq($dst$$Address);
12449   %}
12450   ins_pipe(ialu_mem_reg);
12451 %}
12452 
12453 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12454 %{
12455   predicate(VM_Version::supports_bmi2());
12456   match(Set dst (URShiftL src shift));
12457 
12458   format %{ "shrxq   $dst, $src, $shift" %}
12459   ins_encode %{
12460     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12461   %}
12462   ins_pipe(ialu_reg_reg);
12463 %}
12464 
12465 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12466 %{
12467   predicate(VM_Version::supports_bmi2());
12468   match(Set dst (URShiftL (LoadL src) shift));
12469   ins_cost(175);
12470   format %{ "shrxq   $dst, $src, $shift" %}
12471   ins_encode %{
12472     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12473   %}
12474   ins_pipe(ialu_reg_mem);
12475 %}
12476 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
12479 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12480 %{
12481   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12482 
12483   format %{ "movsbl  $dst, $src\t# i2b" %}
12484   ins_encode %{
12485     __ movsbl($dst$$Register, $src$$Register);
12486   %}
12487   ins_pipe(ialu_reg_reg);
12488 %}
12489 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12492 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12493 %{
12494   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12495 
12496   format %{ "movswl  $dst, $src\t# i2s" %}
12497   ins_encode %{
12498     __ movswl($dst$$Register, $src$$Register);
12499   %}
12500   ins_pipe(ialu_reg_reg);
12501 %}
12502 
12503 // ROL/ROR instructions
12504 
12505 // Rotate left by constant.
12506 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12507 %{
12508   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12509   match(Set dst (RotateLeft dst shift));
12510   effect(KILL cr);
12511   format %{ "roll    $dst, $shift" %}
12512   ins_encode %{
12513     __ roll($dst$$Register, $shift$$constant);
12514   %}
12515   ins_pipe(ialu_reg);
12516 %}
12517 
12518 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12519 %{
12520   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12521   match(Set dst (RotateLeft src shift));
12522   format %{ "rolxl   $dst, $src, $shift" %}
12523   ins_encode %{
12524     int shift = 32 - ($shift$$constant & 31);
12525     __ rorxl($dst$$Register, $src$$Register, shift);
12526   %}
12527   ins_pipe(ialu_reg_reg);
12528 %}
12529 
12530 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12531 %{
12532   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12533   match(Set dst (RotateLeft (LoadI src) shift));
12534   ins_cost(175);
12535   format %{ "rolxl   $dst, $src, $shift" %}
12536   ins_encode %{
12537     int shift = 32 - ($shift$$constant & 31);
12538     __ rorxl($dst$$Register, $src$$Address, shift);
12539   %}
12540   ins_pipe(ialu_reg_mem);
12541 %}
12542 
12543 // Rotate Left by variable
12544 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12545 %{
12546   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12547   match(Set dst (RotateLeft dst shift));
12548   effect(KILL cr);
12549   format %{ "roll    $dst, $shift" %}
12550   ins_encode %{
12551     __ roll($dst$$Register);
12552   %}
12553   ins_pipe(ialu_reg_reg);
12554 %}
12555 
12556 // Rotate Left by variable
12557 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12558 %{
12559   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12560   match(Set dst (RotateLeft src shift));
12561   effect(KILL cr);
12562 
12563   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12564   ins_encode %{
12565     __ eroll($dst$$Register, $src$$Register, false);
12566   %}
12567   ins_pipe(ialu_reg_reg);
12568 %}
12569 
12570 // Rotate Right by constant.
12571 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12572 %{
12573   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12574   match(Set dst (RotateRight dst shift));
12575   effect(KILL cr);
12576   format %{ "rorl    $dst, $shift" %}
12577   ins_encode %{
12578     __ rorl($dst$$Register, $shift$$constant);
12579   %}
12580   ins_pipe(ialu_reg);
12581 %}
12582 
12583 // Rotate Right by constant.
12584 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12585 %{
12586   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12587   match(Set dst (RotateRight src shift));
12588   format %{ "rorxl   $dst, $src, $shift" %}
12589   ins_encode %{
12590     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12591   %}
12592   ins_pipe(ialu_reg_reg);
12593 %}
12594 
12595 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12596 %{
12597   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12598   match(Set dst (RotateRight (LoadI src) shift));
12599   ins_cost(175);
12600   format %{ "rorxl   $dst, $src, $shift" %}
12601   ins_encode %{
12602     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12603   %}
12604   ins_pipe(ialu_reg_mem);
12605 %}
12606 
12607 // Rotate Right by variable
12608 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12609 %{
12610   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12611   match(Set dst (RotateRight dst shift));
12612   effect(KILL cr);
12613   format %{ "rorl    $dst, $shift" %}
12614   ins_encode %{
12615     __ rorl($dst$$Register);
12616   %}
12617   ins_pipe(ialu_reg_reg);
12618 %}
12619 
12620 // Rotate Right by variable
12621 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12622 %{
12623   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12624   match(Set dst (RotateRight src shift));
12625   effect(KILL cr);
12626 
12627   format %{ "erorl    $dst, $src, $shift\t# rotate right(int ndd)" %}
12628   ins_encode %{
12629     __ erorl($dst$$Register, $src$$Register, false);
12630   %}
12631   ins_pipe(ialu_reg_reg);
12632 %}
12633 
12634 // Rotate Left by constant.
12635 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12636 %{
12637   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12638   match(Set dst (RotateLeft dst shift));
12639   effect(KILL cr);
12640   format %{ "rolq    $dst, $shift" %}
12641   ins_encode %{
12642     __ rolq($dst$$Register, $shift$$constant);
12643   %}
12644   ins_pipe(ialu_reg);
12645 %}
12646 
12647 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12648 %{
12649   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12650   match(Set dst (RotateLeft src shift));
12651   format %{ "rolxq   $dst, $src, $shift" %}
12652   ins_encode %{
12653     int shift = 64 - ($shift$$constant & 63);
12654     __ rorxq($dst$$Register, $src$$Register, shift);
12655   %}
12656   ins_pipe(ialu_reg_reg);
12657 %}
12658 
12659 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12660 %{
12661   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12662   match(Set dst (RotateLeft (LoadL src) shift));
12663   ins_cost(175);
12664   format %{ "rolxq   $dst, $src, $shift" %}
12665   ins_encode %{
12666     int shift = 64 - ($shift$$constant & 63);
12667     __ rorxq($dst$$Register, $src$$Address, shift);
12668   %}
12669   ins_pipe(ialu_reg_mem);
12670 %}
12671 
12672 // Rotate Left by variable
12673 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12674 %{
12675   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12676   match(Set dst (RotateLeft dst shift));
12677   effect(KILL cr);
12678   format %{ "rolq    $dst, $shift" %}
12679   ins_encode %{
12680     __ rolq($dst$$Register);
12681   %}
12682   ins_pipe(ialu_reg_reg);
12683 %}
12684 
12685 // Rotate Left by variable
12686 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12687 %{
12688   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12689   match(Set dst (RotateLeft src shift));
12690   effect(KILL cr);
12691 
12692   format %{ "erolq    $dst, $src, $shift\t# rotate left(long ndd)" %}
12693   ins_encode %{
12694     __ erolq($dst$$Register, $src$$Register, false);
12695   %}
12696   ins_pipe(ialu_reg_reg);
12697 %}
12698 
12699 // Rotate Right by constant.
12700 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12701 %{
12702   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12703   match(Set dst (RotateRight dst shift));
12704   effect(KILL cr);
12705   format %{ "rorq    $dst, $shift" %}
12706   ins_encode %{
12707     __ rorq($dst$$Register, $shift$$constant);
12708   %}
12709   ins_pipe(ialu_reg);
12710 %}
12711 
12712 // Rotate Right by constant
12713 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12714 %{
12715   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12716   match(Set dst (RotateRight src shift));
12717   format %{ "rorxq   $dst, $src, $shift" %}
12718   ins_encode %{
12719     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12720   %}
12721   ins_pipe(ialu_reg_reg);
12722 %}
12723 
12724 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12725 %{
12726   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12727   match(Set dst (RotateRight (LoadL src) shift));
12728   ins_cost(175);
12729   format %{ "rorxq   $dst, $src, $shift" %}
12730   ins_encode %{
12731     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12732   %}
12733   ins_pipe(ialu_reg_mem);
12734 %}
12735 
12736 // Rotate Right by variable
12737 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12738 %{
12739   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12740   match(Set dst (RotateRight dst shift));
12741   effect(KILL cr);
12742   format %{ "rorq    $dst, $shift" %}
12743   ins_encode %{
12744     __ rorq($dst$$Register);
12745   %}
12746   ins_pipe(ialu_reg_reg);
12747 %}
12748 
12749 // Rotate Right by variable
12750 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12751 %{
12752   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12753   match(Set dst (RotateRight src shift));
12754   effect(KILL cr);
12755 
12756   format %{ "erorq    $dst, $src, $shift\t# rotate right(long ndd)" %}
12757   ins_encode %{
12758     __ erorq($dst$$Register, $src$$Register, false);
12759   %}
12760   ins_pipe(ialu_reg_reg);
12761 %}
12762 
12763 //----------------------------- CompressBits/ExpandBits ------------------------
12764 
12765 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12766   predicate(n->bottom_type()->isa_long());
12767   match(Set dst (CompressBits src mask));
12768   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12769   ins_encode %{
12770     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12771   %}
12772   ins_pipe( pipe_slow );
12773 %}
12774 
12775 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12776   predicate(n->bottom_type()->isa_long());
12777   match(Set dst (ExpandBits src mask));
12778   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12779   ins_encode %{
12780     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12781   %}
12782   ins_pipe( pipe_slow );
12783 %}
12784 
12785 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12786   predicate(n->bottom_type()->isa_long());
12787   match(Set dst (CompressBits src (LoadL mask)));
12788   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12789   ins_encode %{
12790     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12791   %}
12792   ins_pipe( pipe_slow );
12793 %}
12794 
12795 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12796   predicate(n->bottom_type()->isa_long());
12797   match(Set dst (ExpandBits src (LoadL mask)));
12798   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12799   ins_encode %{
12800     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12801   %}
12802   ins_pipe( pipe_slow );
12803 %}
12804 
12805 
12806 // Logical Instructions
12807 
12808 // Integer Logical Instructions
12809 
12810 // And Instructions
12811 // And Register with Register
12812 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12813 %{
12814   predicate(!UseAPX);
12815   match(Set dst (AndI dst src));
12816   effect(KILL cr);
12817   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12818 
12819   format %{ "andl    $dst, $src\t# int" %}
12820   ins_encode %{
12821     __ andl($dst$$Register, $src$$Register);
12822   %}
12823   ins_pipe(ialu_reg_reg);
12824 %}
12825 
12826 // And Register with Register using New Data Destination (NDD)
12827 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12828 %{
12829   predicate(UseAPX);
12830   match(Set dst (AndI src1 src2));
12831   effect(KILL cr);
12832   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12833 
12834   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
12835   ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
12839   ins_pipe(ialu_reg_reg);
12840 %}
12841 
12842 // And Register with Immediate 255
12843 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12844 %{
12845   match(Set dst (AndI src mask));
12846 
12847   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
12848   ins_encode %{
12849     __ movzbl($dst$$Register, $src$$Register);
12850   %}
12851   ins_pipe(ialu_reg);
12852 %}
12853 
12854 // And Register with Immediate 255 and promote to long
12855 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12856 %{
12857   match(Set dst (ConvI2L (AndI src mask)));
12858 
12859   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
12860   ins_encode %{
12861     __ movzbl($dst$$Register, $src$$Register);
12862   %}
12863   ins_pipe(ialu_reg);
12864 %}
12865 
12866 // And Register with Immediate 65535
12867 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12868 %{
12869   match(Set dst (AndI src mask));
12870 
12871   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
12872   ins_encode %{
12873     __ movzwl($dst$$Register, $src$$Register);
12874   %}
12875   ins_pipe(ialu_reg);
12876 %}
12877 
12878 // And Register with Immediate 65535 and promote to long
12879 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12880 %{
12881   match(Set dst (ConvI2L (AndI src mask)));
12882 
12883   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
12884   ins_encode %{
12885     __ movzwl($dst$$Register, $src$$Register);
12886   %}
12887   ins_pipe(ialu_reg);
12888 %}
12889 
12890 // Can skip int2long conversions after AND with small bitmask
12891 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
12892 %{
12893   predicate(VM_Version::supports_bmi2());
12894   ins_cost(125);
12895   effect(TEMP tmp, KILL cr);
12896   match(Set dst (ConvI2L (AndI src mask)));
12897   format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
12898   ins_encode %{
12899     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
12900     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12901   %}
12902   ins_pipe(ialu_reg_reg);
12903 %}
12904 
12905 // And Register with Immediate
12906 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
12907 %{
12908   predicate(!UseAPX);
12909   match(Set dst (AndI dst src));
12910   effect(KILL cr);
12911   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12912 
12913   format %{ "andl    $dst, $src\t# int" %}
12914   ins_encode %{
12915     __ andl($dst$$Register, $src$$constant);
12916   %}
12917   ins_pipe(ialu_reg);
12918 %}
12919 
12920 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
12921 %{
12922   predicate(UseAPX);
12923   match(Set dst (AndI src1 src2));
12924   effect(KILL cr);
12925   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12926 
12927   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
12928   ins_encode %{
12929     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
12930   %}
12931   ins_pipe(ialu_reg);
12932 %}
12933 
12934 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
12935 %{
12936   predicate(UseAPX);
12937   match(Set dst (AndI (LoadI src1) src2));
12938   effect(KILL cr);
12939   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12940 
12941   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
12942   ins_encode %{
12943     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
12944   %}
12945   ins_pipe(ialu_reg);
12946 %}
12947 
12948 // And Register with Memory
12949 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
12950 %{
12951   predicate(!UseAPX);
12952   match(Set dst (AndI dst (LoadI src)));
12953   effect(KILL cr);
12954   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12955 
12956   ins_cost(150);
12957   format %{ "andl    $dst, $src\t# int" %}
12958   ins_encode %{
12959     __ andl($dst$$Register, $src$$Address);
12960   %}
12961   ins_pipe(ialu_reg_mem);
12962 %}
12963 
12964 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
12965 %{
12966   predicate(UseAPX);
12967   match(Set dst (AndI src1 (LoadI src2)));
12968   effect(KILL cr);
12969   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12970 
12971   ins_cost(150);
12972   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
12973   ins_encode %{
12974     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
12975   %}
12976   ins_pipe(ialu_reg_mem);
12977 %}
12978 
12979 // And Memory with Register
12980 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
12981 %{
12982   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
12983   effect(KILL cr);
12984   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12985 
12986   ins_cost(150);
12987   format %{ "andb    $dst, $src\t# byte" %}
12988   ins_encode %{
12989     __ andb($dst$$Address, $src$$Register);
12990   %}
12991   ins_pipe(ialu_mem_reg);
12992 %}
12993 
12994 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
12995 %{
12996   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
12997   effect(KILL cr);
12998   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12999 
13000   ins_cost(150);
13001   format %{ "andl    $dst, $src\t# int" %}
13002   ins_encode %{
13003     __ andl($dst$$Address, $src$$Register);
13004   %}
13005   ins_pipe(ialu_mem_reg);
13006 %}
13007 
13008 // And Memory with Immediate
13009 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13010 %{
13011   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13012   effect(KILL cr);
13013   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13014 
13015   ins_cost(125);
13016   format %{ "andl    $dst, $src\t# int" %}
13017   ins_encode %{
13018     __ andl($dst$$Address, $src$$constant);
13019   %}
13020   ins_pipe(ialu_mem_imm);
13021 %}
13022 
13023 // BMI1 instructions
13024 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13025   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13026   predicate(UseBMI1Instructions);
13027   effect(KILL cr);
13028   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13029 
13030   ins_cost(125);
13031   format %{ "andnl  $dst, $src1, $src2" %}
13032 
13033   ins_encode %{
13034     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13035   %}
13036   ins_pipe(ialu_reg_mem);
13037 %}
13038 
13039 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13040   match(Set dst (AndI (XorI src1 minus_1) src2));
13041   predicate(UseBMI1Instructions);
13042   effect(KILL cr);
13043   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13044 
13045   format %{ "andnl  $dst, $src1, $src2" %}
13046 
13047   ins_encode %{
13048     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13049   %}
13050   ins_pipe(ialu_reg);
13051 %}
13052 
13053 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13054   match(Set dst (AndI (SubI imm_zero src) src));
13055   predicate(UseBMI1Instructions);
13056   effect(KILL cr);
13057   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13058 
13059   format %{ "blsil  $dst, $src" %}
13060 
13061   ins_encode %{
13062     __ blsil($dst$$Register, $src$$Register);
13063   %}
13064   ins_pipe(ialu_reg);
13065 %}
13066 
13067 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13068   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13069   predicate(UseBMI1Instructions);
13070   effect(KILL cr);
13071   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13072 
13073   ins_cost(125);
13074   format %{ "blsil  $dst, $src" %}
13075 
13076   ins_encode %{
13077     __ blsil($dst$$Register, $src$$Address);
13078   %}
13079   ins_pipe(ialu_reg_mem);
13080 %}
13081 
13082 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13083 %{
13084   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13085   predicate(UseBMI1Instructions);
13086   effect(KILL cr);
13087   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13088 
13089   ins_cost(125);
13090   format %{ "blsmskl $dst, $src" %}
13091 
13092   ins_encode %{
13093     __ blsmskl($dst$$Register, $src$$Address);
13094   %}
13095   ins_pipe(ialu_reg_mem);
13096 %}
13097 
13098 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13099 %{
13100   match(Set dst (XorI (AddI src minus_1) src));
13101   predicate(UseBMI1Instructions);
13102   effect(KILL cr);
13103   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13104 
13105   format %{ "blsmskl $dst, $src" %}
13106 
13107   ins_encode %{
13108     __ blsmskl($dst$$Register, $src$$Register);
13109   %}
13110 
13111   ins_pipe(ialu_reg);
13112 %}
13113 
13114 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13115 %{
13116   match(Set dst (AndI (AddI src minus_1) src) );
13117   predicate(UseBMI1Instructions);
13118   effect(KILL cr);
13119   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13120 
13121   format %{ "blsrl  $dst, $src" %}
13122 
13123   ins_encode %{
13124     __ blsrl($dst$$Register, $src$$Register);
13125   %}
13126 
  ins_pipe(ialu_reg);
13128 %}
13129 
13130 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13131 %{
13132   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13133   predicate(UseBMI1Instructions);
13134   effect(KILL cr);
13135   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13136 
13137   ins_cost(125);
13138   format %{ "blsrl  $dst, $src" %}
13139 
13140   ins_encode %{
13141     __ blsrl($dst$$Register, $src$$Address);
13142   %}
13143 
  ins_pipe(ialu_reg_mem);
13145 %}
13146 
13147 // Or Instructions
13148 // Or Register with Register
13149 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13150 %{
13151   predicate(!UseAPX);
13152   match(Set dst (OrI dst src));
13153   effect(KILL cr);
13154   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13155 
13156   format %{ "orl     $dst, $src\t# int" %}
13157   ins_encode %{
13158     __ orl($dst$$Register, $src$$Register);
13159   %}
13160   ins_pipe(ialu_reg_reg);
13161 %}
13162 
13163 // Or Register with Register using New Data Destination (NDD)
13164 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13165 %{
13166   predicate(UseAPX);
13167   match(Set dst (OrI src1 src2));
13168   effect(KILL cr);
13169   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13170 
13171   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13172   ins_encode %{
13173     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13174   %}
13175   ins_pipe(ialu_reg_reg);
13176 %}
13177 
13178 // Or Register with Immediate
13179 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13180 %{
13181   predicate(!UseAPX);
13182   match(Set dst (OrI dst src));
13183   effect(KILL cr);
13184   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13185 
13186   format %{ "orl     $dst, $src\t# int" %}
13187   ins_encode %{
13188     __ orl($dst$$Register, $src$$constant);
13189   %}
13190   ins_pipe(ialu_reg);
13191 %}
13192 
13193 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13194 %{
13195   predicate(UseAPX);
13196   match(Set dst (OrI src1 src2));
13197   effect(KILL cr);
13198   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13199 
13200   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13201   ins_encode %{
13202     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13203   %}
13204   ins_pipe(ialu_reg);
13205 %}
13206 
13207 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13208 %{
13209   predicate(UseAPX);
13210   match(Set dst (OrI src1 src2));
13211   effect(KILL cr);
13212   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13213 
13214   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13215   ins_encode %{
13216     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13217   %}
13218   ins_pipe(ialu_reg);
13219 %}
13220 
13221 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13222 %{
13223   predicate(UseAPX);
13224   match(Set dst (OrI (LoadI src1) src2));
13225   effect(KILL cr);
13226   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13227 
13228   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13229   ins_encode %{
13230     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13231   %}
13232   ins_pipe(ialu_reg);
13233 %}
13234 
13235 // Or Register with Memory
13236 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13237 %{
13238   predicate(!UseAPX);
13239   match(Set dst (OrI dst (LoadI src)));
13240   effect(KILL cr);
13241   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13242 
13243   ins_cost(150);
13244   format %{ "orl     $dst, $src\t# int" %}
13245   ins_encode %{
13246     __ orl($dst$$Register, $src$$Address);
13247   %}
13248   ins_pipe(ialu_reg_mem);
13249 %}
13250 
13251 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13252 %{
13253   predicate(UseAPX);
13254   match(Set dst (OrI src1 (LoadI src2)));
13255   effect(KILL cr);
13256   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13257 
13258   ins_cost(150);
13259   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13260   ins_encode %{
13261     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13262   %}
13263   ins_pipe(ialu_reg_mem);
13264 %}
13265 
13266 // Or Memory with Register
13267 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13268 %{
13269   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13270   effect(KILL cr);
13271   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13272 
13273   ins_cost(150);
13274   format %{ "orb    $dst, $src\t# byte" %}
13275   ins_encode %{
13276     __ orb($dst$$Address, $src$$Register);
13277   %}
13278   ins_pipe(ialu_mem_reg);
13279 %}
13280 
13281 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13282 %{
13283   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13284   effect(KILL cr);
13285   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13286 
13287   ins_cost(150);
13288   format %{ "orl     $dst, $src\t# int" %}
13289   ins_encode %{
13290     __ orl($dst$$Address, $src$$Register);
13291   %}
13292   ins_pipe(ialu_mem_reg);
13293 %}
13294 
13295 // Or Memory with Immediate
13296 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13297 %{
13298   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13299   effect(KILL cr);
13300   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13301 
13302   ins_cost(125);
13303   format %{ "orl     $dst, $src\t# int" %}
13304   ins_encode %{
13305     __ orl($dst$$Address, $src$$constant);
13306   %}
13307   ins_pipe(ialu_mem_imm);
13308 %}
13309 
13310 // Xor Instructions
13311 // Xor Register with Register
13312 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13313 %{
13314   predicate(!UseAPX);
13315   match(Set dst (XorI dst src));
13316   effect(KILL cr);
13317   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13318 
13319   format %{ "xorl    $dst, $src\t# int" %}
13320   ins_encode %{
13321     __ xorl($dst$$Register, $src$$Register);
13322   %}
13323   ins_pipe(ialu_reg_reg);
13324 %}
13325 
13326 // Xor Register with Register using New Data Destination (NDD)
13327 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13328 %{
13329   predicate(UseAPX);
13330   match(Set dst (XorI src1 src2));
13331   effect(KILL cr);
13332   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13333 
13334   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13335   ins_encode %{
13336     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13337   %}
13338   ins_pipe(ialu_reg_reg);
13339 %}
13340 
13341 // Xor Register with Immediate -1
13342 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13343 %{
13344   predicate(!UseAPX);
13345   match(Set dst (XorI dst imm));
13346 
13347   format %{ "notl    $dst" %}
13348   ins_encode %{
13349      __ notl($dst$$Register);
13350   %}
13351   ins_pipe(ialu_reg);
13352 %}
13353 
13354 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13355 %{
13356   match(Set dst (XorI src imm));
13357   predicate(UseAPX);
13358 
13359   format %{ "enotl    $dst, $src" %}
13360   ins_encode %{
13361      __ enotl($dst$$Register, $src$$Register);
13362   %}
13363   ins_pipe(ialu_reg);
13364 %}
13365 
13366 // Xor Register with Immediate
13367 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13368 %{
  // Exclude the -1 case so that xorI_rReg_im1 is always selected for that constant, regardless of cost.
13370   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13371   match(Set dst (XorI dst src));
13372   effect(KILL cr);
13373   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13374 
13375   format %{ "xorl    $dst, $src\t# int" %}
13376   ins_encode %{
13377     __ xorl($dst$$Register, $src$$constant);
13378   %}
13379   ins_pipe(ialu_reg);
13380 %}
13381 
13382 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13383 %{
  // Exclude the -1 case so that xorI_rReg_im1_ndd is always selected for that constant, regardless of cost.
13385   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13386   match(Set dst (XorI src1 src2));
13387   effect(KILL cr);
13388   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13389 
13390   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13391   ins_encode %{
13392     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13393   %}
13394   ins_pipe(ialu_reg);
13395 %}
13396 
// Xor Memory with Immediate into Register (NDD)
13398 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13399 %{
13400   predicate(UseAPX);
13401   match(Set dst (XorI (LoadI src1) src2));
13402   effect(KILL cr);
13403   ins_cost(150);
13404   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13405 
13406   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13407   ins_encode %{
13408     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13409   %}
13410   ins_pipe(ialu_reg);
13411 %}
13412 
13413 // Xor Register with Memory
13414 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13415 %{
13416   predicate(!UseAPX);
13417   match(Set dst (XorI dst (LoadI src)));
13418   effect(KILL cr);
13419   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13420 
13421   ins_cost(150);
13422   format %{ "xorl    $dst, $src\t# int" %}
13423   ins_encode %{
13424     __ xorl($dst$$Register, $src$$Address);
13425   %}
13426   ins_pipe(ialu_reg_mem);
13427 %}
13428 
13429 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13430 %{
13431   predicate(UseAPX);
13432   match(Set dst (XorI src1 (LoadI src2)));
13433   effect(KILL cr);
13434   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13435 
13436   ins_cost(150);
13437   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13438   ins_encode %{
13439     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13440   %}
13441   ins_pipe(ialu_reg_mem);
13442 %}
13443 
13444 // Xor Memory with Register
13445 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13446 %{
13447   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13448   effect(KILL cr);
13449   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13450 
13451   ins_cost(150);
13452   format %{ "xorb    $dst, $src\t# byte" %}
13453   ins_encode %{
13454     __ xorb($dst$$Address, $src$$Register);
13455   %}
13456   ins_pipe(ialu_mem_reg);
13457 %}
13458 
13459 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13460 %{
13461   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13462   effect(KILL cr);
13463   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13464 
13465   ins_cost(150);
13466   format %{ "xorl    $dst, $src\t# int" %}
13467   ins_encode %{
13468     __ xorl($dst$$Address, $src$$Register);
13469   %}
13470   ins_pipe(ialu_mem_reg);
13471 %}
13472 
13473 // Xor Memory with Immediate
13474 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13475 %{
13476   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13477   effect(KILL cr);
13478   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13479 
13480   ins_cost(125);
13481   format %{ "xorl    $dst, $src\t# int" %}
13482   ins_encode %{
13483     __ xorl($dst$$Address, $src$$constant);
13484   %}
13485   ins_pipe(ialu_mem_imm);
13486 %}
13487 
13488 
13489 // Long Logical Instructions
13490 
13491 // And Instructions
13492 // And Register with Register
13493 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13494 %{
13495   predicate(!UseAPX);
13496   match(Set dst (AndL dst src));
13497   effect(KILL cr);
13498   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13499 
13500   format %{ "andq    $dst, $src\t# long" %}
13501   ins_encode %{
13502     __ andq($dst$$Register, $src$$Register);
13503   %}
13504   ins_pipe(ialu_reg_reg);
13505 %}
13506 
13507 // And Register with Register using New Data Destination (NDD)
13508 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13509 %{
13510   predicate(UseAPX);
13511   match(Set dst (AndL src1 src2));
13512   effect(KILL cr);
13513   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13514 
13515   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13516   ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13520   ins_pipe(ialu_reg_reg);
13521 %}
13522 
13523 // And Register with Immediate 255
13524 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13525 %{
13526   match(Set dst (AndL src mask));
13527 
13528   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13529   ins_encode %{
13530     // movzbl zeroes out the upper 32-bit and does not need REX.W
13531     __ movzbl($dst$$Register, $src$$Register);
13532   %}
13533   ins_pipe(ialu_reg);
13534 %}
13535 
13536 // And Register with Immediate 65535
13537 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13538 %{
13539   match(Set dst (AndL src mask));
13540 
13541   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13542   ins_encode %{
13543     // movzwl zeroes out the upper 32-bit and does not need REX.W
13544     __ movzwl($dst$$Register, $src$$Register);
13545   %}
13546   ins_pipe(ialu_reg);
13547 %}
13548 
13549 // And Register with Immediate
13550 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13551 %{
13552   predicate(!UseAPX);
13553   match(Set dst (AndL dst src));
13554   effect(KILL cr);
13555   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13556 
13557   format %{ "andq    $dst, $src\t# long" %}
13558   ins_encode %{
13559     __ andq($dst$$Register, $src$$constant);
13560   %}
13561   ins_pipe(ialu_reg);
13562 %}
13563 
13564 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13565 %{
13566   predicate(UseAPX);
13567   match(Set dst (AndL src1 src2));
13568   effect(KILL cr);
13569   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13570 
13571   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13572   ins_encode %{
13573     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13574   %}
13575   ins_pipe(ialu_reg);
13576 %}
13577 
13578 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13579 %{
13580   predicate(UseAPX);
13581   match(Set dst (AndL (LoadL src1) src2));
13582   effect(KILL cr);
13583   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13584 
13585   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13586   ins_encode %{
13587     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13588   %}
13589   ins_pipe(ialu_reg);
13590 %}
13591 
13592 // And Register with Memory
13593 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13594 %{
13595   predicate(!UseAPX);
13596   match(Set dst (AndL dst (LoadL src)));
13597   effect(KILL cr);
13598   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13599 
13600   ins_cost(150);
13601   format %{ "andq    $dst, $src\t# long" %}
13602   ins_encode %{
13603     __ andq($dst$$Register, $src$$Address);
13604   %}
13605   ins_pipe(ialu_reg_mem);
13606 %}
13607 
13608 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13609 %{
13610   predicate(UseAPX);
13611   match(Set dst (AndL src1 (LoadL src2)));
13612   effect(KILL cr);
13613   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13614 
13615   ins_cost(150);
13616   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13617   ins_encode %{
13618     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13619   %}
13620   ins_pipe(ialu_reg_mem);
13621 %}
13622 
13623 // And Memory with Register
13624 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13625 %{
13626   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13627   effect(KILL cr);
13628   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13629 
13630   ins_cost(150);
13631   format %{ "andq    $dst, $src\t# long" %}
13632   ins_encode %{
13633     __ andq($dst$$Address, $src$$Register);
13634   %}
13635   ins_pipe(ialu_mem_reg);
13636 %}
13637 
13638 // And Memory with Immediate
13639 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13640 %{
13641   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13642   effect(KILL cr);
13643   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13644 
13645   ins_cost(125);
13646   format %{ "andq    $dst, $src\t# long" %}
13647   ins_encode %{
13648     __ andq($dst$$Address, $src$$constant);
13649   %}
13650   ins_pipe(ialu_mem_imm);
13651 %}
13652 
13653 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13654 %{
  // con must be a true 64-bit immediate whose complement is a power of 2;
  // for 8- and 32-bit immediates a plain AND is already good enough.
13657   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13658 
13659   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13660   effect(KILL cr);
13661 
13662   ins_cost(125);
13663   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13664   ins_encode %{
13665     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13666   %}
13667   ins_pipe(ialu_mem_imm);
13668 %}
13669 
13670 // BMI1 instructions
13671 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13672   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13673   predicate(UseBMI1Instructions);
13674   effect(KILL cr);
13675   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13676 
13677   ins_cost(125);
13678   format %{ "andnq  $dst, $src1, $src2" %}
13679 
13680   ins_encode %{
13681     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13682   %}
13683   ins_pipe(ialu_reg_mem);
13684 %}
13685 
13686 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13687   match(Set dst (AndL (XorL src1 minus_1) src2));
13688   predicate(UseBMI1Instructions);
13689   effect(KILL cr);
13690   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13691 
13692   format %{ "andnq  $dst, $src1, $src2" %}
13693 
13694   ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13696   %}
  ins_pipe(ialu_reg);
13698 %}
13699 
13700 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13701   match(Set dst (AndL (SubL imm_zero src) src));
13702   predicate(UseBMI1Instructions);
13703   effect(KILL cr);
13704   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13705 
13706   format %{ "blsiq  $dst, $src" %}
13707 
13708   ins_encode %{
13709     __ blsiq($dst$$Register, $src$$Register);
13710   %}
13711   ins_pipe(ialu_reg);
13712 %}
13713 
13714 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13715   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13716   predicate(UseBMI1Instructions);
13717   effect(KILL cr);
13718   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13719 
13720   ins_cost(125);
13721   format %{ "blsiq  $dst, $src" %}
13722 
13723   ins_encode %{
13724     __ blsiq($dst$$Register, $src$$Address);
13725   %}
13726   ins_pipe(ialu_reg_mem);
13727 %}
13728 
13729 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13730 %{
13731   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13732   predicate(UseBMI1Instructions);
13733   effect(KILL cr);
13734   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13735 
13736   ins_cost(125);
13737   format %{ "blsmskq $dst, $src" %}
13738 
13739   ins_encode %{
13740     __ blsmskq($dst$$Register, $src$$Address);
13741   %}
13742   ins_pipe(ialu_reg_mem);
13743 %}
13744 
13745 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13746 %{
13747   match(Set dst (XorL (AddL src minus_1) src));
13748   predicate(UseBMI1Instructions);
13749   effect(KILL cr);
13750   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13751 
13752   format %{ "blsmskq $dst, $src" %}
13753 
13754   ins_encode %{
13755     __ blsmskq($dst$$Register, $src$$Register);
13756   %}
13757 
13758   ins_pipe(ialu_reg);
13759 %}
13760 
13761 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13762 %{
13763   match(Set dst (AndL (AddL src minus_1) src) );
13764   predicate(UseBMI1Instructions);
13765   effect(KILL cr);
13766   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13767 
13768   format %{ "blsrq  $dst, $src" %}
13769 
13770   ins_encode %{
13771     __ blsrq($dst$$Register, $src$$Register);
13772   %}
13773 
13774   ins_pipe(ialu_reg);
13775 %}
13776 
13777 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13778 %{
13779   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13780   predicate(UseBMI1Instructions);
13781   effect(KILL cr);
13782   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13783 
13784   ins_cost(125);
13785   format %{ "blsrq  $dst, $src" %}
13786 
13787   ins_encode %{
13788     __ blsrq($dst$$Register, $src$$Address);
13789   %}
13790 
  ins_pipe(ialu_reg_mem);
13792 %}
13793 
13794 // Or Instructions
13795 // Or Register with Register
13796 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13797 %{
13798   predicate(!UseAPX);
13799   match(Set dst (OrL dst src));
13800   effect(KILL cr);
13801   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13802 
13803   format %{ "orq     $dst, $src\t# long" %}
13804   ins_encode %{
13805     __ orq($dst$$Register, $src$$Register);
13806   %}
13807   ins_pipe(ialu_reg_reg);
13808 %}
13809 
13810 // Or Register with Register using New Data Destination (NDD)
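// NDD forms are EVEX-encoded with a separate destination, so dst need not
// alias an input and no register copy is needed. The trailing boolean on the
// masm call selects the APX no-flags (NF) encoding; false keeps the flag
// updates declared above.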
13811 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13812 %{
13813   predicate(UseAPX);
13814   match(Set dst (OrL src1 src2));
13815   effect(KILL cr);
13816   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13817 
13818   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13819   ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13823   ins_pipe(ialu_reg_reg);
13824 %}
13825 
13826 // Use any_RegP to match R15 (TLS register) without spilling.
13827 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
13828   match(Set dst (OrL dst (CastP2X src)));
13829   effect(KILL cr);
13830   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13831 
13832   format %{ "orq     $dst, $src\t# long" %}
13833   ins_encode %{
13834     __ orq($dst$$Register, $src$$Register);
13835   %}
13836   ins_pipe(ialu_reg_reg);
13837 %}
13838 
13839 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
13841   effect(KILL cr);
13842   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13843 
13844   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13845   ins_encode %{
13846     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13847   %}
13848   ins_pipe(ialu_reg_reg);
13849 %}
13850 
13851 // Or Register with Immediate
13852 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13853 %{
13854   predicate(!UseAPX);
13855   match(Set dst (OrL dst src));
13856   effect(KILL cr);
13857   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13858 
13859   format %{ "orq     $dst, $src\t# long" %}
13860   ins_encode %{
13861     __ orq($dst$$Register, $src$$constant);
13862   %}
13863   ins_pipe(ialu_reg);
13864 %}
13865 
13866 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13867 %{
13868   predicate(UseAPX);
13869   match(Set dst (OrL src1 src2));
13870   effect(KILL cr);
13871   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13872 
13873   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13874   ins_encode %{
13875     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13876   %}
13877   ins_pipe(ialu_reg);
13878 %}
13879 
13880 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13881 %{
13882   predicate(UseAPX);
13883   match(Set dst (OrL src1 src2));
13884   effect(KILL cr);
13885   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13886 
13887   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
13888   ins_encode %{
13889     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
13890   %}
13891   ins_pipe(ialu_reg);
13892 %}
13893 
// Or Memory with Immediate into Register (NDD)
13895 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13896 %{
13897   predicate(UseAPX);
13898   match(Set dst (OrL (LoadL src1) src2));
13899   effect(KILL cr);
13900   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13901 
13902   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13903   ins_encode %{
13904     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
13905   %}
  ins_pipe(ialu_reg_mem);
13907 %}
13908 
13909 // Or Register with Memory
13910 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13911 %{
13912   predicate(!UseAPX);
13913   match(Set dst (OrL dst (LoadL src)));
13914   effect(KILL cr);
13915   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13916 
13917   ins_cost(150);
13918   format %{ "orq     $dst, $src\t# long" %}
13919   ins_encode %{
13920     __ orq($dst$$Register, $src$$Address);
13921   %}
13922   ins_pipe(ialu_reg_mem);
13923 %}
13924 
13925 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13926 %{
13927   predicate(UseAPX);
13928   match(Set dst (OrL src1 (LoadL src2)));
13929   effect(KILL cr);
13930   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13931 
13932   ins_cost(150);
13933   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13934   ins_encode %{
13935     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
13936   %}
13937   ins_pipe(ialu_reg_mem);
13938 %}
13939 
13940 // Or Memory with Register
13941 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13942 %{
13943   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
13944   effect(KILL cr);
13945   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13946 
13947   ins_cost(150);
13948   format %{ "orq     $dst, $src\t# long" %}
13949   ins_encode %{
13950     __ orq($dst$$Address, $src$$Register);
13951   %}
13952   ins_pipe(ialu_mem_reg);
13953 %}
13954 
13955 // Or Memory with Immediate
13956 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13957 %{
13958   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
13959   effect(KILL cr);
13960   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13961 
13962   ins_cost(125);
13963   format %{ "orq     $dst, $src\t# long" %}
13964   ins_encode %{
13965     __ orq($dst$$Address, $src$$constant);
13966   %}
13967   ins_pipe(ialu_mem_imm);
13968 %}
13969 
13970 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
13971 %{
13972   // con should be a pure 64-bit power of 2 immediate
13973   // because AND/OR works well enough for 8/32-bit values.
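  // n is the StoreL; its in(3) is the stored value (the OrL), whose in(2)
  // is the immediate being tested.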
13974   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
13975 
13976   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
13977   effect(KILL cr);
13978 
13979   ins_cost(125);
13980   format %{ "btsq    $dst, log2($con)\t# long" %}
13981   ins_encode %{
13982     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
13983   %}
13984   ins_pipe(ialu_mem_imm);
13985 %}
13986 
13987 // Xor Instructions
13988 // Xor Register with Register
13989 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13990 %{
13991   predicate(!UseAPX);
13992   match(Set dst (XorL dst src));
13993   effect(KILL cr);
13994   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13995 
13996   format %{ "xorq    $dst, $src\t# long" %}
13997   ins_encode %{
13998     __ xorq($dst$$Register, $src$$Register);
13999   %}
14000   ins_pipe(ialu_reg_reg);
14001 %}
14002 
14003 // Xor Register with Register using New Data Destination (NDD)
14004 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14005 %{
14006   predicate(UseAPX);
14007   match(Set dst (XorL src1 src2));
14008   effect(KILL cr);
14009   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14010 
14011   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14012   ins_encode %{
14013     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14014   %}
14015   ins_pipe(ialu_reg_reg);
14016 %}
14017 
14018 // Xor Register with Immediate -1
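// XOR with all-ones is a bitwise NOT. x86 NOT does not modify the flags,
// which is why these forms carry no KILL cr effect.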
14019 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14020 %{
14021   predicate(!UseAPX);
14022   match(Set dst (XorL dst imm));
14023 
14024   format %{ "notq   $dst" %}
14025   ins_encode %{
14026      __ notq($dst$$Register);
14027   %}
14028   ins_pipe(ialu_reg);
14029 %}
14030 
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14032 %{
14033   predicate(UseAPX);
14034   match(Set dst (XorL src imm));
14035 
14036   format %{ "enotq   $dst, $src" %}
14037   ins_encode %{
14038     __ enotq($dst$$Register, $src$$Register);
14039   %}
14040   ins_pipe(ialu_reg);
14041 %}
14042 
14043 // Xor Register with Immediate
14044 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14045 %{
  // Strict predicate check so that xorL_rReg_im1 is selected when immL32 src is -1, regardless of cost.
14047   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14048   match(Set dst (XorL dst src));
14049   effect(KILL cr);
14050   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14051 
14052   format %{ "xorq    $dst, $src\t# long" %}
14053   ins_encode %{
14054     __ xorq($dst$$Register, $src$$constant);
14055   %}
14056   ins_pipe(ialu_reg);
14057 %}
14058 
instruct xorL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  // Strict predicate check so that xorL_rReg_im1_ndd is selected when immL32 src2 is -1, regardless of cost.
  predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14063   match(Set dst (XorL src1 src2));
14064   effect(KILL cr);
14065   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14066 
14067   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14068   ins_encode %{
14069     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14070   %}
14071   ins_pipe(ialu_reg);
14072 %}
14073 
// Xor Memory with Immediate into Register (NDD)
instruct xorL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14076 %{
14077   predicate(UseAPX);
14078   match(Set dst (XorL (LoadL src1) src2));
14079   effect(KILL cr);
14080   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14081   ins_cost(150);
14082 
14083   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14084   ins_encode %{
14085     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14086   %}
  ins_pipe(ialu_reg_mem);
14088 %}
14089 
14090 // Xor Register with Memory
14091 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14092 %{
14093   predicate(!UseAPX);
14094   match(Set dst (XorL dst (LoadL src)));
14095   effect(KILL cr);
14096   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14097 
14098   ins_cost(150);
14099   format %{ "xorq    $dst, $src\t# long" %}
14100   ins_encode %{
14101     __ xorq($dst$$Register, $src$$Address);
14102   %}
14103   ins_pipe(ialu_reg_mem);
14104 %}
14105 
14106 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14107 %{
14108   predicate(UseAPX);
14109   match(Set dst (XorL src1 (LoadL src2)));
14110   effect(KILL cr);
14111   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14112 
14113   ins_cost(150);
14114   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14115   ins_encode %{
14116     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14117   %}
14118   ins_pipe(ialu_reg_mem);
14119 %}
14120 
14121 // Xor Memory with Register
14122 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14123 %{
14124   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14125   effect(KILL cr);
14126   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14127 
14128   ins_cost(150);
14129   format %{ "xorq    $dst, $src\t# long" %}
14130   ins_encode %{
14131     __ xorq($dst$$Address, $src$$Register);
14132   %}
14133   ins_pipe(ialu_mem_reg);
14134 %}
14135 
14136 // Xor Memory with Immediate
14137 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14138 %{
14139   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14140   effect(KILL cr);
14141   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14142 
14143   ins_cost(125);
14144   format %{ "xorq    $dst, $src\t# long" %}
14145   ins_encode %{
14146     __ xorq($dst$$Address, $src$$constant);
14147   %}
14148   ins_pipe(ialu_mem_imm);
14149 %}
14150 
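// CmpLTMask produces -1 when p < q and 0 otherwise: setcc leaves 0 or 1 in
// dst, and negl turns the 1 into an all-ones mask.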
14151 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14152 %{
14153   match(Set dst (CmpLTMask p q));
14154   effect(KILL cr);
14155 
14156   ins_cost(400);
14157   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
14158             "setcc   $dst \t# emits setlt + movzbl or setzul for APX"
14159             "negl    $dst" %}
14160   ins_encode %{
14161     __ cmpl($p$$Register, $q$$Register);
14162     __ setcc(Assembler::less, $dst$$Register);
14163     __ negl($dst$$Register);
14164   %}
14165   ins_pipe(pipe_slow);
14166 %}
14167 
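// With q == 0 the mask is just dst's sign bit broadcast: a single arithmetic
// shift right by 31.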
14168 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14169 %{
14170   match(Set dst (CmpLTMask dst zero));
14171   effect(KILL cr);
14172 
14173   ins_cost(100);
14174   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14175   ins_encode %{
14176     __ sarl($dst$$Register, 31);
14177   %}
14178   ins_pipe(ialu_reg);
14179 %}
14180 
14181 /* Better to save a register than avoid a branch */
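// Computes p = (p < q) ? (p - q) + y : p - q without materializing the mask:
// subl sets the flags and jge skips the add when p >= q.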
14182 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14183 %{
14184   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14185   effect(KILL cr);
14186   ins_cost(300);
14187   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14188             "jge     done\n\t"
14189             "addl    $p,$y\n"
14190             "done:   " %}
14191   ins_encode %{
14192     Register Rp = $p$$Register;
14193     Register Rq = $q$$Register;
14194     Register Ry = $y$$Register;
14195     Label done;
14196     __ subl(Rp, Rq);
14197     __ jccb(Assembler::greaterEqual, done);
14198     __ addl(Rp, Ry);
14199     __ bind(done);
14200   %}
14201   ins_pipe(pipe_cmplt);
14202 %}
14203 
14204 /* Better to save a register than avoid a branch */
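// Computes y = (p < q) ? y : 0; the compare drives a conditional clear
// instead of building the -1/0 mask.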
14205 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14206 %{
14207   match(Set y (AndI (CmpLTMask p q) y));
14208   effect(KILL cr);
14209 
14210   ins_cost(300);
14211 
14212   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14213             "jlt     done\n\t"
14214             "xorl    $y, $y\n"
14215             "done:   " %}
14216   ins_encode %{
14217     Register Rp = $p$$Register;
14218     Register Rq = $q$$Register;
14219     Register Ry = $y$$Register;
14220     Label done;
14221     __ cmpl(Rp, Rq);
14222     __ jccb(Assembler::less, done);
14223     __ xorl(Ry, Ry);
14224     __ bind(done);
14225   %}
14226   ins_pipe(pipe_cmplt);
14227 %}
14228 
14229 
14230 //---------- FP Instructions------------------------------------------------
14231 
14232 // Really expensive, avoid
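// ucomiss sets ZF/PF/CF = 1/1/1 for an unordered (NaN) operand; on the
// parity-taken path the pushfq/andq/popfq fixup clears ZF, PF and SF while
// preserving CF, so NaN reads as "below" to subsequent branches.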
14233 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14234 %{
14235   match(Set cr (CmpF src1 src2));
14236 
14237   ins_cost(500);
14238   format %{ "ucomiss $src1, $src2\n\t"
14239             "jnp,s   exit\n\t"
14240             "pushfq\t# saw NaN, set CF\n\t"
14241             "andq    [rsp], #0xffffff2b\n\t"
14242             "popfq\n"
14243     "exit:" %}
14244   ins_encode %{
14245     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14246     emit_cmpfp_fixup(masm);
14247   %}
14248   ins_pipe(pipe_slow);
14249 %}
14250 
14251 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14252   match(Set cr (CmpF src1 src2));
14253 
14254   ins_cost(100);
14255   format %{ "ucomiss $src1, $src2" %}
14256   ins_encode %{
14257     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14258   %}
14259   ins_pipe(pipe_slow);
14260 %}
14261 
14262 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14263   match(Set cr (CmpF src1 (LoadF src2)));
14264 
14265   ins_cost(100);
14266   format %{ "ucomiss $src1, $src2" %}
14267   ins_encode %{
14268     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14269   %}
14270   ins_pipe(pipe_slow);
14271 %}
14272 
14273 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14274   match(Set cr (CmpF src con));
14275   ins_cost(100);
14276   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14277   ins_encode %{
14278     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14279   %}
14280   ins_pipe(pipe_slow);
14281 %}
14282 
14283 // Really expensive, avoid
14284 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14285 %{
14286   match(Set cr (CmpD src1 src2));
14287 
14288   ins_cost(500);
14289   format %{ "ucomisd $src1, $src2\n\t"
14290             "jnp,s   exit\n\t"
14291             "pushfq\t# saw NaN, set CF\n\t"
14292             "andq    [rsp], #0xffffff2b\n\t"
14293             "popfq\n"
14294     "exit:" %}
14295   ins_encode %{
14296     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14297     emit_cmpfp_fixup(masm);
14298   %}
14299   ins_pipe(pipe_slow);
14300 %}
14301 
14302 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14303   match(Set cr (CmpD src1 src2));
14304 
14305   ins_cost(100);
14306   format %{ "ucomisd $src1, $src2 test" %}
14307   ins_encode %{
14308     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14309   %}
14310   ins_pipe(pipe_slow);
14311 %}
14312 
14313 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14314   match(Set cr (CmpD src1 (LoadD src2)));
14315 
14316   ins_cost(100);
14317   format %{ "ucomisd $src1, $src2" %}
14318   ins_encode %{
14319     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14320   %}
14321   ins_pipe(pipe_slow);
14322 %}
14323 
14324 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14325   match(Set cr (CmpD src con));
14326   ins_cost(100);
14327   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14328   ins_encode %{
14329     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14330   %}
14331   ins_pipe(pipe_slow);
14332 %}
14333 
14334 // Compare into -1,0,1
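// emit_cmpfp3 materializes the three-way result: dst starts at -1 and is kept
// on unordered (jp) or below (jb); otherwise setne/movzbl leave 0 for equal
// and 1 for greater. NaN therefore yields -1, as CmpF3 requires.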
14335 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14336 %{
14337   match(Set dst (CmpF3 src1 src2));
14338   effect(KILL cr);
14339 
14340   ins_cost(275);
14341   format %{ "ucomiss $src1, $src2\n\t"
14342             "movl    $dst, #-1\n\t"
14343             "jp,s    done\n\t"
14344             "jb,s    done\n\t"
14345             "setne   $dst\n\t"
14346             "movzbl  $dst, $dst\n"
14347     "done:" %}
14348   ins_encode %{
14349     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14350     emit_cmpfp3(masm, $dst$$Register);
14351   %}
14352   ins_pipe(pipe_slow);
14353 %}
14354 
14355 // Compare into -1,0,1
14356 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14357 %{
14358   match(Set dst (CmpF3 src1 (LoadF src2)));
14359   effect(KILL cr);
14360 
14361   ins_cost(275);
14362   format %{ "ucomiss $src1, $src2\n\t"
14363             "movl    $dst, #-1\n\t"
14364             "jp,s    done\n\t"
14365             "jb,s    done\n\t"
14366             "setne   $dst\n\t"
14367             "movzbl  $dst, $dst\n"
14368     "done:" %}
14369   ins_encode %{
14370     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14371     emit_cmpfp3(masm, $dst$$Register);
14372   %}
14373   ins_pipe(pipe_slow);
14374 %}
14375 
14376 // Compare into -1,0,1
14377 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14378   match(Set dst (CmpF3 src con));
14379   effect(KILL cr);
14380 
14381   ins_cost(275);
14382   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14383             "movl    $dst, #-1\n\t"
14384             "jp,s    done\n\t"
14385             "jb,s    done\n\t"
14386             "setne   $dst\n\t"
14387             "movzbl  $dst, $dst\n"
14388     "done:" %}
14389   ins_encode %{
14390     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14391     emit_cmpfp3(masm, $dst$$Register);
14392   %}
14393   ins_pipe(pipe_slow);
14394 %}
14395 
14396 // Compare into -1,0,1
14397 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14398 %{
14399   match(Set dst (CmpD3 src1 src2));
14400   effect(KILL cr);
14401 
14402   ins_cost(275);
14403   format %{ "ucomisd $src1, $src2\n\t"
14404             "movl    $dst, #-1\n\t"
14405             "jp,s    done\n\t"
14406             "jb,s    done\n\t"
14407             "setne   $dst\n\t"
14408             "movzbl  $dst, $dst\n"
14409     "done:" %}
14410   ins_encode %{
14411     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14412     emit_cmpfp3(masm, $dst$$Register);
14413   %}
14414   ins_pipe(pipe_slow);
14415 %}
14416 
14417 // Compare into -1,0,1
14418 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14419 %{
14420   match(Set dst (CmpD3 src1 (LoadD src2)));
14421   effect(KILL cr);
14422 
14423   ins_cost(275);
14424   format %{ "ucomisd $src1, $src2\n\t"
14425             "movl    $dst, #-1\n\t"
14426             "jp,s    done\n\t"
14427             "jb,s    done\n\t"
14428             "setne   $dst\n\t"
14429             "movzbl  $dst, $dst\n"
14430     "done:" %}
14431   ins_encode %{
14432     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14433     emit_cmpfp3(masm, $dst$$Register);
14434   %}
14435   ins_pipe(pipe_slow);
14436 %}
14437 
14438 // Compare into -1,0,1
14439 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14440   match(Set dst (CmpD3 src con));
14441   effect(KILL cr);
14442 
14443   ins_cost(275);
14444   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14445             "movl    $dst, #-1\n\t"
14446             "jp,s    done\n\t"
14447             "jb,s    done\n\t"
14448             "setne   $dst\n\t"
14449             "movzbl  $dst, $dst\n"
14450     "done:" %}
14451   ins_encode %{
14452     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14453     emit_cmpfp3(masm, $dst$$Register);
14454   %}
14455   ins_pipe(pipe_slow);
14456 %}
14457 
14458 //----------Arithmetic Conversion Instructions---------------------------------
14459 
14460 instruct convF2D_reg_reg(regD dst, regF src)
14461 %{
14462   match(Set dst (ConvF2D src));
14463 
14464   format %{ "cvtss2sd $dst, $src" %}
14465   ins_encode %{
14466     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14467   %}
14468   ins_pipe(pipe_slow); // XXX
14469 %}
14470 
14471 instruct convF2D_reg_mem(regD dst, memory src)
14472 %{
14473   predicate(UseAVX == 0);
14474   match(Set dst (ConvF2D (LoadF src)));
14475 
14476   format %{ "cvtss2sd $dst, $src" %}
14477   ins_encode %{
14478     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14479   %}
14480   ins_pipe(pipe_slow); // XXX
14481 %}
14482 
14483 instruct convD2F_reg_reg(regF dst, regD src)
14484 %{
14485   match(Set dst (ConvD2F src));
14486 
14487   format %{ "cvtsd2ss $dst, $src" %}
14488   ins_encode %{
14489     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14490   %}
14491   ins_pipe(pipe_slow); // XXX
14492 %}
14493 
14494 instruct convD2F_reg_mem(regF dst, memory src)
14495 %{
14496   predicate(UseAVX == 0);
14497   match(Set dst (ConvD2F (LoadD src)));
14498 
14499   format %{ "cvtsd2ss $dst, $src" %}
14500   ins_encode %{
14501     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14502   %}
14503   ins_pipe(pipe_slow); // XXX
14504 %}
14505 
14506 // XXX do mem variants
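// cvttss2si returns the integer indefinite value (0x80000000) on overflow or
// NaN, so the legacy path calls convertF2I for the Java-semantics fixup and
// kills the flags. The AVX10.2 evcvtt*2sis* saturating forms produce the Java
// result directly, hence no KILL cr in those variants.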
14507 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14508 %{
14509   predicate(!VM_Version::supports_avx10_2());
14510   match(Set dst (ConvF2I src));
14511   effect(KILL cr);
14512   format %{ "convert_f2i $dst, $src" %}
14513   ins_encode %{
14514     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14515   %}
14516   ins_pipe(pipe_slow);
14517 %}
14518 
14519 instruct convF2I_reg_reg_avx10(rRegI dst, regF src)
14520 %{
14521   predicate(VM_Version::supports_avx10_2());
14522   match(Set dst (ConvF2I src));
14523   format %{ "evcvttss2sisl $dst, $src" %}
14524   ins_encode %{
14525     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14526   %}
14527   ins_pipe(pipe_slow);
14528 %}
14529 
14530 instruct convF2I_reg_mem_avx10(rRegI dst, memory src)
14531 %{
14532   predicate(VM_Version::supports_avx10_2());
14533   match(Set dst (ConvF2I (LoadF src)));
14534   format %{ "evcvttss2sisl $dst, $src" %}
14535   ins_encode %{
14536     __ evcvttss2sisl($dst$$Register, $src$$Address);
14537   %}
14538   ins_pipe(pipe_slow);
14539 %}
14540 
14541 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14542 %{
14543   predicate(!VM_Version::supports_avx10_2());
14544   match(Set dst (ConvF2L src));
14545   effect(KILL cr);
14546   format %{ "convert_f2l $dst, $src"%}
14547   ins_encode %{
14548     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14549   %}
14550   ins_pipe(pipe_slow);
14551 %}
14552 
14553 instruct convF2L_reg_reg_avx10(rRegL dst, regF src)
14554 %{
14555   predicate(VM_Version::supports_avx10_2());
14556   match(Set dst (ConvF2L src));
14557   format %{ "evcvttss2sisq $dst, $src" %}
14558   ins_encode %{
14559     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14560   %}
14561   ins_pipe(pipe_slow);
14562 %}
14563 
14564 instruct convF2L_reg_mem_avx10(rRegL dst, memory src)
14565 %{
14566   predicate(VM_Version::supports_avx10_2());
14567   match(Set dst (ConvF2L (LoadF src)));
14568   format %{ "evcvttss2sisq $dst, $src" %}
14569   ins_encode %{
14570     __ evcvttss2sisq($dst$$Register, $src$$Address);
14571   %}
14572   ins_pipe(pipe_slow);
14573 %}
14574 
14575 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14576 %{
14577   predicate(!VM_Version::supports_avx10_2());
14578   match(Set dst (ConvD2I src));
14579   effect(KILL cr);
14580   format %{ "convert_d2i $dst, $src"%}
14581   ins_encode %{
14582     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14583   %}
14584   ins_pipe(pipe_slow);
14585 %}
14586 
14587 instruct convD2I_reg_reg_avx10(rRegI dst, regD src)
14588 %{
14589   predicate(VM_Version::supports_avx10_2());
14590   match(Set dst (ConvD2I src));
14591   format %{ "evcvttsd2sisl $dst, $src" %}
14592   ins_encode %{
14593     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14594   %}
14595   ins_pipe(pipe_slow);
14596 %}
14597 
14598 instruct convD2I_reg_mem_avx10(rRegI dst, memory src)
14599 %{
14600   predicate(VM_Version::supports_avx10_2());
14601   match(Set dst (ConvD2I (LoadD src)));
14602   format %{ "evcvttsd2sisl $dst, $src" %}
14603   ins_encode %{
14604     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14605   %}
14606   ins_pipe(pipe_slow);
14607 %}
14608 
14609 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14610 %{
14611   predicate(!VM_Version::supports_avx10_2());
14612   match(Set dst (ConvD2L src));
14613   effect(KILL cr);
14614   format %{ "convert_d2l $dst, $src"%}
14615   ins_encode %{
14616     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14617   %}
14618   ins_pipe(pipe_slow);
14619 %}
14620 
14621 instruct convD2L_reg_reg_avx10(rRegL dst, regD src)
14622 %{
14623   predicate(VM_Version::supports_avx10_2());
14624   match(Set dst (ConvD2L src));
14625   format %{ "evcvttsd2sisq $dst, $src" %}
14626   ins_encode %{
14627     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14628   %}
14629   ins_pipe(pipe_slow);
14630 %}
14631 
14632 instruct convD2L_reg_mem_avx10(rRegL dst, memory src)
14633 %{
14634   predicate(VM_Version::supports_avx10_2());
14635   match(Set dst (ConvD2L (LoadD src)));
14636   format %{ "evcvttsd2sisq $dst, $src" %}
14637   ins_encode %{
14638     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14639   %}
14640   ins_pipe(pipe_slow);
14641 %}
14642 
14643 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14644 %{
14645   match(Set dst (RoundD src));
14646   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14647   format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14648   ins_encode %{
14649     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14650   %}
14651   ins_pipe(pipe_slow);
14652 %}
14653 
14654 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14655 %{
14656   match(Set dst (RoundF src));
14657   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14658   format %{ "round_float $dst,$src" %}
14659   ins_encode %{
14660     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14661   %}
14662   ins_pipe(pipe_slow);
14663 %}
14664 
14665 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14666 %{
14667   predicate(!UseXmmI2F);
14668   match(Set dst (ConvI2F src));
14669 
14670   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14671   ins_encode %{
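    // Clearing dst first breaks the false dependency cvtsi2ssl would carry on
    // dst's stale upper elements (scalar converts merge rather than zero
    // them); the same idiom is used in the i2d/l2f/l2d forms below.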
14672     if (UseAVX > 0) {
14673       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14674     }
14675     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14676   %}
14677   ins_pipe(pipe_slow); // XXX
14678 %}
14679 
14680 instruct convI2F_reg_mem(regF dst, memory src)
14681 %{
14682   predicate(UseAVX == 0);
14683   match(Set dst (ConvI2F (LoadI src)));
14684 
14685   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14686   ins_encode %{
14687     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14688   %}
14689   ins_pipe(pipe_slow); // XXX
14690 %}
14691 
14692 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14693 %{
14694   predicate(!UseXmmI2D);
14695   match(Set dst (ConvI2D src));
14696 
14697   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14698   ins_encode %{
14699     if (UseAVX > 0) {
14700       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14701     }
14702     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14703   %}
14704   ins_pipe(pipe_slow); // XXX
14705 %}
14706 
14707 instruct convI2D_reg_mem(regD dst, memory src)
14708 %{
14709   predicate(UseAVX == 0);
14710   match(Set dst (ConvI2D (LoadI src)));
14711 
14712   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14713   ins_encode %{
14714     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14715   %}
14716   ins_pipe(pipe_slow); // XXX
14717 %}
14718 
14719 instruct convXI2F_reg(regF dst, rRegI src)
14720 %{
14721   predicate(UseXmmI2F);
14722   match(Set dst (ConvI2F src));
14723 
14724   format %{ "movdl $dst, $src\n\t"
14725             "cvtdq2psl $dst, $dst\t# i2f" %}
14726   ins_encode %{
14727     __ movdl($dst$$XMMRegister, $src$$Register);
14728     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14729   %}
14730   ins_pipe(pipe_slow); // XXX
14731 %}
14732 
14733 instruct convXI2D_reg(regD dst, rRegI src)
14734 %{
14735   predicate(UseXmmI2D);
14736   match(Set dst (ConvI2D src));
14737 
14738   format %{ "movdl $dst, $src\n\t"
14739             "cvtdq2pdl $dst, $dst\t# i2d" %}
14740   ins_encode %{
14741     __ movdl($dst$$XMMRegister, $src$$Register);
14742     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14743   %}
14744   ins_pipe(pipe_slow); // XXX
14745 %}
14746 
14747 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14748 %{
14749   match(Set dst (ConvL2F src));
14750 
14751   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14752   ins_encode %{
14753     if (UseAVX > 0) {
14754       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14755     }
14756     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14757   %}
14758   ins_pipe(pipe_slow); // XXX
14759 %}
14760 
14761 instruct convL2F_reg_mem(regF dst, memory src)
14762 %{
14763   predicate(UseAVX == 0);
14764   match(Set dst (ConvL2F (LoadL src)));
14765 
14766   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14767   ins_encode %{
14768     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14769   %}
14770   ins_pipe(pipe_slow); // XXX
14771 %}
14772 
14773 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14774 %{
14775   match(Set dst (ConvL2D src));
14776 
14777   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14778   ins_encode %{
14779     if (UseAVX > 0) {
14780       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14781     }
14782     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14783   %}
14784   ins_pipe(pipe_slow); // XXX
14785 %}
14786 
14787 instruct convL2D_reg_mem(regD dst, memory src)
14788 %{
14789   predicate(UseAVX == 0);
14790   match(Set dst (ConvL2D (LoadL src)));
14791 
14792   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14793   ins_encode %{
14794     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14795   %}
14796   ins_pipe(pipe_slow); // XXX
14797 %}
14798 
14799 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14800 %{
14801   match(Set dst (ConvI2L src));
14802 
14803   ins_cost(125);
14804   format %{ "movslq  $dst, $src\t# i2l" %}
14805   ins_encode %{
14806     __ movslq($dst$$Register, $src$$Register);
14807   %}
14808   ins_pipe(ialu_reg_reg);
14809 %}
14810 
14811 // Zero-extend convert int to long
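// A 32-bit mov implicitly zeroes bits 63:32 on x86-64, so movl alone
// implements the zero-extension; when dst == src the move is elided entirely.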
14812 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14813 %{
14814   match(Set dst (AndL (ConvI2L src) mask));
14815 
14816   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
14817   ins_encode %{
14818     if ($dst$$reg != $src$$reg) {
14819       __ movl($dst$$Register, $src$$Register);
14820     }
14821   %}
14822   ins_pipe(ialu_reg_reg);
14823 %}
14824 
14825 // Zero-extend convert int to long
14826 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14827 %{
14828   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14829 
14830   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
14831   ins_encode %{
14832     __ movl($dst$$Register, $src$$Address);
14833   %}
14834   ins_pipe(ialu_reg_mem);
14835 %}
14836 
14837 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14838 %{
14839   match(Set dst (AndL src mask));
14840 
14841   format %{ "movl    $dst, $src\t# zero-extend long" %}
14842   ins_encode %{
14843     __ movl($dst$$Register, $src$$Register);
14844   %}
14845   ins_pipe(ialu_reg_reg);
14846 %}
14847 
14848 instruct convL2I_reg_reg(rRegI dst, rRegL src)
14849 %{
14850   match(Set dst (ConvL2I src));
14851 
14852   format %{ "movl    $dst, $src\t# l2i" %}
14853   ins_encode %{
14854     __ movl($dst$$Register, $src$$Register);
14855   %}
14856   ins_pipe(ialu_reg_reg);
14857 %}
14858 
14859 
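// The Move*2* instructions below are raw bit-pattern moves between the GPR
// and XMM files (Float.floatToRawIntBits and friends), either through a stack
// slot or directly via movd/movq; no value conversion takes place.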
14860 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
14861   match(Set dst (MoveF2I src));
14862   effect(DEF dst, USE src);
14863 
14864   ins_cost(125);
14865   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
14866   ins_encode %{
14867     __ movl($dst$$Register, Address(rsp, $src$$disp));
14868   %}
14869   ins_pipe(ialu_reg_mem);
14870 %}
14871 
14872 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
14873   match(Set dst (MoveI2F src));
14874   effect(DEF dst, USE src);
14875 
14876   ins_cost(125);
14877   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
14878   ins_encode %{
14879     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
14880   %}
14881   ins_pipe(pipe_slow);
14882 %}
14883 
14884 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
14885   match(Set dst (MoveD2L src));
14886   effect(DEF dst, USE src);
14887 
14888   ins_cost(125);
14889   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
14890   ins_encode %{
14891     __ movq($dst$$Register, Address(rsp, $src$$disp));
14892   %}
14893   ins_pipe(ialu_reg_mem);
14894 %}
14895 
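// UseXmmLoadAndClearUpper chooses movsd (zeroes the upper half of the XMM
// register) over movlpd (merges into it); movdbl() dispatches on the flag, so
// the two instructs differ only in their debug format.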
14896 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
14897   predicate(!UseXmmLoadAndClearUpper);
14898   match(Set dst (MoveL2D src));
14899   effect(DEF dst, USE src);
14900 
14901   ins_cost(125);
14902   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
14903   ins_encode %{
14904     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14905   %}
14906   ins_pipe(pipe_slow);
14907 %}
14908 
14909 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
14910   predicate(UseXmmLoadAndClearUpper);
14911   match(Set dst (MoveL2D src));
14912   effect(DEF dst, USE src);
14913 
14914   ins_cost(125);
14915   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
14916   ins_encode %{
14917     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14918   %}
14919   ins_pipe(pipe_slow);
14920 %}
14921 
14922 
14923 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
14924   match(Set dst (MoveF2I src));
14925   effect(DEF dst, USE src);
14926 
14927   ins_cost(95); // XXX
14928   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
14929   ins_encode %{
14930     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
14931   %}
14932   ins_pipe(pipe_slow);
14933 %}
14934 
14935 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
14936   match(Set dst (MoveI2F src));
14937   effect(DEF dst, USE src);
14938 
14939   ins_cost(100);
14940   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
14941   ins_encode %{
14942     __ movl(Address(rsp, $dst$$disp), $src$$Register);
14943   %}
14944   ins_pipe( ialu_mem_reg );
14945 %}
14946 
14947 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
14948   match(Set dst (MoveD2L src));
14949   effect(DEF dst, USE src);
14950 
14951   ins_cost(95); // XXX
14952   format %{ "movsd   $dst, $src\t# MoveL2D_reg_stack" %}
14953   ins_encode %{
14954     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
14955   %}
14956   ins_pipe(pipe_slow);
14957 %}
14958 
14959 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
14960   match(Set dst (MoveL2D src));
14961   effect(DEF dst, USE src);
14962 
14963   ins_cost(100);
14964   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
14965   ins_encode %{
14966     __ movq(Address(rsp, $dst$$disp), $src$$Register);
14967   %}
14968   ins_pipe(ialu_mem_reg);
14969 %}
14970 
14971 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
14972   match(Set dst (MoveF2I src));
14973   effect(DEF dst, USE src);
14974   ins_cost(85);
14975   format %{ "movd    $dst,$src\t# MoveF2I" %}
14976   ins_encode %{
14977     __ movdl($dst$$Register, $src$$XMMRegister);
14978   %}
14979   ins_pipe( pipe_slow );
14980 %}
14981 
14982 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
14983   match(Set dst (MoveD2L src));
14984   effect(DEF dst, USE src);
14985   ins_cost(85);
14986   format %{ "movd    $dst,$src\t# MoveD2L" %}
14987   ins_encode %{
14988     __ movdq($dst$$Register, $src$$XMMRegister);
14989   %}
14990   ins_pipe( pipe_slow );
14991 %}
14992 
14993 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
14994   match(Set dst (MoveI2F src));
14995   effect(DEF dst, USE src);
14996   ins_cost(100);
14997   format %{ "movd    $dst,$src\t# MoveI2F" %}
14998   ins_encode %{
14999     __ movdl($dst$$XMMRegister, $src$$Register);
15000   %}
15001   ins_pipe( pipe_slow );
15002 %}
15003 
15004 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15005   match(Set dst (MoveL2D src));
15006   effect(DEF dst, USE src);
15007   ins_cost(100);
15008   format %{ "movd    $dst,$src\t# MoveL2D" %}
15009   ins_encode %{
15010      __ movdq($dst$$XMMRegister, $src$$Register);
15011   %}
15012   ins_pipe( pipe_slow );
15013 %}
15014 
15015 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
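// clear_mem(base, cnt, zero, xtmp, is_large, mask) takes the count in 8-byte
// words (the shlq-by-3 in the large path converts it to bytes); is_large =
// false selects the short-length path, and knoreg means no AVX-512 mask
// register is supplied.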
15017 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15018                   Universe dummy, rFlagsReg cr)
15019 %{
15020   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15021   match(Set dummy (ClearArray cnt base));
15022   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15023 
15024   format %{ $$template
15025     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15026     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15027     $$emit$$"jg      LARGE\n\t"
15028     $$emit$$"dec     rcx\n\t"
15029     $$emit$$"js      DONE\t# Zero length\n\t"
15030     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15031     $$emit$$"dec     rcx\n\t"
15032     $$emit$$"jge     LOOP\n\t"
15033     $$emit$$"jmp     DONE\n\t"
15034     $$emit$$"# LARGE:\n\t"
15035     if (UseFastStosb) {
15036        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15037        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15038     } else if (UseXMMForObjInit) {
15039        $$emit$$"mov     rdi,rax\n\t"
15040        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15041        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15042        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15043        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15044        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15045        $$emit$$"add     0x40,rax\n\t"
15046        $$emit$$"# L_zero_64_bytes:\n\t"
15047        $$emit$$"sub     0x8,rcx\n\t"
15048        $$emit$$"jge     L_loop\n\t"
15049        $$emit$$"add     0x4,rcx\n\t"
15050        $$emit$$"jl      L_tail\n\t"
15051        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15052        $$emit$$"add     0x20,rax\n\t"
15053        $$emit$$"sub     0x4,rcx\n\t"
15054        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15055        $$emit$$"add     0x4,rcx\n\t"
15056        $$emit$$"jle     L_end\n\t"
15057        $$emit$$"dec     rcx\n\t"
15058        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15059        $$emit$$"vmovq   xmm0,(rax)\n\t"
15060        $$emit$$"add     0x8,rax\n\t"
15061        $$emit$$"dec     rcx\n\t"
15062        $$emit$$"jge     L_sloop\n\t"
15063        $$emit$$"# L_end:\n\t"
15064     } else {
15065        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15066     }
15067     $$emit$$"# DONE"
15068   %}
15069   ins_encode %{
15070     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15071                  $tmp$$XMMRegister, false, knoreg);
15072   %}
15073   ins_pipe(pipe_slow);
15074 %}
15075 
15076 // Small non-constant length ClearArray for AVX512 targets.
15077 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15078                        Universe dummy, rFlagsReg cr)
15079 %{
15080   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15081   match(Set dummy (ClearArray cnt base));
15082   ins_cost(125);
15083   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15084 
15085   format %{ $$template
15086     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15087     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15088     $$emit$$"jg      LARGE\n\t"
15089     $$emit$$"dec     rcx\n\t"
15090     $$emit$$"js      DONE\t# Zero length\n\t"
15091     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15092     $$emit$$"dec     rcx\n\t"
15093     $$emit$$"jge     LOOP\n\t"
15094     $$emit$$"jmp     DONE\n\t"
15095     $$emit$$"# LARGE:\n\t"
15096     if (UseFastStosb) {
15097        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15098        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15099     } else if (UseXMMForObjInit) {
15100        $$emit$$"mov     rdi,rax\n\t"
15101        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15102        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15103        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15104        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15105        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15106        $$emit$$"add     0x40,rax\n\t"
15107        $$emit$$"# L_zero_64_bytes:\n\t"
15108        $$emit$$"sub     0x8,rcx\n\t"
15109        $$emit$$"jge     L_loop\n\t"
15110        $$emit$$"add     0x4,rcx\n\t"
15111        $$emit$$"jl      L_tail\n\t"
15112        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15113        $$emit$$"add     0x20,rax\n\t"
15114        $$emit$$"sub     0x4,rcx\n\t"
15115        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15116        $$emit$$"add     0x4,rcx\n\t"
15117        $$emit$$"jle     L_end\n\t"
15118        $$emit$$"dec     rcx\n\t"
15119        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15120        $$emit$$"vmovq   xmm0,(rax)\n\t"
15121        $$emit$$"add     0x8,rax\n\t"
15122        $$emit$$"dec     rcx\n\t"
15123        $$emit$$"jge     L_sloop\n\t"
15124        $$emit$$"# L_end:\n\t"
15125     } else {
15126        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15127     }
15128     $$emit$$"# DONE"
15129   %}
15130   ins_encode %{
15131     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15132                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15133   %}
15134   ins_pipe(pipe_slow);
15135 %}
15136 
15137 // Large non-constant length ClearArray for non-AVX512 targets.
15138 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15139                         Universe dummy, rFlagsReg cr)
15140 %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15142   match(Set dummy (ClearArray cnt base));
15143   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15144 
15145   format %{ $$template
15146     if (UseFastStosb) {
15147        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15148        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15149        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15150     } else if (UseXMMForObjInit) {
15151        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15152        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15153        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15154        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15155        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15156        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15157        $$emit$$"add     0x40,rax\n\t"
15158        $$emit$$"# L_zero_64_bytes:\n\t"
15159        $$emit$$"sub     0x8,rcx\n\t"
15160        $$emit$$"jge     L_loop\n\t"
15161        $$emit$$"add     0x4,rcx\n\t"
15162        $$emit$$"jl      L_tail\n\t"
15163        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15164        $$emit$$"add     0x20,rax\n\t"
15165        $$emit$$"sub     0x4,rcx\n\t"
15166        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15167        $$emit$$"add     0x4,rcx\n\t"
15168        $$emit$$"jle     L_end\n\t"
15169        $$emit$$"dec     rcx\n\t"
15170        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15171        $$emit$$"vmovq   xmm0,(rax)\n\t"
15172        $$emit$$"add     0x8,rax\n\t"
15173        $$emit$$"dec     rcx\n\t"
15174        $$emit$$"jge     L_sloop\n\t"
15175        $$emit$$"# L_end:\n\t"
15176     } else {
15177        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15178        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15179     }
15180   %}
15181   ins_encode %{
15182     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15183                  $tmp$$XMMRegister, true, knoreg);
15184   %}
15185   ins_pipe(pipe_slow);
15186 %}
15187 
15188 // Large non-constant length ClearArray for AVX512 targets.
15189 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15190                              Universe dummy, rFlagsReg cr)
15191 %{
15192   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15193   match(Set dummy (ClearArray cnt base));
15194   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15195 
15196   format %{ $$template
15197     if (UseFastStosb) {
15198        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15199        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15200        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15201     } else if (UseXMMForObjInit) {
15202        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15203        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15204        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15205        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15206        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15207        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15208        $$emit$$"add     0x40,rax\n\t"
15209        $$emit$$"# L_zero_64_bytes:\n\t"
15210        $$emit$$"sub     0x8,rcx\n\t"
15211        $$emit$$"jge     L_loop\n\t"
15212        $$emit$$"add     0x4,rcx\n\t"
15213        $$emit$$"jl      L_tail\n\t"
15214        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15215        $$emit$$"add     0x20,rax\n\t"
15216        $$emit$$"sub     0x4,rcx\n\t"
15217        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15218        $$emit$$"add     0x4,rcx\n\t"
15219        $$emit$$"jle     L_end\n\t"
15220        $$emit$$"dec     rcx\n\t"
15221        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15222        $$emit$$"vmovq   xmm0,(rax)\n\t"
15223        $$emit$$"add     0x8,rax\n\t"
15224        $$emit$$"dec     rcx\n\t"
15225        $$emit$$"jge     L_sloop\n\t"
15226        $$emit$$"# L_end:\n\t"
15227     } else {
15228        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15229        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15230     }
15231   %}
15232   ins_encode %{
15233     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15234                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15235   %}
15236   ins_pipe(pipe_slow);
15237 %}
15238 
15239 // Small constant length ClearArray for AVX512 targets.
15240 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15241 %{
15242   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15243   match(Set dummy (ClearArray cnt base));
15244   ins_cost(100);
15245   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15246   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15247   ins_encode %{
15248    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15249   %}
15250   ins_pipe(pipe_slow);
15251 %}
15252 
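// String compare flavors are selected by operand encoding: LL = both Latin-1
// byte[], UU = both UTF-16 char[], LU/UL = mixed; the _evex variants
// additionally use an AVX-512 mask register (ktmp).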
15253 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15254                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15255 %{
15256   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15257   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15258   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15259 
15260   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15261   ins_encode %{
15262     __ string_compare($str1$$Register, $str2$$Register,
15263                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15264                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15265   %}
15266   ins_pipe( pipe_slow );
15267 %}
15268 
15269 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15270                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15271 %{
15272   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15273   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15274   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15275 
15276   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15277   ins_encode %{
15278     __ string_compare($str1$$Register, $str2$$Register,
15279                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15280                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15281   %}
15282   ins_pipe( pipe_slow );
15283 %}
15284 
15285 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15286                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15287 %{
15288   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15289   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15290   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15291 
15292   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15293   ins_encode %{
15294     __ string_compare($str1$$Register, $str2$$Register,
15295                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15296                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15297   %}
15298   ins_pipe( pipe_slow );
15299 %}
15300 
15301 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15302                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15303 %{
15304   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15305   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15306   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15307 
15308   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15309   ins_encode %{
15310     __ string_compare($str1$$Register, $str2$$Register,
15311                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15312                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15313   %}
15314   ins_pipe( pipe_slow );
15315 %}
15316 
15317 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15318                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15319 %{
15320   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15321   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15322   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15323 
15324   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15325   ins_encode %{
15326     __ string_compare($str1$$Register, $str2$$Register,
15327                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15328                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15329   %}
15330   ins_pipe( pipe_slow );
15331 %}
15332 
15333 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15334                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15335 %{
15336   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15337   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15338   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15339 
15340   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15341   ins_encode %{
15342     __ string_compare($str1$$Register, $str2$$Register,
15343                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15344                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15345   %}
15346   ins_pipe( pipe_slow );
15347 %}
15348 
15349 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15350                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15351 %{
15352   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15353   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15354   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15355 
15356   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15357   ins_encode %{
15358     __ string_compare($str2$$Register, $str1$$Register,
15359                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15360                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15361   %}
15362   ins_pipe( pipe_slow );
15363 %}
15364 
15365 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15366                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15367 %{
15368   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15369   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15370   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15371 
15372   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15373   ins_encode %{
15374     __ string_compare($str2$$Register, $str1$$Register,
15375                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15376                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15377   %}
15378   ins_pipe( pipe_slow );
15379 %}
15380 
15381 // fast search of substring with known size.
15382 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15383                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15384 %{
15385   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15386   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15387   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15388 
15389   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15390   ins_encode %{
15391     int icnt2 = (int)$int_cnt2$$constant;
15392     if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through the stack.
15395       __ string_indexofC8($str1$$Register, $str2$$Register,
15396                           $cnt1$$Register, $cnt2$$Register,
15397                           icnt2, $result$$Register,
15398                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15399     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15401       __ string_indexof($str1$$Register, $str2$$Register,
15402                         $cnt1$$Register, $cnt2$$Register,
15403                         icnt2, $result$$Register,
15404                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15405     }
15406   %}
15407   ins_pipe( pipe_slow );
15408 %}
15409 
15410 // fast search of substring with known size.
15411 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15412                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15413 %{
15414   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15415   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15416   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15417 
15418   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15419   ins_encode %{
15420     int icnt2 = (int)$int_cnt2$$constant;
15421     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
15424       __ string_indexofC8($str1$$Register, $str2$$Register,
15425                           $cnt1$$Register, $cnt2$$Register,
15426                           icnt2, $result$$Register,
15427                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15428     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15430       __ string_indexof($str1$$Register, $str2$$Register,
15431                         $cnt1$$Register, $cnt2$$Register,
15432                         icnt2, $result$$Register,
15433                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15434     }
15435   %}
15436   ins_pipe( pipe_slow );
15437 %}
15438 
15439 // fast search of substring with known size.
15440 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15441                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15442 %{
15443   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15444   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15445   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15446 
15447   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15448   ins_encode %{
15449     int icnt2 = (int)$int_cnt2$$constant;
15450     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
15453       __ string_indexofC8($str1$$Register, $str2$$Register,
15454                           $cnt1$$Register, $cnt2$$Register,
15455                           icnt2, $result$$Register,
15456                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15457     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15459       __ string_indexof($str1$$Register, $str2$$Register,
15460                         $cnt1$$Register, $cnt2$$Register,
15461                         icnt2, $result$$Register,
15462                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15463     }
15464   %}
15465   ins_pipe( pipe_slow );
15466 %}
15467 
15468 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15469                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15470 %{
15471   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15472   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15473   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15474 
15475   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15476   ins_encode %{
15477     __ string_indexof($str1$$Register, $str2$$Register,
15478                       $cnt1$$Register, $cnt2$$Register,
15479                       (-1), $result$$Register,
15480                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15481   %}
15482   ins_pipe( pipe_slow );
15483 %}
15484 
15485 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15486                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15487 %{
15488   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15489   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15490   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15491 
15492   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15493   ins_encode %{
15494     __ string_indexof($str1$$Register, $str2$$Register,
15495                       $cnt1$$Register, $cnt2$$Register,
15496                       (-1), $result$$Register,
15497                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15498   %}
15499   ins_pipe( pipe_slow );
15500 %}
15501 
15502 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15503                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15504 %{
15505   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15506   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15507   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15508 
15509   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15510   ins_encode %{
15511     __ string_indexof($str1$$Register, $str2$$Register,
15512                       $cnt1$$Register, $cnt2$$Register,
15513                       (-1), $result$$Register,
15514                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15515   %}
15516   ins_pipe( pipe_slow );
15517 %}
15518 
15519 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15520                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15521 %{
15522   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15523   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15524   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15525   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15526   ins_encode %{
15527     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15528                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15529   %}
15530   ins_pipe( pipe_slow );
15531 %}
15532 
15533 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15534                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15535 %{
15536   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15537   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15538   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15539   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15540   ins_encode %{
15541     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15543   %}
15544   ins_pipe( pipe_slow );
15545 %}
15546 
15547 // fast string equals
15548 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15549                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15550 %{
15551   predicate(!VM_Version::supports_avx512vlbw());
15552   match(Set result (StrEquals (Binary str1 str2) cnt));
15553   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15554 
15555   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15556   ins_encode %{
15557     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15558                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15559                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15560   %}
15561   ins_pipe( pipe_slow );
15562 %}
15563 
15564 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15565                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15566 %{
15567   predicate(VM_Version::supports_avx512vlbw());
15568   match(Set result (StrEquals (Binary str1 str2) cnt));
15569   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15570 
15571   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15572   ins_encode %{
15573     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15574                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15575                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15576   %}
15577   ins_pipe( pipe_slow );
15578 %}
15579 
15580 // fast array equals
15581 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15582                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15583 %{
15584   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15585   match(Set result (AryEq ary1 ary2));
15586   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15587 
15588   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15589   ins_encode %{
15590     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15591                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15592                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15593   %}
15594   ins_pipe( pipe_slow );
15595 %}
15596 
15597 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15598                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15599 %{
15600   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15601   match(Set result (AryEq ary1 ary2));
15602   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15603 
15604   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15605   ins_encode %{
15606     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15607                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15608                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15609   %}
15610   ins_pipe( pipe_slow );
15611 %}
15612 
15613 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15614                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15615 %{
15616   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15617   match(Set result (AryEq ary1 ary2));
15618   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15619 
15620   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15621   ins_encode %{
15622     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15623                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15624                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15625   %}
15626   ins_pipe( pipe_slow );
15627 %}
15628 
15629 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15630                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15631 %{
15632   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15633   match(Set result (AryEq ary1 ary2));
15634   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15635 
15636   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15637   ins_encode %{
15638     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15639                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15640                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15641   %}
15642   ins_pipe( pipe_slow );
15643 %}
15644 
15645 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15646                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15647                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15648                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15649                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15650 %{
15651   predicate(UseAVX >= 2);
15652   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15653   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15654          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15655          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15656          USE basic_type, KILL cr);
15657 
15658   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
15659   ins_encode %{
15660     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15661                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15662                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15663                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15664                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15665                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15666                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15667   %}
15668   ins_pipe( pipe_slow );
15669 %}
15670 
15671 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15673 %{
15674   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15675   match(Set result (CountPositives ary1 len));
15676   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15677 
15678   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15679   ins_encode %{
15680     __ count_positives($ary1$$Register, $len$$Register,
15681                        $result$$Register, $tmp3$$Register,
15682                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15683   %}
15684   ins_pipe( pipe_slow );
15685 %}
15686 
15687 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15689 %{
15690   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15691   match(Set result (CountPositives ary1 len));
15692   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15693 
15694   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15695   ins_encode %{
15696     __ count_positives($ary1$$Register, $len$$Register,
15697                        $result$$Register, $tmp3$$Register,
15698                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15699   %}
15700   ins_pipe( pipe_slow );
15701 %}
15702 
15703 // fast char[] to byte[] compression
15704 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15705                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15706   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15707   match(Set result (StrCompressedCopy src (Binary dst len)));
15708   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15709          USE_KILL len, KILL tmp5, KILL cr);
15710 
15711   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15712   ins_encode %{
15713     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15714                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15715                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15716                            knoreg, knoreg);
15717   %}
15718   ins_pipe( pipe_slow );
15719 %}
15720 
15721 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15722                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15723   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15724   match(Set result (StrCompressedCopy src (Binary dst len)));
15725   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15726          USE_KILL len, KILL tmp5, KILL cr);
15727 
15728   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15729   ins_encode %{
15730     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15731                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15732                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15733                            $ktmp1$$KRegister, $ktmp2$$KRegister);
15734   %}
15735   ins_pipe( pipe_slow );
15736 %}
15737 // fast byte[] to char[] inflation
15738 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15739                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15740   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15741   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15742   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15743 
15744   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15745   ins_encode %{
15746     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15747                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15748   %}
15749   ins_pipe( pipe_slow );
15750 %}
15751 
15752 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15753                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15754   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15755   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15756   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15757 
15758   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15759   ins_encode %{
15760     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15761                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15762   %}
15763   ins_pipe( pipe_slow );
15764 %}
15765 
15766 // encode char[] to byte[] in ISO_8859_1
15767 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15768                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15769                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15770   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15771   match(Set result (EncodeISOArray src (Binary dst len)));
15772   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15773 
15774   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15775   ins_encode %{
15776     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15777                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15778                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15779   %}
15780   ins_pipe( pipe_slow );
15781 %}
15782 
15783 // encode char[] to byte[] in ASCII
15784 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15785                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15786                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15787   predicate(((EncodeISOArrayNode*)n)->is_ascii());
15788   match(Set result (EncodeISOArray src (Binary dst len)));
15789   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15790 
15791   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15792   ins_encode %{
15793     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15794                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15795                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15796   %}
15797   ins_pipe( pipe_slow );
15798 %}
15799 
15800 //----------Overflow Math Instructions-----------------------------------------
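
// These instructions perform the arithmetic purely for its effect on
// the condition codes; the matching If node then branches on the
// overflow condition. Illustrative Java-level source (assuming the
// usual Math.*Exact intrinsics):
//   int r = Math.addExact(a, b);   // -> OverflowAddI guarding the add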
15801 
15802 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15803 %{
15804   match(Set cr (OverflowAddI op1 op2));
15805   effect(DEF cr, USE_KILL op1, USE op2);
15806 
15807   format %{ "addl    $op1, $op2\t# overflow check int" %}
15808 
15809   ins_encode %{
15810     __ addl($op1$$Register, $op2$$Register);
15811   %}
15812   ins_pipe(ialu_reg_reg);
15813 %}
15814 
15815 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15816 %{
15817   match(Set cr (OverflowAddI op1 op2));
15818   effect(DEF cr, USE_KILL op1, USE op2);
15819 
15820   format %{ "addl    $op1, $op2\t# overflow check int" %}
15821 
15822   ins_encode %{
15823     __ addl($op1$$Register, $op2$$constant);
15824   %}
15825   ins_pipe(ialu_reg_reg);
15826 %}
15827 
15828 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15829 %{
15830   match(Set cr (OverflowAddL op1 op2));
15831   effect(DEF cr, USE_KILL op1, USE op2);
15832 
15833   format %{ "addq    $op1, $op2\t# overflow check long" %}
15834   ins_encode %{
15835     __ addq($op1$$Register, $op2$$Register);
15836   %}
15837   ins_pipe(ialu_reg_reg);
15838 %}
15839 
15840 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
15841 %{
15842   match(Set cr (OverflowAddL op1 op2));
15843   effect(DEF cr, USE_KILL op1, USE op2);
15844 
15845   format %{ "addq    $op1, $op2\t# overflow check long" %}
15846   ins_encode %{
15847     __ addq($op1$$Register, $op2$$constant);
15848   %}
15849   ins_pipe(ialu_reg_reg);
15850 %}
15851 
15852 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15853 %{
15854   match(Set cr (OverflowSubI op1 op2));
15855 
15856   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15857   ins_encode %{
15858     __ cmpl($op1$$Register, $op2$$Register);
15859   %}
15860   ins_pipe(ialu_reg_reg);
15861 %}
15862 
15863 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15864 %{
15865   match(Set cr (OverflowSubI op1 op2));
15866 
15867   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15868   ins_encode %{
15869     __ cmpl($op1$$Register, $op2$$constant);
15870   %}
15871   ins_pipe(ialu_reg_reg);
15872 %}
15873 
15874 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15875 %{
15876   match(Set cr (OverflowSubL op1 op2));
15877 
15878   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15879   ins_encode %{
15880     __ cmpq($op1$$Register, $op2$$Register);
15881   %}
15882   ins_pipe(ialu_reg_reg);
15883 %}
15884 
15885 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15886 %{
15887   match(Set cr (OverflowSubL op1 op2));
15888 
15889   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15890   ins_encode %{
15891     __ cmpq($op1$$Register, $op2$$constant);
15892   %}
15893   ins_pipe(ialu_reg_reg);
15894 %}
15895 
15896 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
15897 %{
15898   match(Set cr (OverflowSubI zero op2));
15899   effect(DEF cr, USE_KILL op2);
15900 
15901   format %{ "negl    $op2\t# overflow check int" %}
15902   ins_encode %{
15903     __ negl($op2$$Register);
15904   %}
15905   ins_pipe(ialu_reg_reg);
15906 %}
15907 
15908 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
15909 %{
15910   match(Set cr (OverflowSubL zero op2));
15911   effect(DEF cr, USE_KILL op2);
15912 
15913   format %{ "negq    $op2\t# overflow check long" %}
15914   ins_encode %{
15915     __ negq($op2$$Register);
15916   %}
15917   ins_pipe(ialu_reg_reg);
15918 %}
15919 
15920 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15921 %{
15922   match(Set cr (OverflowMulI op1 op2));
15923   effect(DEF cr, USE_KILL op1, USE op2);
15924 
15925   format %{ "imull    $op1, $op2\t# overflow check int" %}
15926   ins_encode %{
15927     __ imull($op1$$Register, $op2$$Register);
15928   %}
15929   ins_pipe(ialu_reg_reg_alu0);
15930 %}
15931 
15932 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
15933 %{
15934   match(Set cr (OverflowMulI op1 op2));
15935   effect(DEF cr, TEMP tmp, USE op1, USE op2);
15936 
15937   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
15938   ins_encode %{
15939     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
15940   %}
15941   ins_pipe(ialu_reg_reg_alu0);
15942 %}
15943 
15944 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15945 %{
15946   match(Set cr (OverflowMulL op1 op2));
15947   effect(DEF cr, USE_KILL op1, USE op2);
15948 
15949   format %{ "imulq    $op1, $op2\t# overflow check long" %}
15950   ins_encode %{
15951     __ imulq($op1$$Register, $op2$$Register);
15952   %}
15953   ins_pipe(ialu_reg_reg_alu0);
15954 %}
15955 
15956 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
15957 %{
15958   match(Set cr (OverflowMulL op1 op2));
15959   effect(DEF cr, TEMP tmp, USE op1, USE op2);
15960 
15961   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
15962   ins_encode %{
15963     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
15964   %}
15965   ins_pipe(ialu_reg_reg_alu0);
15966 %}
15967 
15968 
15969 //----------Control Flow Instructions------------------------------------------
15970 // Signed compare Instructions
15971 
15972 // XXX more variants!!
15973 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15974 %{
15975   match(Set cr (CmpI op1 op2));
15976   effect(DEF cr, USE op1, USE op2);
15977 
15978   format %{ "cmpl    $op1, $op2" %}
15979   ins_encode %{
15980     __ cmpl($op1$$Register, $op2$$Register);
15981   %}
15982   ins_pipe(ialu_cr_reg_reg);
15983 %}
15984 
15985 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15986 %{
15987   match(Set cr (CmpI op1 op2));
15988 
15989   format %{ "cmpl    $op1, $op2" %}
15990   ins_encode %{
15991     __ cmpl($op1$$Register, $op2$$constant);
15992   %}
15993   ins_pipe(ialu_cr_reg_imm);
15994 %}
15995 
15996 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
15997 %{
15998   match(Set cr (CmpI op1 (LoadI op2)));
15999 
16000   ins_cost(500); // XXX
16001   format %{ "cmpl    $op1, $op2" %}
16002   ins_encode %{
16003     __ cmpl($op1$$Register, $op2$$Address);
16004   %}
16005   ins_pipe(ialu_cr_reg_mem);
16006 %}
16007 
16008 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16009 %{
16010   match(Set cr (CmpI src zero));
16011 
16012   format %{ "testl   $src, $src" %}
16013   ins_encode %{
16014     __ testl($src$$Register, $src$$Register);
16015   %}
16016   ins_pipe(ialu_cr_reg_imm);
16017 %}
16018 
16019 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16020 %{
16021   match(Set cr (CmpI (AndI src con) zero));
16022 
16023   format %{ "testl   $src, $con" %}
16024   ins_encode %{
16025     __ testl($src$$Register, $con$$constant);
16026   %}
16027   ins_pipe(ialu_cr_reg_imm);
16028 %}
16029 
16030 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16031 %{
16032   match(Set cr (CmpI (AndI src1 src2) zero));
16033 
16034   format %{ "testl   $src1, $src2" %}
16035   ins_encode %{
16036     __ testl($src1$$Register, $src2$$Register);
16037   %}
16038   ins_pipe(ialu_cr_reg_imm);
16039 %}
16040 
16041 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16042 %{
16043   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16044 
16045   format %{ "testl   $src, $mem" %}
16046   ins_encode %{
16047     __ testl($src$$Register, $mem$$Address);
16048   %}
16049   ins_pipe(ialu_cr_reg_mem);
16050 %}
16051 
16052 // Unsigned compare Instructions; really, same as signed except they
16053 // produce an rFlagsRegU instead of rFlagsReg.
16054 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16055 %{
16056   match(Set cr (CmpU op1 op2));
16057 
16058   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16059   ins_encode %{
16060     __ cmpl($op1$$Register, $op2$$Register);
16061   %}
16062   ins_pipe(ialu_cr_reg_reg);
16063 %}
16064 
16065 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16066 %{
16067   match(Set cr (CmpU op1 op2));
16068 
16069   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16070   ins_encode %{
16071     __ cmpl($op1$$Register, $op2$$constant);
16072   %}
16073   ins_pipe(ialu_cr_reg_imm);
16074 %}
16075 
16076 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16077 %{
16078   match(Set cr (CmpU op1 (LoadI op2)));
16079 
16080   ins_cost(500); // XXX
16081   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16082   ins_encode %{
16083     __ cmpl($op1$$Register, $op2$$Address);
16084   %}
16085   ins_pipe(ialu_cr_reg_mem);
16086 %}
16087 
16088 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16089 %{
16090   match(Set cr (CmpU src zero));
16091 
16092   format %{ "testl   $src, $src\t# unsigned" %}
16093   ins_encode %{
16094     __ testl($src$$Register, $src$$Register);
16095   %}
16096   ins_pipe(ialu_cr_reg_imm);
16097 %}
16098 
16099 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16100 %{
16101   match(Set cr (CmpP op1 op2));
16102 
16103   format %{ "cmpq    $op1, $op2\t# ptr" %}
16104   ins_encode %{
16105     __ cmpq($op1$$Register, $op2$$Register);
16106   %}
16107   ins_pipe(ialu_cr_reg_reg);
16108 %}
16109 
16110 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16111 %{
16112   match(Set cr (CmpP op1 (LoadP op2)));
16113   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16114 
16115   ins_cost(500); // XXX
16116   format %{ "cmpq    $op1, $op2\t# ptr" %}
16117   ins_encode %{
16118     __ cmpq($op1$$Register, $op2$$Address);
16119   %}
16120   ins_pipe(ialu_cr_reg_mem);
16121 %}
16122 
16123 // XXX this is generalized by compP_rReg_mem???
16124 // Compare raw pointer (used in out-of-heap check).
16125 // Only works because non-oop pointers must be raw pointers
16126 // and raw pointers have no anti-dependencies.
16127 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16128 %{
16129   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16130             n->in(2)->as_Load()->barrier_data() == 0);
16131   match(Set cr (CmpP op1 (LoadP op2)));
16132 
16133   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16134   ins_encode %{
16135     __ cmpq($op1$$Register, $op2$$Address);
16136   %}
16137   ins_pipe(ialu_cr_reg_mem);
16138 %}
16139 
16140 // This will generate a signed flags result. This should be OK since
16141 // any compare to a zero should be eq/neq.
16142 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16143 %{
16144   match(Set cr (CmpP src zero));
16145 
16146   format %{ "testq   $src, $src\t# ptr" %}
16147   ins_encode %{
16148     __ testq($src$$Register, $src$$Register);
16149   %}
16150   ins_pipe(ialu_cr_reg_imm);
16151 %}
16152 
16153 // This will generate a signed flags result. This should be OK since
16154 // any compare to a zero should be eq/neq.
16155 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16156 %{
16157   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16158             n->in(1)->as_Load()->barrier_data() == 0);
16159   match(Set cr (CmpP (LoadP op) zero));
16160 
16161   ins_cost(500); // XXX
16162   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16163   ins_encode %{
16164     __ testq($op$$Address, 0xFFFFFFFF);
16165   %}
16166   ins_pipe(ialu_cr_reg_imm);
16167 %}
16168 
16169 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16170 %{
16171   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16172             n->in(1)->as_Load()->barrier_data() == 0);
16173   match(Set cr (CmpP (LoadP mem) zero));
16174 
16175   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16176   ins_encode %{
16177     __ cmpq(r12, $mem$$Address);
16178   %}
16179   ins_pipe(ialu_cr_reg_mem);
16180 %}
16181 
16182 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16183 %{
16184   match(Set cr (CmpN op1 op2));
16185 
16186   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16187   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16188   ins_pipe(ialu_cr_reg_reg);
16189 %}
16190 
16191 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16192 %{
16193   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16194   match(Set cr (CmpN src (LoadN mem)));
16195 
16196   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16197   ins_encode %{
16198     __ cmpl($src$$Register, $mem$$Address);
16199   %}
16200   ins_pipe(ialu_cr_reg_mem);
16201 %}
16202 
16203 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16204   match(Set cr (CmpN op1 op2));
16205 
16206   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16207   ins_encode %{
16208     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16209   %}
16210   ins_pipe(ialu_cr_reg_imm);
16211 %}
16212 
16213 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16214 %{
16215   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16216   match(Set cr (CmpN src (LoadN mem)));
16217 
16218   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16219   ins_encode %{
16220     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16221   %}
16222   ins_pipe(ialu_cr_reg_mem);
16223 %}
16224 
16225 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16226   match(Set cr (CmpN op1 op2));
16227 
16228   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16229   ins_encode %{
16230     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16231   %}
16232   ins_pipe(ialu_cr_reg_imm);
16233 %}
16234 
16235 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16236 %{
16237   predicate(!UseCompactObjectHeaders);
16238   match(Set cr (CmpN src (LoadNKlass mem)));
16239 
16240   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16241   ins_encode %{
16242     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16243   %}
16244   ins_pipe(ialu_cr_reg_mem);
16245 %}
16246 
16247 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16248   match(Set cr (CmpN src zero));
16249 
16250   format %{ "testl   $src, $src\t# compressed ptr" %}
16251   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16252   ins_pipe(ialu_cr_reg_imm);
16253 %}
16254 
16255 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16256 %{
16257   predicate(CompressedOops::base() != nullptr &&
16258             n->in(1)->as_Load()->barrier_data() == 0);
16259   match(Set cr (CmpN (LoadN mem) zero));
16260 
16261   ins_cost(500); // XXX
16262   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16263   ins_encode %{
16264     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16265   %}
16266   ins_pipe(ialu_cr_reg_mem);
16267 %}
16268 
16269 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16270 %{
16271   predicate(CompressedOops::base() == nullptr &&
16272             n->in(1)->as_Load()->barrier_data() == 0);
16273   match(Set cr (CmpN (LoadN mem) zero));
16274 
16275   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16276   ins_encode %{
16277     __ cmpl(r12, $mem$$Address);
16278   %}
16279   ins_pipe(ialu_cr_reg_mem);
16280 %}
16281 
16282 // Yanked all unsigned pointer compare operations.
16283 // Pointer compares are done with CmpP which is already unsigned.
16284 
16285 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16286 %{
16287   match(Set cr (CmpL op1 op2));
16288 
16289   format %{ "cmpq    $op1, $op2" %}
16290   ins_encode %{
16291     __ cmpq($op1$$Register, $op2$$Register);
16292   %}
16293   ins_pipe(ialu_cr_reg_reg);
16294 %}
16295 
16296 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16297 %{
16298   match(Set cr (CmpL op1 op2));
16299 
16300   format %{ "cmpq    $op1, $op2" %}
16301   ins_encode %{
16302     __ cmpq($op1$$Register, $op2$$constant);
16303   %}
16304   ins_pipe(ialu_cr_reg_imm);
16305 %}
16306 
16307 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16308 %{
16309   match(Set cr (CmpL op1 (LoadL op2)));
16310 
16311   format %{ "cmpq    $op1, $op2" %}
16312   ins_encode %{
16313     __ cmpq($op1$$Register, $op2$$Address);
16314   %}
16315   ins_pipe(ialu_cr_reg_mem);
16316 %}
16317 
16318 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16319 %{
16320   match(Set cr (CmpL src zero));
16321 
16322   format %{ "testq   $src, $src" %}
16323   ins_encode %{
16324     __ testq($src$$Register, $src$$Register);
16325   %}
16326   ins_pipe(ialu_cr_reg_imm);
16327 %}
16328 
16329 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16330 %{
16331   match(Set cr (CmpL (AndL src con) zero));
16332 
16333   format %{ "testq   $src, $con\t# long" %}
16334   ins_encode %{
16335     __ testq($src$$Register, $con$$constant);
16336   %}
16337   ins_pipe(ialu_cr_reg_imm);
16338 %}
16339 
16340 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16341 %{
16342   match(Set cr (CmpL (AndL src1 src2) zero));
16343 
16344   format %{ "testq   $src1, $src2\t# long" %}
16345   ins_encode %{
16346     __ testq($src1$$Register, $src2$$Register);
16347   %}
16348   ins_pipe(ialu_cr_reg_imm);
16349 %}
16350 
16351 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16352 %{
16353   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16354 
16355   format %{ "testq   $src, $mem" %}
16356   ins_encode %{
16357     __ testq($src$$Register, $mem$$Address);
16358   %}
16359   ins_pipe(ialu_cr_reg_mem);
16360 %}
16361 
16362 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16363 %{
16364   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16365 
16366   format %{ "testq   $src, $mem" %}
16367   ins_encode %{
16368     __ testq($src$$Register, $mem$$Address);
16369   %}
16370   ins_pipe(ialu_cr_reg_mem);
16371 %}
16372 
16373 // Manifest a CmpU result in an integer register.  Very painful.
16374 // This is the test to avoid.
16375 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16376 %{
16377   match(Set dst (CmpU3 src1 src2));
16378   effect(KILL flags);
16379 
16380   ins_cost(275); // XXX
16381   format %{ "cmpl    $src1, $src2\t# CmpL3\n\t"
16382             "movl    $dst, -1\n\t"
16383             "jb,u    done\n\t"
16384             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16385     "done:" %}
16386   ins_encode %{
16387     Label done;
16388     __ cmpl($src1$$Register, $src2$$Register);
16389     __ movl($dst$$Register, -1);
16390     __ jccb(Assembler::below, done);
16391     __ setcc(Assembler::notZero, $dst$$Register);
16392     __ bind(done);
16393   %}
16394   ins_pipe(pipe_slow);
16395 %}
16396 
16397 // Manifest a CmpL result in an integer register.  Very painful.
16398 // This is the test to avoid.
16399 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16400 %{
16401   match(Set dst (CmpL3 src1 src2));
16402   effect(KILL flags);
16403 
16404   ins_cost(275); // XXX
16405   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16406             "movl    $dst, -1\n\t"
16407             "jl,s    done\n\t"
16408             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16409     "done:" %}
16410   ins_encode %{
16411     Label done;
16412     __ cmpq($src1$$Register, $src2$$Register);
16413     __ movl($dst$$Register, -1);
16414     __ jccb(Assembler::less, done);
16415     __ setcc(Assembler::notZero, $dst$$Register);
16416     __ bind(done);
16417   %}
16418   ins_pipe(pipe_slow);
16419 %}
16420 
16421 // Manifest a CmpUL result in an integer register.  Very painful.
16422 // This is the test to avoid.
16423 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16424 %{
16425   match(Set dst (CmpUL3 src1 src2));
16426   effect(KILL flags);
16427 
16428   ins_cost(275); // XXX
16429   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16430             "movl    $dst, -1\n\t"
16431             "jb,u    done\n\t"
16432             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16433     "done:" %}
16434   ins_encode %{
16435     Label done;
16436     __ cmpq($src1$$Register, $src2$$Register);
16437     __ movl($dst$$Register, -1);
16438     __ jccb(Assembler::below, done);
16439     __ setcc(Assembler::notZero, $dst$$Register);
16440     __ bind(done);
16441   %}
16442   ins_pipe(pipe_slow);
16443 %}
16444 
16445 // Unsigned long compare Instructions; really, same as signed long except they
16446 // produce an rFlagsRegU instead of rFlagsReg.
16447 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16448 %{
16449   match(Set cr (CmpUL op1 op2));
16450 
16451   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16452   ins_encode %{
16453     __ cmpq($op1$$Register, $op2$$Register);
16454   %}
16455   ins_pipe(ialu_cr_reg_reg);
16456 %}
16457 
16458 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16459 %{
16460   match(Set cr (CmpUL op1 op2));
16461 
16462   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16463   ins_encode %{
16464     __ cmpq($op1$$Register, $op2$$constant);
16465   %}
16466   ins_pipe(ialu_cr_reg_imm);
16467 %}
16468 
16469 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16470 %{
16471   match(Set cr (CmpUL op1 (LoadL op2)));
16472 
16473   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16474   ins_encode %{
16475     __ cmpq($op1$$Register, $op2$$Address);
16476   %}
16477   ins_pipe(ialu_cr_reg_mem);
16478 %}
16479 
16480 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16481 %{
16482   match(Set cr (CmpUL src zero));
16483 
16484   format %{ "testq   $src, $src\t# unsigned" %}
16485   ins_encode %{
16486     __ testq($src$$Register, $src$$Register);
16487   %}
16488   ins_pipe(ialu_cr_reg_imm);
16489 %}
16490 
16491 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16492 %{
16493   match(Set cr (CmpI (LoadB mem) imm));
16494 
16495   ins_cost(125);
16496   format %{ "cmpb    $mem, $imm" %}
16497   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16498   ins_pipe(ialu_cr_reg_mem);
16499 %}
16500 
16501 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16502 %{
16503   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16504 
16505   ins_cost(125);
16506   format %{ "testb   $mem, $imm\t# ubyte" %}
16507   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16508   ins_pipe(ialu_cr_reg_mem);
16509 %}
16510 
16511 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16512 %{
16513   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16514 
16515   ins_cost(125);
16516   format %{ "testb   $mem, $imm\t# byte" %}
16517   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16518   ins_pipe(ialu_cr_reg_mem);
16519 %}
16520 
16521 //----------Max and Min--------------------------------------------------------
16522 // Min Instructions
16523 
16524 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16525 %{
16526   predicate(!UseAPX);
16527   effect(USE_DEF dst, USE src, USE cr);
16528 
16529   format %{ "cmovlgt $dst, $src\t# min" %}
16530   ins_encode %{
16531     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16532   %}
16533   ins_pipe(pipe_cmov_reg);
16534 %}
16535 
16536 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16537 %{
16538   predicate(UseAPX);
16539   effect(DEF dst, USE src1, USE src2, USE cr);
16540 
16541   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16542   ins_encode %{
16543     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16544   %}
16545   ins_pipe(pipe_cmov_reg);
16546 %}
16547 
16548 instruct minI_rReg(rRegI dst, rRegI src)
16549 %{
16550   predicate(!UseAPX);
16551   match(Set dst (MinI dst src));
16552 
16553   ins_cost(200);
16554   expand %{
16555     rFlagsReg cr;
16556     compI_rReg(cr, dst, src);
16557     cmovI_reg_g(dst, src, cr);
16558   %}
16559 %}
16560 
16561 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16562 %{
16563   predicate(UseAPX);
16564   match(Set dst (MinI src1 src2));
16565   effect(DEF dst, USE src1, USE src2);
16566 
16567   ins_cost(200);
16568   expand %{
16569     rFlagsReg cr;
16570     compI_rReg(cr, src1, src2);
16571     cmovI_reg_g_ndd(dst, src1, src2, cr);
16572   %}
16573 %}
16574 
16575 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16576 %{
16577   predicate(!UseAPX);
16578   effect(USE_DEF dst, USE src, USE cr);
16579 
16580   format %{ "cmovllt $dst, $src\t# max" %}
16581   ins_encode %{
16582     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16583   %}
16584   ins_pipe(pipe_cmov_reg);
16585 %}
16586 
16587 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16588 %{
16589   predicate(UseAPX);
16590   effect(DEF dst, USE src1, USE src2, USE cr);
16591 
16592   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16593   ins_encode %{
16594     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16595   %}
16596   ins_pipe(pipe_cmov_reg);
16597 %}
16598 
16599 instruct maxI_rReg(rRegI dst, rRegI src)
16600 %{
16601   predicate(!UseAPX);
16602   match(Set dst (MaxI dst src));
16603 
16604   ins_cost(200);
16605   expand %{
16606     rFlagsReg cr;
16607     compI_rReg(cr, dst, src);
16608     cmovI_reg_l(dst, src, cr);
16609   %}
16610 %}
16611 
16612 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16613 %{
16614   predicate(UseAPX);
16615   match(Set dst (MaxI src1 src2));
16616   effect(DEF dst, USE src1, USE src2);
16617 
16618   ins_cost(200);
16619   expand %{
16620     rFlagsReg cr;
16621     compI_rReg(cr, src1, src2);
16622     cmovI_reg_l_ndd(dst, src1, src2, cr);
16623   %}
16624 %}
16625 
16626 // ============================================================================
16627 // Branch Instructions
16628 
16629 // Jump Direct - Label defines a relative address from JMP+1
16630 instruct jmpDir(label labl)
16631 %{
16632   match(Goto);
16633   effect(USE labl);
16634 
16635   ins_cost(300);
16636   format %{ "jmp     $labl" %}
16637   size(5);
16638   ins_encode %{
16639     Label* L = $labl$$label;
16640     __ jmp(*L, false); // Always long jump
16641   %}
16642   ins_pipe(pipe_jmp);
16643 %}
16644 
16645 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16646 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16647 %{
16648   match(If cop cr);
16649   effect(USE labl);
16650 
16651   ins_cost(300);
16652   format %{ "j$cop     $labl" %}
16653   size(6);
16654   ins_encode %{
16655     Label* L = $labl$$label;
16656     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16657   %}
16658   ins_pipe(pipe_jcc);
16659 %}
16660 
16661 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16662 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16663 %{
16664   match(CountedLoopEnd cop cr);
16665   effect(USE labl);
16666 
16667   ins_cost(300);
16668   format %{ "j$cop     $labl\t# loop end" %}
16669   size(6);
16670   ins_encode %{
16671     Label* L = $labl$$label;
16672     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16673   %}
16674   ins_pipe(pipe_jcc);
16675 %}
16676 
16677 // Jump Direct Conditional - using unsigned comparison
16678 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16679   match(If cop cmp);
16680   effect(USE labl);
16681 
16682   ins_cost(300);
16683   format %{ "j$cop,u   $labl" %}
16684   size(6);
16685   ins_encode %{
16686     Label* L = $labl$$label;
16687     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16688   %}
16689   ins_pipe(pipe_jcc);
16690 %}
16691 
16692 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16693   match(If cop cmp);
16694   effect(USE labl);
16695 
16696   ins_cost(200);
16697   format %{ "j$cop,u   $labl" %}
16698   size(6);
16699   ins_encode %{
16700     Label* L = $labl$$label;
16701     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16702   %}
16703   ins_pipe(pipe_jcc);
16704 %}
16705 
16706 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16707   match(If cop cmp);
16708   effect(USE labl);
16709 
16710   ins_cost(200);
16711   format %{ $$template
16712     if ($cop$$cmpcode == Assembler::notEqual) {
16713       $$emit$$"jp,u    $labl\n\t"
16714       $$emit$$"j$cop,u   $labl"
16715     } else {
16716       $$emit$$"jp,u    done\n\t"
16717       $$emit$$"j$cop,u   $labl\n\t"
16718       $$emit$$"done:"
16719     }
16720   %}
16721   ins_encode %{
16722     Label* l = $labl$$label;
16723     if ($cop$$cmpcode == Assembler::notEqual) {
16724       __ jcc(Assembler::parity, *l, false);
16725       __ jcc(Assembler::notEqual, *l, false);
16726     } else if ($cop$$cmpcode == Assembler::equal) {
16727       Label done;
16728       __ jccb(Assembler::parity, done);
16729       __ jcc(Assembler::equal, *l, false);
16730       __ bind(done);
16731     } else {
16732        ShouldNotReachHere();
16733     }
16734   %}
16735   ins_pipe(pipe_jcc);
16736 %}
16737 
16738 // ============================================================================
16739 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
16740 // superklass array for an instance of the superklass.  Set a hidden
16741 // internal cache on a hit (cache is checked with exposed code in
16742 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
16743 // encoding ALSO sets flags.
16744 
16745 instruct partialSubtypeCheck(rdi_RegP result,
16746                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16747                              rFlagsReg cr)
16748 %{
16749   match(Set result (PartialSubtypeCheck sub super));
16750   predicate(!UseSecondarySupersTable);
16751   effect(KILL rcx, KILL cr);
16752 
16753   ins_cost(1100);  // slightly larger than the next version
16754   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16755             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16756             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16757             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16758             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
16759             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16760             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
16761     "miss:\t" %}
16762 
16763   ins_encode %{
16764     Label miss;
16765     // NB: Callers may assume that, when $result is a valid register,
16766     // check_klass_subtype_slow_path_linear sets it to a nonzero
16767     // value.
16768     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16769                                             $rcx$$Register, $result$$Register,
16770                                             nullptr, &miss,
16771                                             /*set_cond_codes:*/ true);
16772     __ xorptr($result$$Register, $result$$Register);
16773     __ bind(miss);
16774   %}
16775 
16776   ins_pipe(pipe_slow);
16777 %}
16778 
16779 // ============================================================================
16780 // Two versions of hashtable-based partialSubtypeCheck, both used when
16781 // we need to search for a super class in the secondary supers array.
16782 // The first is used when we don't know _a priori_ the class being
16783 // searched for. The second, far more common, is used when we do know:
// this covers instanceof, checkcast, and any other case where C2 can
// determine the superclass by constant propagation.
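//
// For example (illustrative Java, not part of this file):
//
//   if (x instanceof String) ...   // superklass is a compile-time constant
//                                  //  => partialSubtypeCheckConstSuper
//   Class<?> c = ...;
//   c.isInstance(x);               // superklass known only at run time
//                                  //  => partialSubtypeCheckVarSuper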
16786 
16787 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
16788                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16789                                        rFlagsReg cr)
16790 %{
16791   match(Set result (PartialSubtypeCheck sub super));
16792   predicate(UseSecondarySupersTable);
16793   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16794 
16795   ins_cost(1000);
16796   format %{ "partialSubtypeCheck $result, $sub, $super" %}
16797 
16798   ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
16801   %}
16802 
16803   ins_pipe(pipe_slow);
16804 %}
16805 
16806 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
16807                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16808                                        rFlagsReg cr)
16809 %{
16810   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
16811   predicate(UseSecondarySupersTable);
16812   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16813 
16814   ins_cost(700);  // smaller than the next version
16815   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
16816 
16817   ins_encode %{
16818     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
16819     if (InlineSecondarySupersTest) {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register, $result$$Register,
                                             super_klass_slot);
16823     } else {
16824       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
16825     }
16826   %}
16827 
16828   ins_pipe(pipe_slow);
16829 %}
16830 
16831 // ============================================================================
16832 // Branch Instructions -- short offset versions
16833 //
16834 // These instructions are used to replace jumps of a long offset (the default
16835 // match) with jumps of a shorter offset.  These instructions are all tagged
16836 // with the ins_short_branch attribute, which causes the ADLC to suppress the
16837 // match rules in general matching.  Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the short variant.  The compiler decides whether the
// short form can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
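//
// For example, a conditional branch has two x86 encodings (sketch):
//
//   jne target    ; short form: 0x75 rel8        (2 bytes, reach -128..+127)
//   jne target    ; long  form: 0x0F 0x85 rel32  (6 bytes)
//
// The short forms below are substituted only when the target is known to be
// within rel8 range.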
16842 
16843 // Jump Direct - Label defines a relative address from JMP+1
16844 instruct jmpDir_short(label labl) %{
16845   match(Goto);
16846   effect(USE labl);
16847 
16848   ins_cost(300);
16849   format %{ "jmp,s   $labl" %}
16850   size(2);
16851   ins_encode %{
16852     Label* L = $labl$$label;
16853     __ jmpb(*L);
16854   %}
16855   ins_pipe(pipe_jmp);
16856   ins_short_branch(1);
16857 %}
16858 
16859 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16860 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
16861   match(If cop cr);
16862   effect(USE labl);
16863 
16864   ins_cost(300);
16865   format %{ "j$cop,s   $labl" %}
16866   size(2);
16867   ins_encode %{
16868     Label* L = $labl$$label;
16869     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16870   %}
16871   ins_pipe(pipe_jcc);
16872   ins_short_branch(1);
16873 %}
16874 
16875 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16876 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
16877   match(CountedLoopEnd cop cr);
16878   effect(USE labl);
16879 
16880   ins_cost(300);
16881   format %{ "j$cop,s   $labl\t# loop end" %}
16882   size(2);
16883   ins_encode %{
16884     Label* L = $labl$$label;
16885     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16886   %}
16887   ins_pipe(pipe_jcc);
16888   ins_short_branch(1);
16889 %}
16890 
16891 // Jump Direct Conditional - using unsigned comparison
16892 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16893   match(If cop cmp);
16894   effect(USE labl);
16895 
16896   ins_cost(300);
16897   format %{ "j$cop,us  $labl" %}
16898   size(2);
16899   ins_encode %{
16900     Label* L = $labl$$label;
16901     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16902   %}
16903   ins_pipe(pipe_jcc);
16904   ins_short_branch(1);
16905 %}
16906 
16907 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16908   match(If cop cmp);
16909   effect(USE labl);
16910 
16911   ins_cost(300);
16912   format %{ "j$cop,us  $labl" %}
16913   size(2);
16914   ins_encode %{
16915     Label* L = $labl$$label;
16916     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16917   %}
16918   ins_pipe(pipe_jcc);
16919   ins_short_branch(1);
16920 %}
16921 
16922 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16923   match(If cop cmp);
16924   effect(USE labl);
16925 
16926   ins_cost(300);
16927   format %{ $$template
16928     if ($cop$$cmpcode == Assembler::notEqual) {
16929       $$emit$$"jp,u,s  $labl\n\t"
16930       $$emit$$"j$cop,u,s  $labl"
16931     } else {
16932       $$emit$$"jp,u,s  done\n\t"
16933       $$emit$$"j$cop,u,s  $labl\n\t"
16934       $$emit$$"done:"
16935     }
16936   %}
16937   size(4);
16938   ins_encode %{
16939     Label* l = $labl$$label;
16940     if ($cop$$cmpcode == Assembler::notEqual) {
16941       __ jccb(Assembler::parity, *l);
16942       __ jccb(Assembler::notEqual, *l);
16943     } else if ($cop$$cmpcode == Assembler::equal) {
16944       Label done;
16945       __ jccb(Assembler::parity, done);
16946       __ jccb(Assembler::equal, *l);
16947       __ bind(done);
16948     } else {
16949        ShouldNotReachHere();
16950     }
16951   %}
16952   ins_pipe(pipe_jcc);
16953   ins_short_branch(1);
16954 %}
16955 
16956 // ============================================================================
16957 // inlined locking and unlocking
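//
// These emit the inline fast path for monitorenter/monitorexit; the flags
// they set select the slow path (a runtime call) when the fast path fails.
// The contract (see MacroAssembler::fast_lock/fast_unlock) is roughly:
//   ZF set   => fast path succeeded
//   ZF clear => take the slow path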
16958 
16959 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
16960   match(Set cr (FastLock object box));
16961   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
16962   ins_cost(300);
16963   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
16964   ins_encode %{
16965     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
16966   %}
16967   ins_pipe(pipe_slow);
16968 %}
16969 
16970 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
16971   match(Set cr (FastUnlock object rax_reg));
16972   effect(TEMP tmp, USE_KILL rax_reg);
16973   ins_cost(300);
16974   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
16975   ins_encode %{
16976     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
16977   %}
16978   ins_pipe(pipe_slow);
16979 %}
16980 
16981 
16982 // ============================================================================
16983 // Safepoint Instructions
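//
// A sketch of the mechanism: the poll is a load from the thread-local
// polling page.  To request a safepoint the VM arms the page so the load
// faults; the signal handler recognizes the relocated poll pc and brings
// the thread to a safepoint stop.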
16984 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
16985 %{
16986   match(SafePoint poll);
16987   effect(KILL cr, USE poll);
16988 
16989   format %{ "testl   rax, [$poll]\t"
16990             "# Safepoint: poll for GC" %}
16991   ins_cost(125);
16992   ins_encode %{
16993     __ relocate(relocInfo::poll_type);
16994     address pre_pc = __ pc();
16995     __ testl(rax, Address($poll$$Register, 0));
16996     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
16997   %}
16998   ins_pipe(ialu_reg_mem);
16999 %}
17000 
17001 instruct mask_all_evexL(kReg dst, rRegL src) %{
17002   match(Set dst (MaskAll src));
17003   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17004   ins_encode %{
17005     int mask_len = Matcher::vector_length(this);
17006     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17007   %}
17008   ins_pipe( pipe_slow );
17009 %}
17010 
17011 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17012   predicate(Matcher::vector_length(n) > 32);
17013   match(Set dst (MaskAll src));
17014   effect(TEMP tmp);
17015   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17016   ins_encode %{
17017     int mask_len = Matcher::vector_length(this);
17018     __ movslq($tmp$$Register, $src$$Register);
17019     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17020   %}
17021   ins_pipe( pipe_slow );
17022 %}
17023 
17024 // ============================================================================
17025 // Procedure Call/Return Instructions
17026 // Call Java Static Instruction
17027 // Note: If this code changes, the corresponding ret_addr_offset() and
17028 //       compute_padding() functions will have to be adjusted.
17029 instruct CallStaticJavaDirect(method meth) %{
17030   match(CallStaticJava);
17031   effect(USE meth);
17032 
17033   ins_cost(300);
17034   format %{ "call,static " %}
17035   opcode(0xE8); /* E8 cd */
17036   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17037   ins_pipe(pipe_slow);
17038   ins_alignment(4);
17039 %}
17040 
17041 // Call Java Dynamic Instruction
17042 // Note: If this code changes, the corresponding ret_addr_offset() and
17043 //       compute_padding() functions will have to be adjusted.
17044 instruct CallDynamicJavaDirect(method meth)
17045 %{
17046   match(CallDynamicJava);
17047   effect(USE meth);
17048 
17049   ins_cost(300);
17050   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17051             "call,dynamic " %}
17052   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17053   ins_pipe(pipe_slow);
17054   ins_alignment(4);
17055 %}
17056 
17057 // Call Runtime Instruction
17058 instruct CallRuntimeDirect(method meth)
17059 %{
17060   match(CallRuntime);
17061   effect(USE meth);
17062 
17063   ins_cost(300);
17064   format %{ "call,runtime " %}
17065   ins_encode(clear_avx, Java_To_Runtime(meth));
17066   ins_pipe(pipe_slow);
17067 %}
17068 
17069 // Call runtime without safepoint
17070 instruct CallLeafDirect(method meth)
17071 %{
17072   match(CallLeaf);
17073   effect(USE meth);
17074 
17075   ins_cost(300);
17076   format %{ "call_leaf,runtime " %}
17077   ins_encode(clear_avx, Java_To_Runtime(meth));
17078   ins_pipe(pipe_slow);
17079 %}
17080 
17081 // Call runtime without safepoint and with vector arguments
17082 instruct CallLeafDirectVector(method meth)
17083 %{
17084   match(CallLeafVector);
17085   effect(USE meth);
17086 
17087   ins_cost(300);
17088   format %{ "call_leaf,vector " %}
17089   ins_encode(Java_To_Runtime(meth));
17090   ins_pipe(pipe_slow);
17091 %}
17092 
17093 // Call runtime without safepoint
17094 instruct CallLeafNoFPDirect(method meth)
17095 %{
17096   match(CallLeafNoFP);
17097   effect(USE meth);
17098 
17099   ins_cost(300);
17100   format %{ "call_leaf_nofp,runtime " %}
17101   ins_encode(clear_avx, Java_To_Runtime(meth));
17102   ins_pipe(pipe_slow);
17103 %}
17104 
17105 // Return Instruction
17106 // Remove the return address & jump to it.
// Note: we always emit a nop after a ret to make sure there is room
// for safepoint patching.
17109 instruct Ret()
17110 %{
17111   match(Return);
17112 
17113   format %{ "ret" %}
17114   ins_encode %{
17115     __ ret(0);
17116   %}
17117   ins_pipe(pipe_jmp);
17118 %}
17119 
17120 // Tail Call; Jump from runtime stub to Java code.
17121 // Also known as an 'interprocedural jump'.
17122 // Target of jump will eventually return to caller.
17123 // TailJump below removes the return address.
// Don't use rbp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall, and it has reset rbp to the caller's state.
17126 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17127 %{
17128   match(TailCall jump_target method_ptr);
17129 
17130   ins_cost(300);
17131   format %{ "jmp     $jump_target\t# rbx holds method" %}
17132   ins_encode %{
17133     __ jmp($jump_target$$Register);
17134   %}
17135   ins_pipe(pipe_jmp);
17136 %}
17137 
17138 // Tail Jump; remove the return address; jump to target.
17139 // TailCall above leaves the return address around.
17140 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17141 %{
17142   match(TailJump jump_target ex_oop);
17143 
17144   ins_cost(300);
17145   format %{ "popq    rdx\t# pop return address\n\t"
17146             "jmp     $jump_target" %}
17147   ins_encode %{
17148     __ popq(as_Register(RDX_enc));
17149     __ jmp($jump_target$$Register);
17150   %}
17151   ins_pipe(pipe_jmp);
17152 %}
17153 
17154 // Forward exception.
17155 instruct ForwardExceptionjmp()
17156 %{
17157   match(ForwardException);
17158 
17159   format %{ "jmp     forward_exception_stub" %}
17160   ins_encode %{
17161     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17162   %}
17163   ins_pipe(pipe_jmp);
17164 %}
17165 
17166 // Create exception oop: created by stack-crawling runtime code.
17167 // Created exception is now available to this handler, and is setup
17168 // just prior to jumping to this handler.  No code emitted.
17169 instruct CreateException(rax_RegP ex_oop)
17170 %{
17171   match(Set ex_oop (CreateEx));
17172 
17173   size(0);
17174   // use the following format syntax
17175   format %{ "# exception oop is in rax; no code emitted" %}
17176   ins_encode();
17177   ins_pipe(empty);
17178 %}
17179 
17180 // Rethrow exception:
17181 // The exception oop will come in the first argument position.
17182 // Then JUMP (not call) to the rethrow stub code.
17183 instruct RethrowException()
17184 %{
17185   match(Rethrow);
17186 
17187   // use the following format syntax
17188   format %{ "jmp     rethrow_stub" %}
17189   ins_encode %{
17190     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17191   %}
17192   ins_pipe(pipe_jmp);
17193 %}
17194 
17195 // ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this node.
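// (r15 is reserved as the current JavaThread pointer throughout compiled
// code on x86_64, so no instruction needs to be emitted here.)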
17199 instruct tlsLoadP(r15_RegP dst) %{
17200   match(Set dst (ThreadLocal));
17201   effect(DEF dst);
17202 
17203   size(0);
17204   format %{ "# TLS is in R15" %}
17205   ins_encode( /*empty encoding*/ );
17206   ins_pipe(ialu_reg_reg);
17207 %}
17208 
17209 instruct addF_reg(regF dst, regF src) %{
17210   predicate(UseAVX == 0);
17211   match(Set dst (AddF dst src));
17212 
17213   format %{ "addss   $dst, $src" %}
17214   ins_cost(150);
17215   ins_encode %{
17216     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17217   %}
17218   ins_pipe(pipe_slow);
17219 %}
17220 
17221 instruct addF_mem(regF dst, memory src) %{
17222   predicate(UseAVX == 0);
17223   match(Set dst (AddF dst (LoadF src)));
17224 
17225   format %{ "addss   $dst, $src" %}
17226   ins_cost(150);
17227   ins_encode %{
17228     __ addss($dst$$XMMRegister, $src$$Address);
17229   %}
17230   ins_pipe(pipe_slow);
17231 %}
17232 
17233 instruct addF_imm(regF dst, immF con) %{
17234   predicate(UseAVX == 0);
17235   match(Set dst (AddF dst con));
17236   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17237   ins_cost(150);
17238   ins_encode %{
17239     __ addss($dst$$XMMRegister, $constantaddress($con));
17240   %}
17241   ins_pipe(pipe_slow);
17242 %}
17243 
17244 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17245   predicate(UseAVX > 0);
17246   match(Set dst (AddF src1 src2));
17247 
17248   format %{ "vaddss  $dst, $src1, $src2" %}
17249   ins_cost(150);
17250   ins_encode %{
17251     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17252   %}
17253   ins_pipe(pipe_slow);
17254 %}
17255 
17256 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17257   predicate(UseAVX > 0);
17258   match(Set dst (AddF src1 (LoadF src2)));
17259 
17260   format %{ "vaddss  $dst, $src1, $src2" %}
17261   ins_cost(150);
17262   ins_encode %{
17263     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17264   %}
17265   ins_pipe(pipe_slow);
17266 %}
17267 
17268 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17269   predicate(UseAVX > 0);
17270   match(Set dst (AddF src con));
17271 
17272   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17273   ins_cost(150);
17274   ins_encode %{
17275     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17276   %}
17277   ins_pipe(pipe_slow);
17278 %}
17279 
17280 instruct addD_reg(regD dst, regD src) %{
17281   predicate(UseAVX == 0);
17282   match(Set dst (AddD dst src));
17283 
17284   format %{ "addsd   $dst, $src" %}
17285   ins_cost(150);
17286   ins_encode %{
17287     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17288   %}
17289   ins_pipe(pipe_slow);
17290 %}
17291 
17292 instruct addD_mem(regD dst, memory src) %{
17293   predicate(UseAVX == 0);
17294   match(Set dst (AddD dst (LoadD src)));
17295 
17296   format %{ "addsd   $dst, $src" %}
17297   ins_cost(150);
17298   ins_encode %{
17299     __ addsd($dst$$XMMRegister, $src$$Address);
17300   %}
17301   ins_pipe(pipe_slow);
17302 %}
17303 
17304 instruct addD_imm(regD dst, immD con) %{
17305   predicate(UseAVX == 0);
17306   match(Set dst (AddD dst con));
17307   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17308   ins_cost(150);
17309   ins_encode %{
17310     __ addsd($dst$$XMMRegister, $constantaddress($con));
17311   %}
17312   ins_pipe(pipe_slow);
17313 %}
17314 
17315 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17316   predicate(UseAVX > 0);
17317   match(Set dst (AddD src1 src2));
17318 
17319   format %{ "vaddsd  $dst, $src1, $src2" %}
17320   ins_cost(150);
17321   ins_encode %{
17322     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17323   %}
17324   ins_pipe(pipe_slow);
17325 %}
17326 
17327 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17328   predicate(UseAVX > 0);
17329   match(Set dst (AddD src1 (LoadD src2)));
17330 
17331   format %{ "vaddsd  $dst, $src1, $src2" %}
17332   ins_cost(150);
17333   ins_encode %{
17334     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17335   %}
17336   ins_pipe(pipe_slow);
17337 %}
17338 
17339 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17340   predicate(UseAVX > 0);
17341   match(Set dst (AddD src con));
17342 
17343   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17344   ins_cost(150);
17345   ins_encode %{
17346     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17347   %}
17348   ins_pipe(pipe_slow);
17349 %}
17350 
17351 instruct subF_reg(regF dst, regF src) %{
17352   predicate(UseAVX == 0);
17353   match(Set dst (SubF dst src));
17354 
17355   format %{ "subss   $dst, $src" %}
17356   ins_cost(150);
17357   ins_encode %{
17358     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17359   %}
17360   ins_pipe(pipe_slow);
17361 %}
17362 
17363 instruct subF_mem(regF dst, memory src) %{
17364   predicate(UseAVX == 0);
17365   match(Set dst (SubF dst (LoadF src)));
17366 
17367   format %{ "subss   $dst, $src" %}
17368   ins_cost(150);
17369   ins_encode %{
17370     __ subss($dst$$XMMRegister, $src$$Address);
17371   %}
17372   ins_pipe(pipe_slow);
17373 %}
17374 
17375 instruct subF_imm(regF dst, immF con) %{
17376   predicate(UseAVX == 0);
17377   match(Set dst (SubF dst con));
17378   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17379   ins_cost(150);
17380   ins_encode %{
17381     __ subss($dst$$XMMRegister, $constantaddress($con));
17382   %}
17383   ins_pipe(pipe_slow);
17384 %}
17385 
17386 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17387   predicate(UseAVX > 0);
17388   match(Set dst (SubF src1 src2));
17389 
17390   format %{ "vsubss  $dst, $src1, $src2" %}
17391   ins_cost(150);
17392   ins_encode %{
17393     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17394   %}
17395   ins_pipe(pipe_slow);
17396 %}
17397 
17398 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17399   predicate(UseAVX > 0);
17400   match(Set dst (SubF src1 (LoadF src2)));
17401 
17402   format %{ "vsubss  $dst, $src1, $src2" %}
17403   ins_cost(150);
17404   ins_encode %{
17405     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17406   %}
17407   ins_pipe(pipe_slow);
17408 %}
17409 
17410 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17411   predicate(UseAVX > 0);
17412   match(Set dst (SubF src con));
17413 
17414   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17415   ins_cost(150);
17416   ins_encode %{
17417     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17418   %}
17419   ins_pipe(pipe_slow);
17420 %}
17421 
17422 instruct subD_reg(regD dst, regD src) %{
17423   predicate(UseAVX == 0);
17424   match(Set dst (SubD dst src));
17425 
17426   format %{ "subsd   $dst, $src" %}
17427   ins_cost(150);
17428   ins_encode %{
17429     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17430   %}
17431   ins_pipe(pipe_slow);
17432 %}
17433 
17434 instruct subD_mem(regD dst, memory src) %{
17435   predicate(UseAVX == 0);
17436   match(Set dst (SubD dst (LoadD src)));
17437 
17438   format %{ "subsd   $dst, $src" %}
17439   ins_cost(150);
17440   ins_encode %{
17441     __ subsd($dst$$XMMRegister, $src$$Address);
17442   %}
17443   ins_pipe(pipe_slow);
17444 %}
17445 
17446 instruct subD_imm(regD dst, immD con) %{
17447   predicate(UseAVX == 0);
17448   match(Set dst (SubD dst con));
17449   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17450   ins_cost(150);
17451   ins_encode %{
17452     __ subsd($dst$$XMMRegister, $constantaddress($con));
17453   %}
17454   ins_pipe(pipe_slow);
17455 %}
17456 
17457 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17458   predicate(UseAVX > 0);
17459   match(Set dst (SubD src1 src2));
17460 
17461   format %{ "vsubsd  $dst, $src1, $src2" %}
17462   ins_cost(150);
17463   ins_encode %{
17464     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17465   %}
17466   ins_pipe(pipe_slow);
17467 %}
17468 
17469 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17470   predicate(UseAVX > 0);
17471   match(Set dst (SubD src1 (LoadD src2)));
17472 
17473   format %{ "vsubsd  $dst, $src1, $src2" %}
17474   ins_cost(150);
17475   ins_encode %{
17476     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17477   %}
17478   ins_pipe(pipe_slow);
17479 %}
17480 
17481 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17482   predicate(UseAVX > 0);
17483   match(Set dst (SubD src con));
17484 
17485   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17486   ins_cost(150);
17487   ins_encode %{
17488     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17489   %}
17490   ins_pipe(pipe_slow);
17491 %}
17492 
17493 instruct mulF_reg(regF dst, regF src) %{
17494   predicate(UseAVX == 0);
17495   match(Set dst (MulF dst src));
17496 
17497   format %{ "mulss   $dst, $src" %}
17498   ins_cost(150);
17499   ins_encode %{
17500     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17501   %}
17502   ins_pipe(pipe_slow);
17503 %}
17504 
17505 instruct mulF_mem(regF dst, memory src) %{
17506   predicate(UseAVX == 0);
17507   match(Set dst (MulF dst (LoadF src)));
17508 
17509   format %{ "mulss   $dst, $src" %}
17510   ins_cost(150);
17511   ins_encode %{
17512     __ mulss($dst$$XMMRegister, $src$$Address);
17513   %}
17514   ins_pipe(pipe_slow);
17515 %}
17516 
17517 instruct mulF_imm(regF dst, immF con) %{
17518   predicate(UseAVX == 0);
17519   match(Set dst (MulF dst con));
17520   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17521   ins_cost(150);
17522   ins_encode %{
17523     __ mulss($dst$$XMMRegister, $constantaddress($con));
17524   %}
17525   ins_pipe(pipe_slow);
17526 %}
17527 
17528 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17529   predicate(UseAVX > 0);
17530   match(Set dst (MulF src1 src2));
17531 
17532   format %{ "vmulss  $dst, $src1, $src2" %}
17533   ins_cost(150);
17534   ins_encode %{
17535     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17536   %}
17537   ins_pipe(pipe_slow);
17538 %}
17539 
17540 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17541   predicate(UseAVX > 0);
17542   match(Set dst (MulF src1 (LoadF src2)));
17543 
17544   format %{ "vmulss  $dst, $src1, $src2" %}
17545   ins_cost(150);
17546   ins_encode %{
17547     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17548   %}
17549   ins_pipe(pipe_slow);
17550 %}
17551 
17552 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17553   predicate(UseAVX > 0);
17554   match(Set dst (MulF src con));
17555 
17556   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17557   ins_cost(150);
17558   ins_encode %{
17559     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17560   %}
17561   ins_pipe(pipe_slow);
17562 %}
17563 
17564 instruct mulD_reg(regD dst, regD src) %{
17565   predicate(UseAVX == 0);
17566   match(Set dst (MulD dst src));
17567 
17568   format %{ "mulsd   $dst, $src" %}
17569   ins_cost(150);
17570   ins_encode %{
17571     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17572   %}
17573   ins_pipe(pipe_slow);
17574 %}
17575 
17576 instruct mulD_mem(regD dst, memory src) %{
17577   predicate(UseAVX == 0);
17578   match(Set dst (MulD dst (LoadD src)));
17579 
17580   format %{ "mulsd   $dst, $src" %}
17581   ins_cost(150);
17582   ins_encode %{
17583     __ mulsd($dst$$XMMRegister, $src$$Address);
17584   %}
17585   ins_pipe(pipe_slow);
17586 %}
17587 
17588 instruct mulD_imm(regD dst, immD con) %{
17589   predicate(UseAVX == 0);
17590   match(Set dst (MulD dst con));
17591   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17592   ins_cost(150);
17593   ins_encode %{
17594     __ mulsd($dst$$XMMRegister, $constantaddress($con));
17595   %}
17596   ins_pipe(pipe_slow);
17597 %}
17598 
17599 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17600   predicate(UseAVX > 0);
17601   match(Set dst (MulD src1 src2));
17602 
17603   format %{ "vmulsd  $dst, $src1, $src2" %}
17604   ins_cost(150);
17605   ins_encode %{
17606     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17607   %}
17608   ins_pipe(pipe_slow);
17609 %}
17610 
17611 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17612   predicate(UseAVX > 0);
17613   match(Set dst (MulD src1 (LoadD src2)));
17614 
17615   format %{ "vmulsd  $dst, $src1, $src2" %}
17616   ins_cost(150);
17617   ins_encode %{
17618     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17619   %}
17620   ins_pipe(pipe_slow);
17621 %}
17622 
17623 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17624   predicate(UseAVX > 0);
17625   match(Set dst (MulD src con));
17626 
17627   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17628   ins_cost(150);
17629   ins_encode %{
17630     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17631   %}
17632   ins_pipe(pipe_slow);
17633 %}
17634 
17635 instruct divF_reg(regF dst, regF src) %{
17636   predicate(UseAVX == 0);
17637   match(Set dst (DivF dst src));
17638 
17639   format %{ "divss   $dst, $src" %}
17640   ins_cost(150);
17641   ins_encode %{
17642     __ divss($dst$$XMMRegister, $src$$XMMRegister);
17643   %}
17644   ins_pipe(pipe_slow);
17645 %}
17646 
17647 instruct divF_mem(regF dst, memory src) %{
17648   predicate(UseAVX == 0);
17649   match(Set dst (DivF dst (LoadF src)));
17650 
17651   format %{ "divss   $dst, $src" %}
17652   ins_cost(150);
17653   ins_encode %{
17654     __ divss($dst$$XMMRegister, $src$$Address);
17655   %}
17656   ins_pipe(pipe_slow);
17657 %}
17658 
17659 instruct divF_imm(regF dst, immF con) %{
17660   predicate(UseAVX == 0);
17661   match(Set dst (DivF dst con));
17662   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17663   ins_cost(150);
17664   ins_encode %{
17665     __ divss($dst$$XMMRegister, $constantaddress($con));
17666   %}
17667   ins_pipe(pipe_slow);
17668 %}
17669 
17670 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17671   predicate(UseAVX > 0);
17672   match(Set dst (DivF src1 src2));
17673 
17674   format %{ "vdivss  $dst, $src1, $src2" %}
17675   ins_cost(150);
17676   ins_encode %{
17677     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17678   %}
17679   ins_pipe(pipe_slow);
17680 %}
17681 
17682 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17683   predicate(UseAVX > 0);
17684   match(Set dst (DivF src1 (LoadF src2)));
17685 
17686   format %{ "vdivss  $dst, $src1, $src2" %}
17687   ins_cost(150);
17688   ins_encode %{
17689     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17690   %}
17691   ins_pipe(pipe_slow);
17692 %}
17693 
17694 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17695   predicate(UseAVX > 0);
17696   match(Set dst (DivF src con));
17697 
17698   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17699   ins_cost(150);
17700   ins_encode %{
17701     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17702   %}
17703   ins_pipe(pipe_slow);
17704 %}
17705 
17706 instruct divD_reg(regD dst, regD src) %{
17707   predicate(UseAVX == 0);
17708   match(Set dst (DivD dst src));
17709 
17710   format %{ "divsd   $dst, $src" %}
17711   ins_cost(150);
17712   ins_encode %{
17713     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17714   %}
17715   ins_pipe(pipe_slow);
17716 %}
17717 
17718 instruct divD_mem(regD dst, memory src) %{
17719   predicate(UseAVX == 0);
17720   match(Set dst (DivD dst (LoadD src)));
17721 
17722   format %{ "divsd   $dst, $src" %}
17723   ins_cost(150);
17724   ins_encode %{
17725     __ divsd($dst$$XMMRegister, $src$$Address);
17726   %}
17727   ins_pipe(pipe_slow);
17728 %}
17729 
17730 instruct divD_imm(regD dst, immD con) %{
17731   predicate(UseAVX == 0);
17732   match(Set dst (DivD dst con));
17733   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17734   ins_cost(150);
17735   ins_encode %{
17736     __ divsd($dst$$XMMRegister, $constantaddress($con));
17737   %}
17738   ins_pipe(pipe_slow);
17739 %}
17740 
17741 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17742   predicate(UseAVX > 0);
17743   match(Set dst (DivD src1 src2));
17744 
17745   format %{ "vdivsd  $dst, $src1, $src2" %}
17746   ins_cost(150);
17747   ins_encode %{
17748     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17749   %}
17750   ins_pipe(pipe_slow);
17751 %}
17752 
17753 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17754   predicate(UseAVX > 0);
17755   match(Set dst (DivD src1 (LoadD src2)));
17756 
17757   format %{ "vdivsd  $dst, $src1, $src2" %}
17758   ins_cost(150);
17759   ins_encode %{
17760     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17761   %}
17762   ins_pipe(pipe_slow);
17763 %}
17764 
17765 instruct divD_reg_imm(regD dst, regD src, immD con) %{
17766   predicate(UseAVX > 0);
17767   match(Set dst (DivD src con));
17768 
17769   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17770   ins_cost(150);
17771   ins_encode %{
17772     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17773   %}
17774   ins_pipe(pipe_slow);
17775 %}
17776 
17777 instruct absF_reg(regF dst) %{
17778   predicate(UseAVX == 0);
17779   match(Set dst (AbsF dst));
17780   ins_cost(150);
17781   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
17782   ins_encode %{
17783     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
17784   %}
17785   ins_pipe(pipe_slow);
17786 %}
17787 
17788 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
17789   predicate(UseAVX > 0);
17790   match(Set dst (AbsF src));
17791   ins_cost(150);
17792   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
17793   ins_encode %{
17794     int vlen_enc = Assembler::AVX_128bit;
17795     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
17796               ExternalAddress(float_signmask()), vlen_enc);
17797   %}
17798   ins_pipe(pipe_slow);
17799 %}
17800 
17801 instruct absD_reg(regD dst) %{
17802   predicate(UseAVX == 0);
17803   match(Set dst (AbsD dst));
17804   ins_cost(150);
17805   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
17806             "# abs double by sign masking" %}
17807   ins_encode %{
17808     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
17809   %}
17810   ins_pipe(pipe_slow);
17811 %}
17812 
17813 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
17814   predicate(UseAVX > 0);
17815   match(Set dst (AbsD src));
17816   ins_cost(150);
17817   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
17818             "# abs double by sign masking" %}
17819   ins_encode %{
17820     int vlen_enc = Assembler::AVX_128bit;
17821     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
17822               ExternalAddress(double_signmask()), vlen_enc);
17823   %}
17824   ins_pipe(pipe_slow);
17825 %}
17826 
17827 instruct negF_reg(regF dst) %{
17828   predicate(UseAVX == 0);
17829   match(Set dst (NegF dst));
17830   ins_cost(150);
17831   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
17832   ins_encode %{
17833     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
17834   %}
17835   ins_pipe(pipe_slow);
17836 %}
17837 
17838 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
17839   predicate(UseAVX > 0);
17840   match(Set dst (NegF src));
17841   ins_cost(150);
17842   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
17843   ins_encode %{
17844     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
17845                  ExternalAddress(float_signflip()));
17846   %}
17847   ins_pipe(pipe_slow);
17848 %}
17849 
17850 instruct negD_reg(regD dst) %{
17851   predicate(UseAVX == 0);
17852   match(Set dst (NegD dst));
17853   ins_cost(150);
17854   format %{ "xorpd   $dst, [0x8000000000000000]\t"
17855             "# neg double by sign flipping" %}
17856   ins_encode %{
17857     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
17858   %}
17859   ins_pipe(pipe_slow);
17860 %}
17861 
17862 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
17863   predicate(UseAVX > 0);
17864   match(Set dst (NegD src));
17865   ins_cost(150);
17866   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
17867             "# neg double by sign flipping" %}
17868   ins_encode %{
17869     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
17870                  ExternalAddress(double_signflip()));
17871   %}
17872   ins_pipe(pipe_slow);
17873 %}
17874 
// The sqrtss instruction needs its destination register to be pre-initialized
// for best performance, so only the instruct rule where the input is
// pre-loaded into the dst register is defined below.
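//
// A sketch of the issue: sqrtss/sqrtsd write only the low bits of dst and
// merge the rest, so
//
//   sqrtss xmm0, xmm1   ; result also depends on the previous contents of xmm0
//
// carries a false dependency on the old dst value.  Matching only the
// dst == src form sidesteps that.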
17877 instruct sqrtF_reg(regF dst) %{
17878   match(Set dst (SqrtF dst));
17879   format %{ "sqrtss  $dst, $dst" %}
17880   ins_encode %{
17881     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
17882   %}
17883   ins_pipe(pipe_slow);
17884 %}
17885 
// The sqrtsd instruction needs its destination register to be pre-initialized
// for best performance, so only the instruct rule where the input is
// pre-loaded into the dst register is defined below.
17888 instruct sqrtD_reg(regD dst) %{
17889   match(Set dst (SqrtD dst));
17890   format %{ "sqrtsd  $dst, $dst" %}
17891   ins_encode %{
17892     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
17893   %}
17894   ins_pipe(pipe_slow);
17895 %}
17896 
17897 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
17898   effect(TEMP tmp);
17899   match(Set dst (ConvF2HF src));
17900   ins_cost(125);
  format %{ "vcvtps2ph $dst,$src\t! using $tmp as TEMP" %}
17902   ins_encode %{
17903     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
17904   %}
17905   ins_pipe( pipe_slow );
17906 %}
17907 
17908 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
17909   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
17910   effect(TEMP ktmp, TEMP rtmp);
17911   match(Set mem (StoreC mem (ConvF2HF src)));
  format %{ "evcvtps2ph $mem,$src\t! using $ktmp and $rtmp as TEMP" %}
17913   ins_encode %{
17914     __ movl($rtmp$$Register, 0x1);
17915     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
17916     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
17917   %}
17918   ins_pipe( pipe_slow );
17919 %}
17920 
17921 instruct vconvF2HF(vec dst, vec src) %{
17922   match(Set dst (VectorCastF2HF src));
17923   format %{ "vector_conv_F2HF $dst $src" %}
17924   ins_encode %{
17925     int vlen_enc = vector_length_encoding(this, $src);
17926     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
17927   %}
17928   ins_pipe( pipe_slow );
17929 %}
17930 
17931 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
17932   predicate(n->as_StoreVector()->memory_size() >= 16);
17933   match(Set mem (StoreVector mem (VectorCastF2HF src)));
17934   format %{ "vcvtps2ph $mem,$src" %}
17935   ins_encode %{
17936     int vlen_enc = vector_length_encoding(this, $src);
17937     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
17938   %}
17939   ins_pipe( pipe_slow );
17940 %}
17941 
17942 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
17943   match(Set dst (ConvHF2F src));
17944   format %{ "vcvtph2ps $dst,$src" %}
17945   ins_encode %{
17946     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
17947   %}
17948   ins_pipe( pipe_slow );
17949 %}
17950 
17951 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
17952   match(Set dst (VectorCastHF2F (LoadVector mem)));
17953   format %{ "vcvtph2ps $dst,$mem" %}
17954   ins_encode %{
17955     int vlen_enc = vector_length_encoding(this);
17956     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
17957   %}
17958   ins_pipe( pipe_slow );
17959 %}
17960 
17961 instruct vconvHF2F(vec dst, vec src) %{
17962   match(Set dst (VectorCastHF2F src));
17963   ins_cost(125);
17964   format %{ "vector_conv_HF2F $dst,$src" %}
17965   ins_encode %{
17966     int vlen_enc = vector_length_encoding(this);
17967     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
17968   %}
17969   ins_pipe( pipe_slow );
17970 %}
17971 
17972 // ---------------------------------------- VectorReinterpret ------------------------------------
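//
// VectorReinterpret is a bitwise cast between vector (or mask) values.  When
// source and destination sizes match it emits no code; when expanding, the
// extra destination bytes are cleared (see the masking below); when
// shrinking, the excess source bytes are simply dropped.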
17973 instruct reinterpret_mask(kReg dst) %{
17974   predicate(n->bottom_type()->isa_vectmask() &&
17975             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
17976   match(Set dst (VectorReinterpret dst));
17977   ins_cost(125);
17978   format %{ "vector_reinterpret $dst\t!" %}
17979   ins_encode %{
17980     // empty
17981   %}
17982   ins_pipe( pipe_slow );
17983 %}
17984 
17985 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
17986   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
17987             n->bottom_type()->isa_vectmask() &&
17988             n->in(1)->bottom_type()->isa_vectmask() &&
17989             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // size(dst) == size(src)
17991   match(Set dst (VectorReinterpret src));
17992   effect(TEMP xtmp);
17993   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
17994   ins_encode %{
17995      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
17996      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
17997      assert(src_sz == dst_sz , "src and dst size mismatch");
17998      int vlen_enc = vector_length_encoding(src_sz);
17999      __  evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18000      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18001   %}
18002   ins_pipe( pipe_slow );
18003 %}
18004 
18005 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18006   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18007             n->bottom_type()->isa_vectmask() &&
18008             n->in(1)->bottom_type()->isa_vectmask() &&
18009             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18010              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // size(dst) == size(src)
18012   match(Set dst (VectorReinterpret src));
18013   effect(TEMP xtmp);
18014   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18015   ins_encode %{
18016      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18017      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18018      assert(src_sz == dst_sz , "src and dst size mismatch");
18019      int vlen_enc = vector_length_encoding(src_sz);
18020      __  evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18021      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18022   %}
18023   ins_pipe( pipe_slow );
18024 %}
18025 
18026 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18027   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18028             n->bottom_type()->isa_vectmask() &&
18029             n->in(1)->bottom_type()->isa_vectmask() &&
18030             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18031              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // size(dst) == size(src)
18033   match(Set dst (VectorReinterpret src));
18034   effect(TEMP xtmp);
18035   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18036   ins_encode %{
18037      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18038      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18039      assert(src_sz == dst_sz , "src and dst size mismatch");
18040      int vlen_enc = vector_length_encoding(src_sz);
18041      __  evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18042      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18043   %}
18044   ins_pipe( pipe_slow );
18045 %}
18046 
18047 instruct reinterpret(vec dst) %{
18048   predicate(!n->bottom_type()->isa_vectmask() &&
18049             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18050   match(Set dst (VectorReinterpret dst));
18051   ins_cost(125);
18052   format %{ "vector_reinterpret $dst\t!" %}
18053   ins_encode %{
18054     // empty
18055   %}
18056   ins_pipe( pipe_slow );
18057 %}
18058 
18059 instruct reinterpret_expand(vec dst, vec src) %{
18060   predicate(UseAVX == 0 &&
18061             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18062   match(Set dst (VectorReinterpret src));
18063   ins_cost(125);
18064   effect(TEMP dst);
18065   format %{ "vector_reinterpret_expand $dst,$src" %}
18066   ins_encode %{
18067     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18068     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18069 
18070     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18071     if (src_vlen_in_bytes == 4) {
18072       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18073     } else {
18074       assert(src_vlen_in_bytes == 8, "");
18075       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18076     }
18077     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18078   %}
18079   ins_pipe( pipe_slow );
18080 %}
18081 
18082 instruct vreinterpret_expand4(legVec dst, vec src) %{
18083   predicate(UseAVX > 0 &&
18084             !n->bottom_type()->isa_vectmask() &&
18085             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18086             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18087   match(Set dst (VectorReinterpret src));
18088   ins_cost(125);
18089   format %{ "vector_reinterpret_expand $dst,$src" %}
18090   ins_encode %{
18091     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18092   %}
18093   ins_pipe( pipe_slow );
18094 %}
18095 
18096 
18097 instruct vreinterpret_expand(legVec dst, vec src) %{
18098   predicate(UseAVX > 0 &&
18099             !n->bottom_type()->isa_vectmask() &&
18100             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18101             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18102   match(Set dst (VectorReinterpret src));
18103   ins_cost(125);
18104   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18105   ins_encode %{
18106     switch (Matcher::vector_length_in_bytes(this, $src)) {
18107       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18108       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18109       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18110       default: ShouldNotReachHere();
18111     }
18112   %}
18113   ins_pipe( pipe_slow );
18114 %}
18115 
18116 instruct reinterpret_shrink(vec dst, legVec src) %{
18117   predicate(!n->bottom_type()->isa_vectmask() &&
18118             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18119   match(Set dst (VectorReinterpret src));
18120   ins_cost(125);
18121   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18122   ins_encode %{
18123     switch (Matcher::vector_length_in_bytes(this)) {
18124       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18125       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18126       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18127       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18128       default: ShouldNotReachHere();
18129     }
18130   %}
18131   ins_pipe( pipe_slow );
18132 %}
18133 
18134 // ----------------------------------------------------------------------------------------------------
18135 
18136 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18137   match(Set dst (RoundDoubleMode src rmode));
18138   format %{ "roundsd $dst,$src" %}
18139   ins_cost(150);
18140   ins_encode %{
18141     assert(UseSSE >= 4, "required");
18142     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18143       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18144     }
18145     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18146   %}
18147   ins_pipe(pipe_slow);
18148 %}
18149 
18150 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18151   match(Set dst (RoundDoubleMode con rmode));
18152   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18153   ins_cost(150);
18154   ins_encode %{
18155     assert(UseSSE >= 4, "required");
18156     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18157   %}
18158   ins_pipe(pipe_slow);
18159 %}
18160 
18161 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18162   predicate(Matcher::vector_length(n) < 8);
18163   match(Set dst (RoundDoubleModeV src rmode));
18164   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18165   ins_encode %{
18166     assert(UseAVX > 0, "required");
18167     int vlen_enc = vector_length_encoding(this);
18168     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18169   %}
18170   ins_pipe( pipe_slow );
18171 %}
18172 
18173 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18174   predicate(Matcher::vector_length(n) == 8);
18175   match(Set dst (RoundDoubleModeV src rmode));
18176   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18177   ins_encode %{
18178     assert(UseAVX > 2, "required");
18179     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18180   %}
18181   ins_pipe( pipe_slow );
18182 %}
18183 
18184 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18185   predicate(Matcher::vector_length(n) < 8);
18186   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18187   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18188   ins_encode %{
18189     assert(UseAVX > 0, "required");
18190     int vlen_enc = vector_length_encoding(this);
18191     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18192   %}
18193   ins_pipe( pipe_slow );
18194 %}
18195 
18196 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18197   predicate(Matcher::vector_length(n) == 8);
18198   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18199   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18200   ins_encode %{
18201     assert(UseAVX > 2, "required");
18202     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18203   %}
18204   ins_pipe( pipe_slow );
18205 %}
18206 
18207 instruct onspinwait() %{
18208   match(OnSpinWait);
18209   ins_cost(200);
18210 
18211   format %{
18212     $$template
18213     $$emit$$"pause\t! membar_onspinwait"
18214   %}
18215   ins_encode %{
18216     __ pause();
18217   %}
18218   ins_pipe(pipe_slow);
18219 %}
18220 
18221 // a * b + c
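// (In Java source this typically comes from Math.fma(a, b, c); the result
// lands in c because the underlying fused multiply-add instruction
// overwrites its accumulator operand.)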
18222 instruct fmaD_reg(regD a, regD b, regD c) %{
18223   match(Set c (FmaD  c (Binary a b)));
18224   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18225   ins_cost(150);
18226   ins_encode %{
18227     assert(UseFMA, "Needs FMA instructions support.");
18228     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18229   %}
18230   ins_pipe( pipe_slow );
18231 %}
18232 
18233 // a * b + c
18234 instruct fmaF_reg(regF a, regF b, regF c) %{
18235   match(Set c (FmaF  c (Binary a b)));
18236   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18237   ins_cost(150);
18238   ins_encode %{
18239     assert(UseFMA, "Needs FMA instructions support.");
18240     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18241   %}
18242   ins_pipe( pipe_slow );
18243 %}
18244 
18245 // ====================VECTOR INSTRUCTIONS=====================================
18246 
18247 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18248 instruct MoveVec2Leg(legVec dst, vec src) %{
18249   match(Set dst src);
18250   format %{ "" %}
18251   ins_encode %{
18252     ShouldNotReachHere();
18253   %}
18254   ins_pipe( fpu_reg_reg );
18255 %}
18256 
18257 instruct MoveLeg2Vec(vec dst, legVec src) %{
18258   match(Set dst src);
18259   format %{ "" %}
18260   ins_encode %{
18261     ShouldNotReachHere();
18262   %}
18263   ins_pipe( fpu_reg_reg );
18264 %}
18265 
18266 // ============================================================================
18267 
// Load vector (generic operand pattern)
18269 instruct loadV(vec dst, memory mem) %{
18270   match(Set dst (LoadVector mem));
18271   ins_cost(125);
18272   format %{ "load_vector $dst,$mem" %}
18273   ins_encode %{
18274     BasicType bt = Matcher::vector_element_basic_type(this);
18275     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18276   %}
18277   ins_pipe( pipe_slow );
18278 %}
18279 
// Store vector (generic operand pattern).
18281 instruct storeV(memory mem, vec src) %{
18282   match(Set mem (StoreVector mem src));
18283   ins_cost(145);
  format %{ "store_vector $mem,$src" %}
18285   ins_encode %{
18286     switch (Matcher::vector_length_in_bytes(this, $src)) {
18287       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18288       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18289       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18290       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18291       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18292       default: ShouldNotReachHere();
18293     }
18294   %}
18295   ins_pipe( pipe_slow );
18296 %}
18297 
18298 // ---------------------------------------- Gather ------------------------------------
18299 
18300 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
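//
// A gather loads one element per lane through a vector of indices (sketch):
//
//   for (int i = 0; i < vlen; i++) {
//     dst[i] = base[idx[i]];   // masked variants load only the lanes whose
//   }                          // mask bit is set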
18301 
18302 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18303   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18304             Matcher::vector_length_in_bytes(n) <= 32);
18305   match(Set dst (LoadVectorGather mem idx));
18306   effect(TEMP dst, TEMP tmp, TEMP mask);
18307   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18308   ins_encode %{
18309     int vlen_enc = vector_length_encoding(this);
18310     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18311     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18312     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18313     __ lea($tmp$$Register, $mem$$Address);
18314     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18315   %}
18316   ins_pipe( pipe_slow );
18317 %}
18318 
18319 
18320 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18321   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18322             !is_subword_type(Matcher::vector_element_basic_type(n)));
18323   match(Set dst (LoadVectorGather mem idx));
18324   effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18326   ins_encode %{
18327     int vlen_enc = vector_length_encoding(this);
18328     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18329     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18330     __ lea($tmp$$Register, $mem$$Address);
18331     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18332   %}
18333   ins_pipe( pipe_slow );
18334 %}
18335 
18336 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18337   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18338             !is_subword_type(Matcher::vector_element_basic_type(n)));
18339   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18340   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18342   ins_encode %{
18343     assert(UseAVX > 2, "sanity");
18344     int vlen_enc = vector_length_encoding(this);
18345     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18346     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register
    // used for predication, the mask operand is moved to a temporary first.
18349     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18350     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18351     __ lea($tmp$$Register, $mem$$Address);
18352     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18353   %}
18354   ins_pipe( pipe_slow );
18355 %}
18356 
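// No x86 gather variant handles byte/short elements, so subword gathers are
// emulated: elements are fetched through GPRs and packed eight bytes at a
// time (vgather8b*), with larger vectors looping via vgather_subword.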
18357 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18358   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18359   match(Set dst (LoadVectorGather mem idx_base));
18360   effect(TEMP tmp, TEMP rtmp);
18361   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18362   ins_encode %{
18363     int vlen_enc = vector_length_encoding(this);
18364     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18365     __ lea($tmp$$Register, $mem$$Address);
18366     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18367   %}
18368   ins_pipe( pipe_slow );
18369 %}
18370 
18371 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18372                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18373   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18374   match(Set dst (LoadVectorGather mem idx_base));
18375   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18376   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18377   ins_encode %{
18378     int vlen_enc = vector_length_encoding(this);
18379     int vector_len = Matcher::vector_length(this);
18380     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18381     __ lea($tmp$$Register, $mem$$Address);
18382     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18383     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18384                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18385   %}
18386   ins_pipe( pipe_slow );
18387 %}
18388 
18389 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18390   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18391   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18392   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18393   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18394   ins_encode %{
18395     int vlen_enc = vector_length_encoding(this);
18396     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18397     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18398     __ lea($tmp$$Register, $mem$$Address);
18399     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18400     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18401   %}
18402   ins_pipe( pipe_slow );
18403 %}
18404 
18405 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18406                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18407   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18408   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18409   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18410   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18411   ins_encode %{
18412     int vlen_enc = vector_length_encoding(this);
18413     int vector_len = Matcher::vector_length(this);
18414     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18415     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18416     __ lea($tmp$$Register, $mem$$Address);
18417     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18418     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18419     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18420                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18421   %}
18422   ins_pipe( pipe_slow );
18423 %}
18424 
18425 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18426   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18427   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18428   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18429   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18430   ins_encode %{
18431     int vlen_enc = vector_length_encoding(this);
18432     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18433     __ lea($tmp$$Register, $mem$$Address);
18434     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
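    // vpmovmskb yields one mask bit per byte; a T_SHORT mask lane spans two
    // bytes carrying identical bits, so pextl with 0x55555555 keeps every
    // other bit, leaving exactly one mask bit per short element.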
18435     if (elem_bt == T_SHORT) {
18436       __ movl($mask_idx$$Register, 0x55555555);
18437       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18438     }
18439     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18440     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18441   %}
18442   ins_pipe( pipe_slow );
18443 %}
18444 
18445 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18446                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18447   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18448   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18449   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18450   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18451   ins_encode %{
18452     int vlen_enc = vector_length_encoding(this);
18453     int vector_len = Matcher::vector_length(this);
18454     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18455     __ lea($tmp$$Register, $mem$$Address);
18456     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18457     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
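    // As in the LE8B rule above: compress the byte-granular vpmovmskb result
    // to one bit per short element before the subword gather.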
18458     if (elem_bt == T_SHORT) {
18459       __ movl($mask_idx$$Register, 0x55555555);
18460       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18461     }
18462     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18463     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18464                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18465   %}
18466   ins_pipe( pipe_slow );
18467 %}
18468 
18469 // ====================Scatter=======================================
18470 
18471 // Scatter INT, LONG, FLOAT, DOUBLE
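//
// A scatter is the store-side dual of gather; roughly:
//
//   for (int i = 0; i < num_lanes; i++) {
//     base[idx[i]] = src[i];    // base = effective address of $mem
//   }
//
// Lanes are written from lowest to highest, so overlapping indices resolve in
// favor of the highest lane. Scatters exist only as AVX-512 opmask-predicated
// instructions (hence UseAVX > 2); the unmasked rule simply loads an all-ones
// mask from the vector_all_bits_set constant.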
18472 
18473 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18474   predicate(UseAVX > 2);
18475   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18476   effect(TEMP tmp, TEMP ktmp);
18477   format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
18478   ins_encode %{
18479     int vlen_enc = vector_length_encoding(this, $src);
18480     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18481 
18482     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18483     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18484 
18485     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18486     __ lea($tmp$$Register, $mem$$Address);
18487     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18488   %}
18489   ins_pipe( pipe_slow );
18490 %}
18491 
18492 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18493   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18494   effect(TEMP tmp, TEMP ktmp);
18495   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18496   ins_encode %{
18497     int vlen_enc = vector_length_encoding(this, $src);
18498     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18499     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18500     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The scatter instruction partially updates the opmask register used
    // for predication, hence the mask operand is copied to a temporary.
18503     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18504     __ lea($tmp$$Register, $mem$$Address);
18505     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18506   %}
18507   ins_pipe( pipe_slow );
18508 %}
18509 
18510 // ====================REPLICATE=======================================
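//
// Replicate broadcasts one scalar (register, memory, or immediate) into every
// lane of the vector. Lowering is tiered by ISA: EVEX GPR broadcasts
// (evpbroadcast*) where AVX-512VL/BW applies, AVX2 register broadcasts
// (vpbroadcast*), and SSE shuffle sequences (punpckl*, pshuflw, pshufd) as
// the fallback, as the rules below illustrate.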
18511 
// Replicate byte scalar into a vector
18513 instruct vReplB_reg(vec dst, rRegI src) %{
18514   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18515   match(Set dst (Replicate src));
18516   format %{ "replicateB $dst,$src" %}
18517   ins_encode %{
18518     uint vlen = Matcher::vector_length(this);
18519     if (UseAVX >= 2) {
18520       int vlen_enc = vector_length_encoding(this);
18521       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18522         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18523         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18524       } else {
18525         __ movdl($dst$$XMMRegister, $src$$Register);
18526         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18527       }
18528     } else {
      assert(UseAVX < 2, "");
18530       __ movdl($dst$$XMMRegister, $src$$Register);
18531       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18532       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18533       if (vlen >= 16) {
18534         assert(vlen == 16, "");
18535         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18536       }
18537     }
18538   %}
18539   ins_pipe( pipe_slow );
18540 %}
18541 
18542 instruct ReplB_mem(vec dst, memory mem) %{
18543   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18544   match(Set dst (Replicate (LoadB mem)));
18545   format %{ "replicateB $dst,$mem" %}
18546   ins_encode %{
18547     int vlen_enc = vector_length_encoding(this);
18548     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18549   %}
18550   ins_pipe( pipe_slow );
18551 %}
18552 
18553 // ====================ReplicateS=======================================
18554 
18555 instruct vReplS_reg(vec dst, rRegI src) %{
18556   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18557   match(Set dst (Replicate src));
18558   format %{ "replicateS $dst,$src" %}
18559   ins_encode %{
18560     uint vlen = Matcher::vector_length(this);
18561     int vlen_enc = vector_length_encoding(this);
18562     if (UseAVX >= 2) {
18563       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18564         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18565         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18566       } else {
18567         __ movdl($dst$$XMMRegister, $src$$Register);
18568         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18569       }
18570     } else {
18571       assert(UseAVX < 2, "");
18572       __ movdl($dst$$XMMRegister, $src$$Register);
18573       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18574       if (vlen >= 8) {
18575         assert(vlen == 8, "");
18576         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18577       }
18578     }
18579   %}
18580   ins_pipe( pipe_slow );
18581 %}
18582 
18583 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18584   match(Set dst (Replicate con));
18585   effect(TEMP rtmp);
18586   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18587   ins_encode %{
18588     int vlen_enc = vector_length_encoding(this);
18589     BasicType bt = Matcher::vector_element_basic_type(this);
18590     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18591     __ movl($rtmp$$Register, $con$$constant);
18592     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18593   %}
18594   ins_pipe( pipe_slow );
18595 %}
18596 
18597 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18598   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18599   match(Set dst (Replicate src));
18600   effect(TEMP rtmp);
18601   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18602   ins_encode %{
18603     int vlen_enc = vector_length_encoding(this);
18604     __ vmovw($rtmp$$Register, $src$$XMMRegister);
18605     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18606   %}
18607   ins_pipe( pipe_slow );
18608 %}
18609 
18610 instruct ReplS_mem(vec dst, memory mem) %{
18611   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18612   match(Set dst (Replicate (LoadS mem)));
18613   format %{ "replicateS $dst,$mem" %}
18614   ins_encode %{
18615     int vlen_enc = vector_length_encoding(this);
18616     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18617   %}
18618   ins_pipe( pipe_slow );
18619 %}
18620 
18621 // ====================ReplicateI=======================================
18622 
18623 instruct ReplI_reg(vec dst, rRegI src) %{
18624   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18625   match(Set dst (Replicate src));
18626   format %{ "replicateI $dst,$src" %}
18627   ins_encode %{
18628     uint vlen = Matcher::vector_length(this);
18629     int vlen_enc = vector_length_encoding(this);
18630     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18631       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18632     } else if (VM_Version::supports_avx2()) {
18633       __ movdl($dst$$XMMRegister, $src$$Register);
18634       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18635     } else {
18636       __ movdl($dst$$XMMRegister, $src$$Register);
18637       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18638     }
18639   %}
18640   ins_pipe( pipe_slow );
18641 %}
18642 
18643 instruct ReplI_mem(vec dst, memory mem) %{
18644   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18645   match(Set dst (Replicate (LoadI mem)));
18646   format %{ "replicateI $dst,$mem" %}
18647   ins_encode %{
18648     int vlen_enc = vector_length_encoding(this);
18649     if (VM_Version::supports_avx2()) {
18650       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18651     } else if (VM_Version::supports_avx()) {
18652       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18653     } else {
18654       __ movdl($dst$$XMMRegister, $mem$$Address);
18655       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18656     }
18657   %}
18658   ins_pipe( pipe_slow );
18659 %}
18660 
18661 instruct ReplI_imm(vec dst, immI con) %{
18662   predicate(Matcher::is_non_long_integral_vector(n));
18663   match(Set dst (Replicate con));
18664   format %{ "replicateI $dst,$con" %}
18665   ins_encode %{
18666     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18667                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18668                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
18669     BasicType bt = Matcher::vector_element_basic_type(this);
18670     int vlen = Matcher::vector_length_in_bytes(this);
18671     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18672   %}
18673   ins_pipe( pipe_slow );
18674 %}
18675 
// Replicate scalar zero into a vector
18677 instruct ReplI_zero(vec dst, immI_0 zero) %{
18678   predicate(Matcher::is_non_long_integral_vector(n));
18679   match(Set dst (Replicate zero));
18680   format %{ "replicateI $dst,$zero" %}
18681   ins_encode %{
18682     int vlen_enc = vector_length_encoding(this);
18683     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18684       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18685     } else {
18686       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18687     }
18688   %}
18689   ins_pipe( fpu_reg_reg );
18690 %}
18691 
18692 instruct ReplI_M1(vec dst, immI_M1 con) %{
18693   predicate(Matcher::is_non_long_integral_vector(n));
18694   match(Set dst (Replicate con));
18695   format %{ "vallones $dst" %}
18696   ins_encode %{
18697     int vector_len = vector_length_encoding(this);
18698     __ vallones($dst$$XMMRegister, vector_len);
18699   %}
18700   ins_pipe( pipe_slow );
18701 %}
18702 
18703 // ====================ReplicateL=======================================
18704 
// Replicate long (8 byte) scalar into a vector
18706 instruct ReplL_reg(vec dst, rRegL src) %{
18707   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18708   match(Set dst (Replicate src));
18709   format %{ "replicateL $dst,$src" %}
18710   ins_encode %{
18711     int vlen = Matcher::vector_length(this);
18712     int vlen_enc = vector_length_encoding(this);
18713     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18714       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18715     } else if (VM_Version::supports_avx2()) {
18716       __ movdq($dst$$XMMRegister, $src$$Register);
18717       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18718     } else {
18719       __ movdq($dst$$XMMRegister, $src$$Register);
18720       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18721     }
18722   %}
18723   ins_pipe( pipe_slow );
18724 %}
18725 
18726 instruct ReplL_mem(vec dst, memory mem) %{
18727   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18728   match(Set dst (Replicate (LoadL mem)));
18729   format %{ "replicateL $dst,$mem" %}
18730   ins_encode %{
18731     int vlen_enc = vector_length_encoding(this);
18732     if (VM_Version::supports_avx2()) {
18733       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18734     } else if (VM_Version::supports_sse3()) {
18735       __ movddup($dst$$XMMRegister, $mem$$Address);
18736     } else {
18737       __ movq($dst$$XMMRegister, $mem$$Address);
18738       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18739     }
18740   %}
18741   ins_pipe( pipe_slow );
18742 %}
18743 
// Replicate long (8 byte) scalar immediate into a vector by loading from the constant table.
18745 instruct ReplL_imm(vec dst, immL con) %{
18746   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18747   match(Set dst (Replicate con));
18748   format %{ "replicateL $dst,$con" %}
18749   ins_encode %{
18750     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18751     int vlen = Matcher::vector_length_in_bytes(this);
18752     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18753   %}
18754   ins_pipe( pipe_slow );
18755 %}
18756 
18757 instruct ReplL_zero(vec dst, immL0 zero) %{
18758   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18759   match(Set dst (Replicate zero));
18760   format %{ "replicateL $dst,$zero" %}
18761   ins_encode %{
18762     int vlen_enc = vector_length_encoding(this);
18763     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18764       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18765     } else {
18766       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18767     }
18768   %}
18769   ins_pipe( fpu_reg_reg );
18770 %}
18771 
18772 instruct ReplL_M1(vec dst, immL_M1 con) %{
18773   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18774   match(Set dst (Replicate con));
18775   format %{ "vallones $dst" %}
18776   ins_encode %{
18777     int vector_len = vector_length_encoding(this);
18778     __ vallones($dst$$XMMRegister, vector_len);
18779   %}
18780   ins_pipe( pipe_slow );
18781 %}
18782 
18783 // ====================ReplicateF=======================================
18784 
18785 instruct vReplF_reg(vec dst, vlRegF src) %{
18786   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18787   match(Set dst (Replicate src));
18788   format %{ "replicateF $dst,$src" %}
18789   ins_encode %{
18790     uint vlen = Matcher::vector_length(this);
18791     int vlen_enc = vector_length_encoding(this);
18792     if (vlen <= 4) {
18793       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18794     } else if (VM_Version::supports_avx2()) {
18795       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18796     } else {
18797       assert(vlen == 8, "sanity");
18798       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18799       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18800     }
18801   %}
18802   ins_pipe( pipe_slow );
18803 %}
18804 
18805 instruct ReplF_reg(vec dst, vlRegF src) %{
18806   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18807   match(Set dst (Replicate src));
18808   format %{ "replicateF $dst,$src" %}
18809   ins_encode %{
18810     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
18811   %}
18812   ins_pipe( pipe_slow );
18813 %}
18814 
18815 instruct ReplF_mem(vec dst, memory mem) %{
18816   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18817   match(Set dst (Replicate (LoadF mem)));
18818   format %{ "replicateF $dst,$mem" %}
18819   ins_encode %{
18820     int vlen_enc = vector_length_encoding(this);
18821     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18822   %}
18823   ins_pipe( pipe_slow );
18824 %}
18825 
// Replicate float scalar immediate into a vector by loading from the constant table.
18827 instruct ReplF_imm(vec dst, immF con) %{
18828   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18829   match(Set dst (Replicate con));
18830   format %{ "replicateF $dst,$con" %}
18831   ins_encode %{
18832     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
18833                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
18834     int vlen = Matcher::vector_length_in_bytes(this);
18835     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
18836   %}
18837   ins_pipe( pipe_slow );
18838 %}
18839 
18840 instruct ReplF_zero(vec dst, immF0 zero) %{
18841   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18842   match(Set dst (Replicate zero));
18843   format %{ "replicateF $dst,$zero" %}
18844   ins_encode %{
18845     int vlen_enc = vector_length_encoding(this);
18846     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18847       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18848     } else {
18849       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18850     }
18851   %}
18852   ins_pipe( fpu_reg_reg );
18853 %}
18854 
18855 // ====================ReplicateD=======================================
18856 
// Replicate double (8 byte) scalar into a vector
18858 instruct vReplD_reg(vec dst, vlRegD src) %{
18859   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18860   match(Set dst (Replicate src));
18861   format %{ "replicateD $dst,$src" %}
18862   ins_encode %{
18863     uint vlen = Matcher::vector_length(this);
18864     int vlen_enc = vector_length_encoding(this);
18865     if (vlen <= 2) {
18866       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18867     } else if (VM_Version::supports_avx2()) {
18868       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18869     } else {
18870       assert(vlen == 4, "sanity");
18871       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18872       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18873     }
18874   %}
18875   ins_pipe( pipe_slow );
18876 %}
18877 
18878 instruct ReplD_reg(vec dst, vlRegD src) %{
18879   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18880   match(Set dst (Replicate src));
18881   format %{ "replicateD $dst,$src" %}
18882   ins_encode %{
18883     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
18884   %}
18885   ins_pipe( pipe_slow );
18886 %}
18887 
18888 instruct ReplD_mem(vec dst, memory mem) %{
18889   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18890   match(Set dst (Replicate (LoadD mem)));
18891   format %{ "replicateD $dst,$mem" %}
18892   ins_encode %{
18893     if (Matcher::vector_length(this) >= 4) {
18894       int vlen_enc = vector_length_encoding(this);
18895       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18896     } else {
18897       __ movddup($dst$$XMMRegister, $mem$$Address);
18898     }
18899   %}
18900   ins_pipe( pipe_slow );
18901 %}
18902 
// Replicate double (8 byte) scalar immediate into a vector by loading from the constant table.
18904 instruct ReplD_imm(vec dst, immD con) %{
18905   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18906   match(Set dst (Replicate con));
18907   format %{ "replicateD $dst,$con" %}
18908   ins_encode %{
18909     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18910     int vlen = Matcher::vector_length_in_bytes(this);
18911     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
18912   %}
18913   ins_pipe( pipe_slow );
18914 %}
18915 
18916 instruct ReplD_zero(vec dst, immD0 zero) %{
18917   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18918   match(Set dst (Replicate zero));
18919   format %{ "replicateD $dst,$zero" %}
18920   ins_encode %{
18921     int vlen_enc = vector_length_encoding(this);
18922     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18923       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18924     } else {
18925       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18926     }
18927   %}
18928   ins_pipe( fpu_reg_reg );
18929 %}
18930 
18931 // ====================VECTOR INSERT=======================================
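//
// VectorInsert overwrites a single element of a vector with a scalar value.
// For vectors wider than 128 bits the constant index is decomposed, e.g.:
//
//   x_idx = idx & (elem_per_lane - 1);    // element within a 128-bit lane
//   y_idx = idx >> log2(elem_per_lane);   // which 128-bit lane
//
// The selected lane is extracted, the scalar inserted with [v]pinsr*/insertps,
// and the lane written back, as the rules below show.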
18932 
18933 instruct insert(vec dst, rRegI val, immU8 idx) %{
18934   predicate(Matcher::vector_length_in_bytes(n) < 32);
18935   match(Set dst (VectorInsert (Binary dst val) idx));
18936   format %{ "vector_insert $dst,$val,$idx" %}
18937   ins_encode %{
18938     assert(UseSSE >= 4, "required");
18939     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
18940 
18941     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18942 
18943     assert(is_integral_type(elem_bt), "");
18944     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18945 
18946     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
18947   %}
18948   ins_pipe( pipe_slow );
18949 %}
18950 
18951 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
18952   predicate(Matcher::vector_length_in_bytes(n) == 32);
18953   match(Set dst (VectorInsert (Binary src val) idx));
18954   effect(TEMP vtmp);
18955   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18956   ins_encode %{
18957     int vlen_enc = Assembler::AVX_256bit;
18958     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18959     int elem_per_lane = 16/type2aelembytes(elem_bt);
18960     int log2epr = log2(elem_per_lane);
18961 
18962     assert(is_integral_type(elem_bt), "sanity");
18963     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18964 
18965     uint x_idx = $idx$$constant & right_n_bits(log2epr);
18966     uint y_idx = ($idx$$constant >> log2epr) & 1;
18967     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18968     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18969     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18970   %}
18971   ins_pipe( pipe_slow );
18972 %}
18973 
18974 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
18975   predicate(Matcher::vector_length_in_bytes(n) == 64);
18976   match(Set dst (VectorInsert (Binary src val) idx));
18977   effect(TEMP vtmp);
18978   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18979   ins_encode %{
18980     assert(UseAVX > 2, "sanity");
18981 
18982     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18983     int elem_per_lane = 16/type2aelembytes(elem_bt);
18984     int log2epr = log2(elem_per_lane);
18985 
18986     assert(is_integral_type(elem_bt), "");
18987     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18988 
18989     uint x_idx = $idx$$constant & right_n_bits(log2epr);
18990     uint y_idx = ($idx$$constant >> log2epr) & 3;
18991     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18992     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18993     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18994   %}
18995   ins_pipe( pipe_slow );
18996 %}
18997 
18998 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
18999   predicate(Matcher::vector_length(n) == 2);
19000   match(Set dst (VectorInsert (Binary dst val) idx));
19001   format %{ "vector_insert $dst,$val,$idx" %}
19002   ins_encode %{
19003     assert(UseSSE >= 4, "required");
19004     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19005     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19006 
19007     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19008   %}
19009   ins_pipe( pipe_slow );
19010 %}
19011 
19012 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19013   predicate(Matcher::vector_length(n) == 4);
19014   match(Set dst (VectorInsert (Binary src val) idx));
19015   effect(TEMP vtmp);
19016   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19017   ins_encode %{
19018     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19019     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19020 
19021     uint x_idx = $idx$$constant & right_n_bits(1);
19022     uint y_idx = ($idx$$constant >> 1) & 1;
19023     int vlen_enc = Assembler::AVX_256bit;
19024     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19025     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19026     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19027   %}
19028   ins_pipe( pipe_slow );
19029 %}
19030 
19031 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19032   predicate(Matcher::vector_length(n) == 8);
19033   match(Set dst (VectorInsert (Binary src val) idx));
19034   effect(TEMP vtmp);
19035   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19036   ins_encode %{
19037     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19038     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19039 
19040     uint x_idx = $idx$$constant & right_n_bits(1);
19041     uint y_idx = ($idx$$constant >> 1) & 3;
19042     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19043     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19044     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19045   %}
19046   ins_pipe( pipe_slow );
19047 %}
19048 
19049 instruct insertF(vec dst, regF val, immU8 idx) %{
19050   predicate(Matcher::vector_length(n) < 8);
19051   match(Set dst (VectorInsert (Binary dst val) idx));
19052   format %{ "vector_insert $dst,$val,$idx" %}
19053   ins_encode %{
19054     assert(UseSSE >= 4, "sanity");
19055 
19056     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19057     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19058 
19059     uint x_idx = $idx$$constant & right_n_bits(2);
19060     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19061   %}
19062   ins_pipe( pipe_slow );
19063 %}
19064 
19065 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19066   predicate(Matcher::vector_length(n) >= 8);
19067   match(Set dst (VectorInsert (Binary src val) idx));
19068   effect(TEMP vtmp);
19069   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19070   ins_encode %{
19071     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19072     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19073 
19074     int vlen = Matcher::vector_length(this);
19075     uint x_idx = $idx$$constant & right_n_bits(2);
19076     if (vlen == 8) {
19077       uint y_idx = ($idx$$constant >> 2) & 1;
19078       int vlen_enc = Assembler::AVX_256bit;
19079       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19080       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19081       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19082     } else {
19083       assert(vlen == 16, "sanity");
19084       uint y_idx = ($idx$$constant >> 2) & 3;
19085       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19086       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19087       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19088     }
19089   %}
19090   ins_pipe( pipe_slow );
19091 %}
19092 
19093 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19094   predicate(Matcher::vector_length(n) == 2);
19095   match(Set dst (VectorInsert (Binary dst val) idx));
19096   effect(TEMP tmp);
19097   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19098   ins_encode %{
19099     assert(UseSSE >= 4, "sanity");
19100     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19101     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19102 
19103     __ movq($tmp$$Register, $val$$XMMRegister);
19104     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19105   %}
19106   ins_pipe( pipe_slow );
19107 %}
19108 
19109 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19110   predicate(Matcher::vector_length(n) == 4);
19111   match(Set dst (VectorInsert (Binary src val) idx));
19112   effect(TEMP vtmp, TEMP tmp);
19113   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19114   ins_encode %{
19115     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19116     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19117 
19118     uint x_idx = $idx$$constant & right_n_bits(1);
19119     uint y_idx = ($idx$$constant >> 1) & 1;
19120     int vlen_enc = Assembler::AVX_256bit;
19121     __ movq($tmp$$Register, $val$$XMMRegister);
19122     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19123     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19124     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19125   %}
19126   ins_pipe( pipe_slow );
19127 %}
19128 
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19130   predicate(Matcher::vector_length(n) == 8);
19131   match(Set dst (VectorInsert (Binary src val) idx));
19132   effect(TEMP tmp, TEMP vtmp);
19133   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19134   ins_encode %{
19135     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19136     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19137 
19138     uint x_idx = $idx$$constant & right_n_bits(1);
19139     uint y_idx = ($idx$$constant >> 1) & 3;
19140     __ movq($tmp$$Register, $val$$XMMRegister);
19141     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19142     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19143     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19144   %}
19145   ins_pipe( pipe_slow );
19146 %}
19147 
19148 // ====================REDUCTION ARITHMETIC=======================================
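//
// A reduction folds every lane of src2 into a scalar and combines the result
// with the scalar input src1 (the running accumulator, or the identity for
// the unordered VectorAPI forms); informally:
//
//   acc = src1;
//   for (int i = 0; i < num_lanes; i++) { acc = acc OP src2[i]; }
//   dst = acc;
//
// Integral reductions may be evaluated in any order; floating-point rules are
// split below by requires_strict_order(), since FP add/mul are not associative.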
19149 
19150 // =======================Int Reduction==========================================
19151 
19152 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19153   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19154   match(Set dst (AddReductionVI src1 src2));
19155   match(Set dst (MulReductionVI src1 src2));
19156   match(Set dst (AndReductionV  src1 src2));
19157   match(Set dst ( OrReductionV  src1 src2));
19158   match(Set dst (XorReductionV  src1 src2));
19159   match(Set dst (MinReductionV  src1 src2));
19160   match(Set dst (MaxReductionV  src1 src2));
19161   effect(TEMP vtmp1, TEMP vtmp2);
19162   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19163   ins_encode %{
19164     int opcode = this->ideal_Opcode();
19165     int vlen = Matcher::vector_length(this, $src2);
19166     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19167   %}
19168   ins_pipe( pipe_slow );
19169 %}
19170 
19171 // =======================Long Reduction==========================================
19172 
19173 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19174   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19175   match(Set dst (AddReductionVL src1 src2));
19176   match(Set dst (MulReductionVL src1 src2));
19177   match(Set dst (AndReductionV  src1 src2));
19178   match(Set dst ( OrReductionV  src1 src2));
19179   match(Set dst (XorReductionV  src1 src2));
19180   match(Set dst (MinReductionV  src1 src2));
19181   match(Set dst (MaxReductionV  src1 src2));
19182   effect(TEMP vtmp1, TEMP vtmp2);
19183   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19184   ins_encode %{
19185     int opcode = this->ideal_Opcode();
19186     int vlen = Matcher::vector_length(this, $src2);
19187     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19188   %}
19189   ins_pipe( pipe_slow );
19190 %}
19191 
19192 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19193   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19194   match(Set dst (AddReductionVL src1 src2));
19195   match(Set dst (MulReductionVL src1 src2));
19196   match(Set dst (AndReductionV  src1 src2));
19197   match(Set dst ( OrReductionV  src1 src2));
19198   match(Set dst (XorReductionV  src1 src2));
19199   match(Set dst (MinReductionV  src1 src2));
19200   match(Set dst (MaxReductionV  src1 src2));
19201   effect(TEMP vtmp1, TEMP vtmp2);
19202   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19203   ins_encode %{
19204     int opcode = this->ideal_Opcode();
19205     int vlen = Matcher::vector_length(this, $src2);
19206     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19207   %}
19208   ins_pipe( pipe_slow );
19209 %}
19210 
19211 // =======================Float Reduction==========================================
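//
// The strictly ordered rules accumulate into $dst lane by lane, preserving
// Java's sequential evaluation order; the unordered_* rules further below may
// combine lanes pairwise, which the VectorAPI explicitly permits.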
19212 
19213 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19214   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19215   match(Set dst (AddReductionVF dst src));
19216   match(Set dst (MulReductionVF dst src));
19217   effect(TEMP dst, TEMP vtmp);
19218   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19219   ins_encode %{
19220     int opcode = this->ideal_Opcode();
19221     int vlen = Matcher::vector_length(this, $src);
19222     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19223   %}
19224   ins_pipe( pipe_slow );
19225 %}
19226 
19227 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19228   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19229   match(Set dst (AddReductionVF dst src));
19230   match(Set dst (MulReductionVF dst src));
19231   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19232   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19233   ins_encode %{
19234     int opcode = this->ideal_Opcode();
19235     int vlen = Matcher::vector_length(this, $src);
19236     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19237   %}
19238   ins_pipe( pipe_slow );
19239 %}
19240 
19241 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19242   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19243   match(Set dst (AddReductionVF dst src));
19244   match(Set dst (MulReductionVF dst src));
19245   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19246   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19247   ins_encode %{
19248     int opcode = this->ideal_Opcode();
19249     int vlen = Matcher::vector_length(this, $src);
19250     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19251   %}
19252   ins_pipe( pipe_slow );
19253 %}
19254 
19255 
19256 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19257   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19258   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19259   // src1 contains reduction identity
19260   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19261   match(Set dst (AddReductionVF src1 src2));
19262   match(Set dst (MulReductionVF src1 src2));
19263   effect(TEMP dst);
19264   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19265   ins_encode %{
19266     int opcode = this->ideal_Opcode();
19267     int vlen = Matcher::vector_length(this, $src2);
19268     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19269   %}
19270   ins_pipe( pipe_slow );
19271 %}
19272 
19273 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19274   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19275   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19276   // src1 contains reduction identity
19277   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19278   match(Set dst (AddReductionVF src1 src2));
19279   match(Set dst (MulReductionVF src1 src2));
19280   effect(TEMP dst, TEMP vtmp);
19281   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19282   ins_encode %{
19283     int opcode = this->ideal_Opcode();
19284     int vlen = Matcher::vector_length(this, $src2);
19285     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19286   %}
19287   ins_pipe( pipe_slow );
19288 %}
19289 
19290 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19291   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19292   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19293   // src1 contains reduction identity
19294   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19295   match(Set dst (AddReductionVF src1 src2));
19296   match(Set dst (MulReductionVF src1 src2));
19297   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19298   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19299   ins_encode %{
19300     int opcode = this->ideal_Opcode();
19301     int vlen = Matcher::vector_length(this, $src2);
19302     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19303   %}
19304   ins_pipe( pipe_slow );
19305 %}
19306 
19307 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19308   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19309   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19310   // src1 contains reduction identity
19311   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19312   match(Set dst (AddReductionVF src1 src2));
19313   match(Set dst (MulReductionVF src1 src2));
19314   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19315   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19316   ins_encode %{
19317     int opcode = this->ideal_Opcode();
19318     int vlen = Matcher::vector_length(this, $src2);
19319     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19320   %}
19321   ins_pipe( pipe_slow );
19322 %}
19323 
19324 // =======================Double Reduction==========================================
19325 
19326 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19327   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19328   match(Set dst (AddReductionVD dst src));
19329   match(Set dst (MulReductionVD dst src));
19330   effect(TEMP dst, TEMP vtmp);
19331   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19332   ins_encode %{
19333     int opcode = this->ideal_Opcode();
19334     int vlen = Matcher::vector_length(this, $src);
19335     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19336 %}
19337   ins_pipe( pipe_slow );
19338 %}
19339 
19340 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19341   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19342   match(Set dst (AddReductionVD dst src));
19343   match(Set dst (MulReductionVD dst src));
19344   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19345   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19346   ins_encode %{
19347     int opcode = this->ideal_Opcode();
19348     int vlen = Matcher::vector_length(this, $src);
19349     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19350   %}
19351   ins_pipe( pipe_slow );
19352 %}
19353 
19354 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19355   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19356   match(Set dst (AddReductionVD dst src));
19357   match(Set dst (MulReductionVD dst src));
19358   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19359   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19360   ins_encode %{
19361     int opcode = this->ideal_Opcode();
19362     int vlen = Matcher::vector_length(this, $src);
19363     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19364   %}
19365   ins_pipe( pipe_slow );
19366 %}
19367 
19368 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19369   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19370   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19371   // src1 contains reduction identity
19372   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19373   match(Set dst (AddReductionVD src1 src2));
19374   match(Set dst (MulReductionVD src1 src2));
19375   effect(TEMP dst);
19376   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19377   ins_encode %{
19378     int opcode = this->ideal_Opcode();
19379     int vlen = Matcher::vector_length(this, $src2);
19380     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19381 %}
19382   ins_pipe( pipe_slow );
19383 %}
19384 
19385 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19386   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19387   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19388   // src1 contains reduction identity
19389   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19390   match(Set dst (AddReductionVD src1 src2));
19391   match(Set dst (MulReductionVD src1 src2));
19392   effect(TEMP dst, TEMP vtmp);
19393   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19394   ins_encode %{
19395     int opcode = this->ideal_Opcode();
19396     int vlen = Matcher::vector_length(this, $src2);
19397     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19398   %}
19399   ins_pipe( pipe_slow );
19400 %}
19401 
19402 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19403   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19404   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19405   // src1 contains reduction identity
19406   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19407   match(Set dst (AddReductionVD src1 src2));
19408   match(Set dst (MulReductionVD src1 src2));
19409   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19410   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19411   ins_encode %{
19412     int opcode = this->ideal_Opcode();
19413     int vlen = Matcher::vector_length(this, $src2);
19414     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19415   %}
19416   ins_pipe( pipe_slow );
19417 %}
19418 
19419 // =======================Byte Reduction==========================================
19420 
19421 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19422   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19423   match(Set dst (AddReductionVI src1 src2));
19424   match(Set dst (AndReductionV  src1 src2));
19425   match(Set dst ( OrReductionV  src1 src2));
19426   match(Set dst (XorReductionV  src1 src2));
19427   match(Set dst (MinReductionV  src1 src2));
19428   match(Set dst (MaxReductionV  src1 src2));
19429   effect(TEMP vtmp1, TEMP vtmp2);
19430   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19431   ins_encode %{
19432     int opcode = this->ideal_Opcode();
19433     int vlen = Matcher::vector_length(this, $src2);
19434     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19435   %}
19436   ins_pipe( pipe_slow );
19437 %}
19438 
19439 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19440   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19441   match(Set dst (AddReductionVI src1 src2));
19442   match(Set dst (AndReductionV  src1 src2));
19443   match(Set dst ( OrReductionV  src1 src2));
19444   match(Set dst (XorReductionV  src1 src2));
19445   match(Set dst (MinReductionV  src1 src2));
19446   match(Set dst (MaxReductionV  src1 src2));
19447   effect(TEMP vtmp1, TEMP vtmp2);
19448   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19449   ins_encode %{
19450     int opcode = this->ideal_Opcode();
19451     int vlen = Matcher::vector_length(this, $src2);
19452     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19453   %}
19454   ins_pipe( pipe_slow );
19455 %}
19456 
19457 // =======================Short Reduction==========================================
19458 
19459 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19460   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19461   match(Set dst (AddReductionVI src1 src2));
19462   match(Set dst (MulReductionVI src1 src2));
19463   match(Set dst (AndReductionV  src1 src2));
19464   match(Set dst ( OrReductionV  src1 src2));
19465   match(Set dst (XorReductionV  src1 src2));
19466   match(Set dst (MinReductionV  src1 src2));
19467   match(Set dst (MaxReductionV  src1 src2));
19468   effect(TEMP vtmp1, TEMP vtmp2);
19469   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19470   ins_encode %{
19471     int opcode = this->ideal_Opcode();
19472     int vlen = Matcher::vector_length(this, $src2);
19473     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19474   %}
19475   ins_pipe( pipe_slow );
19476 %}
19477 
19478 // =======================Mul Reduction==========================================
19479 
19480 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19481   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19482             Matcher::vector_length(n->in(2)) <= 32); // src2
19483   match(Set dst (MulReductionVI src1 src2));
19484   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19485   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19486   ins_encode %{
19487     int opcode = this->ideal_Opcode();
19488     int vlen = Matcher::vector_length(this, $src2);
19489     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19490   %}
19491   ins_pipe( pipe_slow );
19492 %}
19493 
19494 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19495   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19496             Matcher::vector_length(n->in(2)) == 64); // src2
19497   match(Set dst (MulReductionVI src1 src2));
19498   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19499   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19500   ins_encode %{
19501     int opcode = this->ideal_Opcode();
19502     int vlen = Matcher::vector_length(this, $src2);
19503     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19504   %}
19505   ins_pipe( pipe_slow );
19506 %}
19507 
19508 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
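// Two flavors are provided: patterns whose scalar input is the reduction
// identity (+inf for min, -inf for max, enforced by the immF src1 predicate),
// and '_av' patterns where a live accumulator arrives in $dst. Java min/max
// semantics (NaN propagation, min(-0.0f, +0.0f) == -0.0f) cannot be expressed
// as a single minps/maxps, hence the multi-instruction sequences with several
// TEMP vectors and clobbered flags.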
19510 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19511                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19512   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19513             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19514              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19515             Matcher::vector_length(n->in(2)) == 2);
19516   match(Set dst (MinReductionV src1 src2));
19517   match(Set dst (MaxReductionV src1 src2));
19518   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19519   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19520   ins_encode %{
19521     assert(UseAVX > 0, "sanity");
19522 
19523     int opcode = this->ideal_Opcode();
19524     int vlen = Matcher::vector_length(this, $src2);
19525     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19526                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19527   %}
19528   ins_pipe( pipe_slow );
19529 %}
19530 
19531 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19532                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19533   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19534             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19535              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19536             Matcher::vector_length(n->in(2)) >= 4);
19537   match(Set dst (MinReductionV src1 src2));
19538   match(Set dst (MaxReductionV src1 src2));
19539   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19540   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19541   ins_encode %{
19542     assert(UseAVX > 0, "sanity");
19543 
19544     int opcode = this->ideal_Opcode();
19545     int vlen = Matcher::vector_length(this, $src2);
19546     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19547                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19548   %}
19549   ins_pipe( pipe_slow );
19550 %}
19551 
19552 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19553                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19554   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19555             Matcher::vector_length(n->in(2)) == 2);
19556   match(Set dst (MinReductionV dst src));
19557   match(Set dst (MaxReductionV dst src));
19558   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19559   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19560   ins_encode %{
19561     assert(UseAVX > 0, "sanity");
19562 
19563     int opcode = this->ideal_Opcode();
19564     int vlen = Matcher::vector_length(this, $src);
19565     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19566                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19567   %}
19568   ins_pipe( pipe_slow );
19569 %}
19570 
19571 
19572 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19573                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19574   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19575             Matcher::vector_length(n->in(2)) >= 4);
19576   match(Set dst (MinReductionV dst src));
19577   match(Set dst (MaxReductionV dst src));
19578   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19579   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19580   ins_encode %{
19581     assert(UseAVX > 0, "sanity");
19582 
19583     int opcode = this->ideal_Opcode();
19584     int vlen = Matcher::vector_length(this, $src);
19585     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19586                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19587   %}
19588   ins_pipe( pipe_slow );
19589 %}
19590 
19591 instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{
19592   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19593             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19594              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19595             Matcher::vector_length(n->in(2)) == 2);
19596   match(Set dst (MinReductionV src1 src2));
19597   match(Set dst (MaxReductionV src1 src2));
19598   effect(TEMP dst, TEMP xtmp1);
19599   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19600   ins_encode %{
19601     int opcode = this->ideal_Opcode();
19602     int vlen = Matcher::vector_length(this, $src2);
19603     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19604                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19605   %}
19606   ins_pipe( pipe_slow );
19607 %}
19608 
19609 instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19610   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19611             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19612              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19613             Matcher::vector_length(n->in(2)) >= 4);
19614   match(Set dst (MinReductionV src1 src2));
19615   match(Set dst (MaxReductionV src1 src2));
19616   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19617   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19618   ins_encode %{
19619     int opcode = this->ideal_Opcode();
19620     int vlen = Matcher::vector_length(this, $src2);
19621     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19622                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19623   %}
19624   ins_pipe( pipe_slow );
19625 %}
19626 
19627 instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{
19628   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19629             Matcher::vector_length(n->in(2)) == 2);
19630   match(Set dst (MinReductionV dst src));
19631   match(Set dst (MaxReductionV dst src));
19632   effect(TEMP dst, TEMP xtmp1);
19633   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19634   ins_encode %{
19635     int opcode = this->ideal_Opcode();
19636     int vlen = Matcher::vector_length(this, $src);
19637     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19638                          $xtmp1$$XMMRegister);
19639   %}
19640   ins_pipe( pipe_slow );
19641 %}
19642 
19643 instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19644   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19645             Matcher::vector_length(n->in(2)) >= 4);
19646   match(Set dst (MinReductionV dst src));
19647   match(Set dst (MaxReductionV dst src));
19648   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19649   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19650   ins_encode %{
19651     int opcode = this->ideal_Opcode();
19652     int vlen = Matcher::vector_length(this, $src);
19653     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19654                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19655   %}
19656   ins_pipe( pipe_slow );
19657 %}
19658 
//--------------------Min/Max Double Reduction --------------------
19660 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19661                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19662   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19663             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19664              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19665             Matcher::vector_length(n->in(2)) == 2);
19666   match(Set dst (MinReductionV src1 src2));
19667   match(Set dst (MaxReductionV src1 src2));
19668   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19669   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19670   ins_encode %{
19671     assert(UseAVX > 0, "sanity");
19672 
19673     int opcode = this->ideal_Opcode();
19674     int vlen = Matcher::vector_length(this, $src2);
19675     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19676                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19677   %}
19678   ins_pipe( pipe_slow );
19679 %}
19680 
19681 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19682                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19683   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19684             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19685              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19686             Matcher::vector_length(n->in(2)) >= 4);
19687   match(Set dst (MinReductionV src1 src2));
19688   match(Set dst (MaxReductionV src1 src2));
19689   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19690   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19691   ins_encode %{
19692     assert(UseAVX > 0, "sanity");
19693 
19694     int opcode = this->ideal_Opcode();
19695     int vlen = Matcher::vector_length(this, $src2);
19696     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19697                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19698   %}
19699   ins_pipe( pipe_slow );
19700 %}
19701 
19702 
19703 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19704                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19705   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19706             Matcher::vector_length(n->in(2)) == 2);
19707   match(Set dst (MinReductionV dst src));
19708   match(Set dst (MaxReductionV dst src));
19709   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19710   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19711   ins_encode %{
19712     assert(UseAVX > 0, "sanity");
19713 
19714     int opcode = this->ideal_Opcode();
19715     int vlen = Matcher::vector_length(this, $src);
19716     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19717                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19718   %}
19719   ins_pipe( pipe_slow );
19720 %}
19721 
19722 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19723                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19724   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19725             Matcher::vector_length(n->in(2)) >= 4);
19726   match(Set dst (MinReductionV dst src));
19727   match(Set dst (MaxReductionV dst src));
19728   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19729   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19730   ins_encode %{
19731     assert(UseAVX > 0, "sanity");
19732 
19733     int opcode = this->ideal_Opcode();
19734     int vlen = Matcher::vector_length(this, $src);
19735     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19736                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19737   %}
19738   ins_pipe( pipe_slow );
19739 %}
19740 
19741 instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{
19742   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19743             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19744              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19745             Matcher::vector_length(n->in(2)) == 2);
19746   match(Set dst (MinReductionV src1 src2));
19747   match(Set dst (MaxReductionV src1 src2));
19748   effect(TEMP dst, TEMP xtmp1);
19749   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
19750   ins_encode %{
19751     int opcode = this->ideal_Opcode();
19752     int vlen = Matcher::vector_length(this, $src2);
19753     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
19754                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
19755   %}
19756   ins_pipe( pipe_slow );
19757 %}
19758 
19759 instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
19760   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19761             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19762              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19763             Matcher::vector_length(n->in(2)) >= 4);
19764   match(Set dst (MinReductionV src1 src2));
19765   match(Set dst (MaxReductionV src1 src2));
19766   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19767   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
19768   ins_encode %{
19769     int opcode = this->ideal_Opcode();
19770     int vlen = Matcher::vector_length(this, $src2);
19771     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19772                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19773   %}
19774   ins_pipe( pipe_slow );
19775 %}
19776 
19777 
19778 instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{
19779   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19780             Matcher::vector_length(n->in(2)) == 2);
19781   match(Set dst (MinReductionV dst src));
19782   match(Set dst (MaxReductionV dst src));
19783   effect(TEMP dst, TEMP xtmp1);
19784   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
19785   ins_encode %{
19786     int opcode = this->ideal_Opcode();
19787     int vlen = Matcher::vector_length(this, $src);
19788     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19789                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19790   %}
19791   ins_pipe( pipe_slow );
19792 %}
19793 
19794 instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{
19795   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19796             Matcher::vector_length(n->in(2)) >= 4);
19797   match(Set dst (MinReductionV dst src));
19798   match(Set dst (MaxReductionV dst src));
19799   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19800   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
19801   ins_encode %{
19802     int opcode = this->ideal_Opcode();
19803     int vlen = Matcher::vector_length(this, $src);
19804     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19805                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19806   %}
19807   ins_pipe( pipe_slow );
19808 %}
19809 
19810 // ====================VECTOR ARITHMETIC=======================================
19811 
19812 // --------------------------------- ADD --------------------------------------
19813 
19814 // Bytes vector add
19815 instruct vaddB(vec dst, vec src) %{
19816   predicate(UseAVX == 0);
19817   match(Set dst (AddVB dst src));
19818   format %{ "paddb   $dst,$src\t! add packedB" %}
19819   ins_encode %{
19820     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
19821   %}
19822   ins_pipe( pipe_slow );
19823 %}
19824 
19825 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
19826   predicate(UseAVX > 0);
19827   match(Set dst (AddVB src1 src2));
19828   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
19829   ins_encode %{
19830     int vlen_enc = vector_length_encoding(this);
19831     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19832   %}
19833   ins_pipe( pipe_slow );
19834 %}
19835 
19836 instruct vaddB_mem(vec dst, vec src, memory mem) %{
19837   predicate((UseAVX > 0) &&
19838             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19839   match(Set dst (AddVB src (LoadVector mem)));
19840   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
19841   ins_encode %{
19842     int vlen_enc = vector_length_encoding(this);
19843     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19844   %}
19845   ins_pipe( pipe_slow );
19846 %}
19847 
19848 // Shorts/Chars vector add
19849 instruct vaddS(vec dst, vec src) %{
19850   predicate(UseAVX == 0);
19851   match(Set dst (AddVS dst src));
19852   format %{ "paddw   $dst,$src\t! add packedS" %}
19853   ins_encode %{
19854     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
19855   %}
19856   ins_pipe( pipe_slow );
19857 %}
19858 
19859 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
19860   predicate(UseAVX > 0);
19861   match(Set dst (AddVS src1 src2));
19862   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
19863   ins_encode %{
19864     int vlen_enc = vector_length_encoding(this);
19865     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19866   %}
19867   ins_pipe( pipe_slow );
19868 %}
19869 
19870 instruct vaddS_mem(vec dst, vec src, memory mem) %{
19871   predicate((UseAVX > 0) &&
19872             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19873   match(Set dst (AddVS src (LoadVector mem)));
19874   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
19875   ins_encode %{
19876     int vlen_enc = vector_length_encoding(this);
19877     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19878   %}
19879   ins_pipe( pipe_slow );
19880 %}
19881 
19882 // Integers vector add
19883 instruct vaddI(vec dst, vec src) %{
19884   predicate(UseAVX == 0);
19885   match(Set dst (AddVI dst src));
19886   format %{ "paddd   $dst,$src\t! add packedI" %}
19887   ins_encode %{
19888     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
19889   %}
19890   ins_pipe( pipe_slow );
19891 %}
19892 
19893 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
19894   predicate(UseAVX > 0);
19895   match(Set dst (AddVI src1 src2));
19896   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
19897   ins_encode %{
19898     int vlen_enc = vector_length_encoding(this);
19899     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19900   %}
19901   ins_pipe( pipe_slow );
19902 %}
19903 
19904 
19905 instruct vaddI_mem(vec dst, vec src, memory mem) %{
19906   predicate((UseAVX > 0) &&
19907             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19908   match(Set dst (AddVI src (LoadVector mem)));
19909   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
19910   ins_encode %{
19911     int vlen_enc = vector_length_encoding(this);
19912     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19913   %}
19914   ins_pipe( pipe_slow );
19915 %}
19916 
19917 // Longs vector add
19918 instruct vaddL(vec dst, vec src) %{
19919   predicate(UseAVX == 0);
19920   match(Set dst (AddVL dst src));
19921   format %{ "paddq   $dst,$src\t! add packedL" %}
19922   ins_encode %{
19923     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
19924   %}
19925   ins_pipe( pipe_slow );
19926 %}
19927 
19928 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
19929   predicate(UseAVX > 0);
19930   match(Set dst (AddVL src1 src2));
19931   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
19932   ins_encode %{
19933     int vlen_enc = vector_length_encoding(this);
19934     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19935   %}
19936   ins_pipe( pipe_slow );
19937 %}
19938 
19939 instruct vaddL_mem(vec dst, vec src, memory mem) %{
19940   predicate((UseAVX > 0) &&
19941             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19942   match(Set dst (AddVL src (LoadVector mem)));
19943   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
19944   ins_encode %{
19945     int vlen_enc = vector_length_encoding(this);
19946     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19947   %}
19948   ins_pipe( pipe_slow );
19949 %}
19950 
19951 // Floats vector add
19952 instruct vaddF(vec dst, vec src) %{
19953   predicate(UseAVX == 0);
19954   match(Set dst (AddVF dst src));
19955   format %{ "addps   $dst,$src\t! add packedF" %}
19956   ins_encode %{
19957     __ addps($dst$$XMMRegister, $src$$XMMRegister);
19958   %}
19959   ins_pipe( pipe_slow );
19960 %}
19961 
19962 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
19963   predicate(UseAVX > 0);
19964   match(Set dst (AddVF src1 src2));
19965   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
19966   ins_encode %{
19967     int vlen_enc = vector_length_encoding(this);
19968     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19969   %}
19970   ins_pipe( pipe_slow );
19971 %}
19972 
19973 instruct vaddF_mem(vec dst, vec src, memory mem) %{
19974   predicate((UseAVX > 0) &&
19975             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19976   match(Set dst (AddVF src (LoadVector mem)));
19977   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
19978   ins_encode %{
19979     int vlen_enc = vector_length_encoding(this);
19980     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19981   %}
19982   ins_pipe( pipe_slow );
19983 %}
19984 
19985 // Doubles vector add
19986 instruct vaddD(vec dst, vec src) %{
19987   predicate(UseAVX == 0);
19988   match(Set dst (AddVD dst src));
19989   format %{ "addpd   $dst,$src\t! add packedD" %}
19990   ins_encode %{
19991     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
19992   %}
19993   ins_pipe( pipe_slow );
19994 %}
19995 
19996 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
19997   predicate(UseAVX > 0);
19998   match(Set dst (AddVD src1 src2));
19999   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
20000   ins_encode %{
20001     int vlen_enc = vector_length_encoding(this);
20002     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20003   %}
20004   ins_pipe( pipe_slow );
20005 %}
20006 
20007 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20008   predicate((UseAVX > 0) &&
20009             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20010   match(Set dst (AddVD src (LoadVector mem)));
20011   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
20012   ins_encode %{
20013     int vlen_enc = vector_length_encoding(this);
20014     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20015   %}
20016   ins_pipe( pipe_slow );
20017 %}
20018 
20019 // --------------------------------- SUB --------------------------------------
20020 
20021 // Bytes vector sub
20022 instruct vsubB(vec dst, vec src) %{
20023   predicate(UseAVX == 0);
20024   match(Set dst (SubVB dst src));
20025   format %{ "psubb   $dst,$src\t! sub packedB" %}
20026   ins_encode %{
20027     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20028   %}
20029   ins_pipe( pipe_slow );
20030 %}
20031 
20032 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20033   predicate(UseAVX > 0);
20034   match(Set dst (SubVB src1 src2));
20035   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20036   ins_encode %{
20037     int vlen_enc = vector_length_encoding(this);
20038     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20039   %}
20040   ins_pipe( pipe_slow );
20041 %}
20042 
20043 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20044   predicate((UseAVX > 0) &&
20045             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20046   match(Set dst (SubVB src (LoadVector mem)));
20047   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20048   ins_encode %{
20049     int vlen_enc = vector_length_encoding(this);
20050     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20051   %}
20052   ins_pipe( pipe_slow );
20053 %}
20054 
20055 // Shorts/Chars vector sub
20056 instruct vsubS(vec dst, vec src) %{
20057   predicate(UseAVX == 0);
20058   match(Set dst (SubVS dst src));
20059   format %{ "psubw   $dst,$src\t! sub packedS" %}
20060   ins_encode %{
20061     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20062   %}
20063   ins_pipe( pipe_slow );
20064 %}
20065 
20066 
20067 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20068   predicate(UseAVX > 0);
20069   match(Set dst (SubVS src1 src2));
20070   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20071   ins_encode %{
20072     int vlen_enc = vector_length_encoding(this);
20073     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20074   %}
20075   ins_pipe( pipe_slow );
20076 %}
20077 
20078 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20079   predicate((UseAVX > 0) &&
20080             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20081   match(Set dst (SubVS src (LoadVector mem)));
20082   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20083   ins_encode %{
20084     int vlen_enc = vector_length_encoding(this);
20085     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20086   %}
20087   ins_pipe( pipe_slow );
20088 %}
20089 
20090 // Integers vector sub
20091 instruct vsubI(vec dst, vec src) %{
20092   predicate(UseAVX == 0);
20093   match(Set dst (SubVI dst src));
20094   format %{ "psubd   $dst,$src\t! sub packedI" %}
20095   ins_encode %{
20096     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20097   %}
20098   ins_pipe( pipe_slow );
20099 %}
20100 
20101 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20102   predicate(UseAVX > 0);
20103   match(Set dst (SubVI src1 src2));
20104   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20105   ins_encode %{
20106     int vlen_enc = vector_length_encoding(this);
20107     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20108   %}
20109   ins_pipe( pipe_slow );
20110 %}
20111 
20112 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20113   predicate((UseAVX > 0) &&
20114             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20115   match(Set dst (SubVI src (LoadVector mem)));
20116   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20117   ins_encode %{
20118     int vlen_enc = vector_length_encoding(this);
20119     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20120   %}
20121   ins_pipe( pipe_slow );
20122 %}
20123 
20124 // Longs vector sub
20125 instruct vsubL(vec dst, vec src) %{
20126   predicate(UseAVX == 0);
20127   match(Set dst (SubVL dst src));
20128   format %{ "psubq   $dst,$src\t! sub packedL" %}
20129   ins_encode %{
20130     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20131   %}
20132   ins_pipe( pipe_slow );
20133 %}
20134 
20135 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20136   predicate(UseAVX > 0);
20137   match(Set dst (SubVL src1 src2));
20138   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20139   ins_encode %{
20140     int vlen_enc = vector_length_encoding(this);
20141     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20142   %}
20143   ins_pipe( pipe_slow );
20144 %}
20145 
20146 
20147 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20148   predicate((UseAVX > 0) &&
20149             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20150   match(Set dst (SubVL src (LoadVector mem)));
20151   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20152   ins_encode %{
20153     int vlen_enc = vector_length_encoding(this);
20154     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20155   %}
20156   ins_pipe( pipe_slow );
20157 %}
20158 
20159 // Floats vector sub
20160 instruct vsubF(vec dst, vec src) %{
20161   predicate(UseAVX == 0);
20162   match(Set dst (SubVF dst src));
20163   format %{ "subps   $dst,$src\t! sub packedF" %}
20164   ins_encode %{
20165     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20166   %}
20167   ins_pipe( pipe_slow );
20168 %}
20169 
20170 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20171   predicate(UseAVX > 0);
20172   match(Set dst (SubVF src1 src2));
20173   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20174   ins_encode %{
20175     int vlen_enc = vector_length_encoding(this);
20176     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20177   %}
20178   ins_pipe( pipe_slow );
20179 %}
20180 
20181 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20182   predicate((UseAVX > 0) &&
20183             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20184   match(Set dst (SubVF src (LoadVector mem)));
20185   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20186   ins_encode %{
20187     int vlen_enc = vector_length_encoding(this);
20188     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20189   %}
20190   ins_pipe( pipe_slow );
20191 %}
20192 
20193 // Doubles vector sub
20194 instruct vsubD(vec dst, vec src) %{
20195   predicate(UseAVX == 0);
20196   match(Set dst (SubVD dst src));
20197   format %{ "subpd   $dst,$src\t! sub packedD" %}
20198   ins_encode %{
20199     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20200   %}
20201   ins_pipe( pipe_slow );
20202 %}
20203 
20204 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20205   predicate(UseAVX > 0);
20206   match(Set dst (SubVD src1 src2));
20207   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20208   ins_encode %{
20209     int vlen_enc = vector_length_encoding(this);
20210     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20211   %}
20212   ins_pipe( pipe_slow );
20213 %}
20214 
20215 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20216   predicate((UseAVX > 0) &&
20217             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20218   match(Set dst (SubVD src (LoadVector mem)));
20219   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20220   ins_encode %{
20221     int vlen_enc = vector_length_encoding(this);
20222     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20223   %}
20224   ins_pipe( pipe_slow );
20225 %}
20226 
20227 // --------------------------------- MUL --------------------------------------
20228 
20229 // Byte vector mul
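// SSE/AVX have no byte-granular multiply, so byte multiplication is
// synthesized from 16-bit multiplies: either sign-extend the lanes to words,
// multiply, and repack the low bytes (vmul8B), or multiply the odd and even
// byte positions separately inside word lanes and recombine them with shifts
// and OR (vmulB, vmulB_reg).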
20230 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20231   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20232   match(Set dst (MulVB src1 src2));
20233   effect(TEMP dst, TEMP xtmp);
20234   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20235   ins_encode %{
20236     assert(UseSSE > 3, "required");
20237     __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20238     __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20239     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20240     __ psllw($dst$$XMMRegister, 8);
20241     __ psrlw($dst$$XMMRegister, 8);
20242     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20243   %}
20244   ins_pipe( pipe_slow );
20245 %}
20246 
20247 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20248   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20249   match(Set dst (MulVB src1 src2));
20250   effect(TEMP dst, TEMP xtmp);
20251   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20252   ins_encode %{
20253     assert(UseSSE > 3, "required");
20254     // Odd-index elements
20255     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20256     __ psrlw($dst$$XMMRegister, 8);
20257     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20258     __ psrlw($xtmp$$XMMRegister, 8);
20259     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20260     __ psllw($dst$$XMMRegister, 8);
20261     // Even-index elements
20262     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20263     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20264     __ psllw($xtmp$$XMMRegister, 8);
20265     __ psrlw($xtmp$$XMMRegister, 8);
20266     // Combine
20267     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20268   %}
20269   ins_pipe( pipe_slow );
20270 %}
20271 
20272 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20273   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20274   match(Set dst (MulVB src1 src2));
20275   effect(TEMP xtmp1, TEMP xtmp2);
20276   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20277   ins_encode %{
20278     int vlen_enc = vector_length_encoding(this);
20279     // Odd-index elements
20280     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20281     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20282     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20283     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20284     // Even-index elements
20285     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20286     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20287     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20288     // Combine
20289     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20290   %}
20291   ins_pipe( pipe_slow );
20292 %}
20293 
20294 // Shorts/Chars vector mul
20295 instruct vmulS(vec dst, vec src) %{
20296   predicate(UseAVX == 0);
20297   match(Set dst (MulVS dst src));
20298   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20299   ins_encode %{
20300     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20301   %}
20302   ins_pipe( pipe_slow );
20303 %}
20304 
20305 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20306   predicate(UseAVX > 0);
20307   match(Set dst (MulVS src1 src2));
20308   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20309   ins_encode %{
20310     int vlen_enc = vector_length_encoding(this);
20311     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20312   %}
20313   ins_pipe( pipe_slow );
20314 %}
20315 
20316 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20317   predicate((UseAVX > 0) &&
20318             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20319   match(Set dst (MulVS src (LoadVector mem)));
20320   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20321   ins_encode %{
20322     int vlen_enc = vector_length_encoding(this);
20323     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20324   %}
20325   ins_pipe( pipe_slow );
20326 %}
20327 
20328 // Integers vector mul
20329 instruct vmulI(vec dst, vec src) %{
20330   predicate(UseAVX == 0);
20331   match(Set dst (MulVI dst src));
20332   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20333   ins_encode %{
20334     assert(UseSSE > 3, "required");
20335     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20336   %}
20337   ins_pipe( pipe_slow );
20338 %}
20339 
20340 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20341   predicate(UseAVX > 0);
20342   match(Set dst (MulVI src1 src2));
20343   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20344   ins_encode %{
20345     int vlen_enc = vector_length_encoding(this);
20346     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20347   %}
20348   ins_pipe( pipe_slow );
20349 %}
20350 
20351 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20352   predicate((UseAVX > 0) &&
20353             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20354   match(Set dst (MulVI src (LoadVector mem)));
20355   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20356   ins_encode %{
20357     int vlen_enc = vector_length_encoding(this);
20358     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20359   %}
20360   ins_pipe( pipe_slow );
20361 %}
20362 
20363 // Longs vector mul
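// A packed 64x64->64 multiply (evpmullq) exists only with AVX512DQ (plus
// AVX512VL for sub-512-bit vectors); otherwise vmulL/vmulL_reg below build
// the product from 32-bit multiplies.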
20364 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20365   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20366              VM_Version::supports_avx512dq()) ||
20367             VM_Version::supports_avx512vldq());
20368   match(Set dst (MulVL src1 src2));
20369   ins_cost(500);
20370   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20371   ins_encode %{
20372     assert(UseAVX > 2, "required");
20373     int vlen_enc = vector_length_encoding(this);
20374     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20375   %}
20376   ins_pipe( pipe_slow );
20377 %}
20378 
20379 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20380   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20381              VM_Version::supports_avx512dq()) ||
20382             (Matcher::vector_length_in_bytes(n) > 8 &&
20383              VM_Version::supports_avx512vldq()));
20384   match(Set dst (MulVL src (LoadVector mem)));
20385   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20386   ins_cost(500);
20387   ins_encode %{
20388     assert(UseAVX > 2, "required");
20389     int vlen_enc = vector_length_encoding(this);
20390     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20391   %}
20392   ins_pipe( pipe_slow );
20393 %}
20394 
20395 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20396   predicate(UseAVX == 0);
20397   match(Set dst (MulVL src1 src2));
20398   ins_cost(500);
20399   effect(TEMP dst, TEMP xtmp);
20400   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20401   ins_encode %{
20402     assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi products; only the lower 32 bits are of interest
20404     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20405     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20406     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20407     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20408     __ psllq($dst$$XMMRegister, 32);
20409     // Get the lo-lo products
20410     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20411     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20412     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20413   %}
20414   ins_pipe( pipe_slow );
20415 %}
20416 
20417 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20418   predicate(UseAVX > 0 &&
20419             ((Matcher::vector_length_in_bytes(n) == 64 &&
20420               !VM_Version::supports_avx512dq()) ||
20421              (Matcher::vector_length_in_bytes(n) < 64 &&
20422               !VM_Version::supports_avx512vldq())));
20423   match(Set dst (MulVL src1 src2));
20424   effect(TEMP xtmp1, TEMP xtmp2);
20425   ins_cost(500);
20426   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20427   ins_encode %{
20428     int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi products; only the lower 32 bits are of interest
20430     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20431     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20432     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20433     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20434     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20435     // Get the lo-lo products
20436     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20437     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20438   %}
20439   ins_pipe( pipe_slow );
20440 %}
20441 
20442 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20443   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20444   match(Set dst (MulVL src1 src2));
20445   ins_cost(100);
20446   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20447   ins_encode %{
20448     int vlen_enc = vector_length_encoding(this);
20449     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20450   %}
20451   ins_pipe( pipe_slow );
20452 %}
20453 
20454 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20455   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20456   match(Set dst (MulVL src1 src2));
20457   ins_cost(100);
20458   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20459   ins_encode %{
20460     int vlen_enc = vector_length_encoding(this);
20461     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20462   %}
20463   ins_pipe( pipe_slow );
20464 %}
20465 
20466 // Floats vector mul
20467 instruct vmulF(vec dst, vec src) %{
20468   predicate(UseAVX == 0);
20469   match(Set dst (MulVF dst src));
20470   format %{ "mulps   $dst,$src\t! mul packedF" %}
20471   ins_encode %{
20472     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20473   %}
20474   ins_pipe( pipe_slow );
20475 %}
20476 
20477 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20478   predicate(UseAVX > 0);
20479   match(Set dst (MulVF src1 src2));
20480   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
20481   ins_encode %{
20482     int vlen_enc = vector_length_encoding(this);
20483     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20484   %}
20485   ins_pipe( pipe_slow );
20486 %}
20487 
20488 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20489   predicate((UseAVX > 0) &&
20490             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20491   match(Set dst (MulVF src (LoadVector mem)));
20492   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
20493   ins_encode %{
20494     int vlen_enc = vector_length_encoding(this);
20495     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20496   %}
20497   ins_pipe( pipe_slow );
20498 %}
20499 
20500 // Doubles vector mul
20501 instruct vmulD(vec dst, vec src) %{
20502   predicate(UseAVX == 0);
20503   match(Set dst (MulVD dst src));
20504   format %{ "mulpd   $dst,$src\t! mul packedD" %}
20505   ins_encode %{
20506     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20507   %}
20508   ins_pipe( pipe_slow );
20509 %}
20510 
20511 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20512   predicate(UseAVX > 0);
20513   match(Set dst (MulVD src1 src2));
20514   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
20515   ins_encode %{
20516     int vlen_enc = vector_length_encoding(this);
20517     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20518   %}
20519   ins_pipe( pipe_slow );
20520 %}
20521 
20522 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20523   predicate((UseAVX > 0) &&
20524             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20525   match(Set dst (MulVD src (LoadVector mem)));
20526   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
20527   ins_encode %{
20528     int vlen_enc = vector_length_encoding(this);
20529     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20530   %}
20531   ins_pipe( pipe_slow );
20532 %}
20533 
20534 // --------------------------------- DIV --------------------------------------
20535 
20536 // Floats vector div
20537 instruct vdivF(vec dst, vec src) %{
20538   predicate(UseAVX == 0);
20539   match(Set dst (DivVF dst src));
20540   format %{ "divps   $dst,$src\t! div packedF" %}
20541   ins_encode %{
20542     __ divps($dst$$XMMRegister, $src$$XMMRegister);
20543   %}
20544   ins_pipe( pipe_slow );
20545 %}
20546 
20547 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20548   predicate(UseAVX > 0);
20549   match(Set dst (DivVF src1 src2));
20550   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
20551   ins_encode %{
20552     int vlen_enc = vector_length_encoding(this);
20553     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20554   %}
20555   ins_pipe( pipe_slow );
20556 %}
20557 
20558 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20559   predicate((UseAVX > 0) &&
20560             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20561   match(Set dst (DivVF src (LoadVector mem)));
20562   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
20563   ins_encode %{
20564     int vlen_enc = vector_length_encoding(this);
20565     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20566   %}
20567   ins_pipe( pipe_slow );
20568 %}
20569 
20570 // Doubles vector div
20571 instruct vdivD(vec dst, vec src) %{
20572   predicate(UseAVX == 0);
20573   match(Set dst (DivVD dst src));
20574   format %{ "divpd   $dst,$src\t! div packedD" %}
20575   ins_encode %{
20576     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20577   %}
20578   ins_pipe( pipe_slow );
20579 %}
20580 
20581 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20582   predicate(UseAVX > 0);
20583   match(Set dst (DivVD src1 src2));
20584   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
20585   ins_encode %{
20586     int vlen_enc = vector_length_encoding(this);
20587     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20588   %}
20589   ins_pipe( pipe_slow );
20590 %}
20591 
20592 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20593   predicate((UseAVX > 0) &&
20594             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20595   match(Set dst (DivVD src (LoadVector mem)));
20596   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
20597   ins_encode %{
20598     int vlen_enc = vector_length_encoding(this);
20599     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20600   %}
20601   ins_pipe( pipe_slow );
20602 %}
20603 
20604 // ------------------------------ MinMax ---------------------------------------
20605 
20606 // Byte, Short, Int vector Min/Max
20607 instruct minmax_reg_sse(vec dst, vec src) %{
20608   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20609             UseAVX == 0);
20610   match(Set dst (MinV dst src));
20611   match(Set dst (MaxV dst src));
20612   format %{ "vector_minmax  $dst,$src\t!  " %}
20613   ins_encode %{
20614     assert(UseSSE >= 4, "required");
20615 
20616     int opcode = this->ideal_Opcode();
20617     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20618     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20619   %}
20620   ins_pipe( pipe_slow );
20621 %}
20622 
20623 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20624   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20625             UseAVX > 0);
20626   match(Set dst (MinV src1 src2));
20627   match(Set dst (MaxV src1 src2));
20628   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
20629   ins_encode %{
20630     int opcode = this->ideal_Opcode();
20631     int vlen_enc = vector_length_encoding(this);
20632     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20633 
20634     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20635   %}
20636   ins_pipe( pipe_slow );
20637 %}
20638 
20639 // Long vector Min/Max
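// Packed signed 64-bit min/max (vpminsq/vpmaxsq) is AVX-512 only, so the
// pre-EVEX forms are synthesized from compares and blends in the macro
// assembler. The SSE form pins $tmp to xmm0 because the legacy
// pblendvb/blendvpd encodings take their mask implicitly in xmm0.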
20640 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20641   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20642             UseAVX == 0);
20643   match(Set dst (MinV dst src));
20644   match(Set dst (MaxV src dst));
20645   effect(TEMP dst, TEMP tmp);
20646   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
20647   ins_encode %{
20648     assert(UseSSE >= 4, "required");
20649 
20650     int opcode = this->ideal_Opcode();
20651     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20652     assert(elem_bt == T_LONG, "sanity");
20653 
20654     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20655   %}
20656   ins_pipe( pipe_slow );
20657 %}
20658 
20659 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20660   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20661             UseAVX > 0 && !VM_Version::supports_avx512vl());
20662   match(Set dst (MinV src1 src2));
20663   match(Set dst (MaxV src1 src2));
20664   effect(TEMP dst);
20665   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20666   ins_encode %{
20667     int vlen_enc = vector_length_encoding(this);
20668     int opcode = this->ideal_Opcode();
20669     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20670     assert(elem_bt == T_LONG, "sanity");
20671 
20672     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20673   %}
20674   ins_pipe( pipe_slow );
20675 %}
20676 
20677 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20678   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20679             Matcher::vector_element_basic_type(n) == T_LONG);
20680   match(Set dst (MinV src1 src2));
20681   match(Set dst (MaxV src1 src2));
20682   format %{ "vector_minmaxL  $dst,$src1,src2\t! " %}
20683   ins_encode %{
20684     assert(UseAVX > 2, "required");
20685 
20686     int vlen_enc = vector_length_encoding(this);
20687     int opcode = this->ideal_Opcode();
20688     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20689     assert(elem_bt == T_LONG, "sanity");
20690 
20691     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20692   %}
20693   ins_pipe( pipe_slow );
20694 %}
20695 
// Float/Double vector Min/Max (AVX10.2)
20697 instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{
20698   predicate(VM_Version::supports_avx10_2() &&
20699             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20700   match(Set dst (MinV a b));
20701   match(Set dst (MaxV a b));
20702   format %{ "vector_minmaxFP  $dst, $a, $b" %}
20703   ins_encode %{
20704     int vlen_enc = vector_length_encoding(this);
20705     int opcode = this->ideal_Opcode();
20706     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20707     __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20708   %}
20709   ins_pipe( pipe_slow );
20710 %}
20711 
// Float/Double vector Min/Max (pre-AVX10.2)
20713 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20714   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20715             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20716             UseAVX > 0);
20717   match(Set dst (MinV a b));
20718   match(Set dst (MaxV a b));
20719   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20720   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20721   ins_encode %{
20722     assert(UseAVX > 0, "required");
20723 
20724     int opcode = this->ideal_Opcode();
20725     int vlen_enc = vector_length_encoding(this);
20726     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20727 
20728     __ vminmax_fp(opcode, elem_bt,
20729                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20731   %}
20732   ins_pipe( pipe_slow );
20733 %}
20734 
20735 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20736   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20737             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20738   match(Set dst (MinV a b));
20739   match(Set dst (MaxV a b));
20740   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20741   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20742   ins_encode %{
20743     assert(UseAVX > 2, "required");
20744 
20745     int opcode = this->ideal_Opcode();
20746     int vlen_enc = vector_length_encoding(this);
20747     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20748 
20749     __ evminmax_fp(opcode, elem_bt,
20750                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20752   %}
20753   ins_pipe( pipe_slow );
20754 %}
20755 
20756 // ------------------------------ Unsigned vector Min/Max ----------------------
20757 
20758 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20759   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20760   match(Set dst (UMinV a b));
20761   match(Set dst (UMaxV a b));
20762   format %{ "vector_uminmax $dst,$a,$b\t!" %}
20763   ins_encode %{
20764     int opcode = this->ideal_Opcode();
20765     int vlen_enc = vector_length_encoding(this);
20766     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20767     assert(is_integral_type(elem_bt), "");
20768     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20769   %}
20770   ins_pipe( pipe_slow );
20771 %}
20772 
20773 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20774   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20775   match(Set dst (UMinV a (LoadVector b)));
20776   match(Set dst (UMaxV a (LoadVector b)));
20777   format %{ "vector_uminmax $dst,$a,$b\t!" %}
20778   ins_encode %{
20779     int opcode = this->ideal_Opcode();
20780     int vlen_enc = vector_length_encoding(this);
20781     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20782     assert(is_integral_type(elem_bt), "");
20783     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
20784   %}
20785   ins_pipe( pipe_slow );
20786 %}
20787 
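// Without AVX512VL there is no unsigned 64-bit min/max instruction, so the
// T_LONG case below goes through vpuminmaxq with two temporaries. A common
// emulation (a sketch; the helper may differ in detail) biases both operands
// by the sign bit so that a signed comparison orders them as unsigned values:
//
//   flip(x)    = x ^ 0x8000000000000000
//   umax(a, b) = flip(signed_max(flip(a), flip(b)))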
20788 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
20789   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
20790   match(Set dst (UMinV a b));
20791   match(Set dst (UMaxV a b));
20792   effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
20794   ins_encode %{
20795     int opcode = this->ideal_Opcode();
20796     int vlen_enc = vector_length_encoding(this);
20797     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20798   %}
20799   ins_pipe( pipe_slow );
20800 %}
20801 
20802 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
20803   match(Set dst (UMinV (Binary dst src2) mask));
20804   match(Set dst (UMaxV (Binary dst src2) mask));
20805   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20806   ins_encode %{
20807     int vlen_enc = vector_length_encoding(this);
20808     BasicType bt = Matcher::vector_element_basic_type(this);
20809     int opc = this->ideal_Opcode();
20810     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20811                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
20812   %}
20813   ins_pipe( pipe_slow );
20814 %}
20815 
20816 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
20817   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
20818   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
20819   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20820   ins_encode %{
20821     int vlen_enc = vector_length_encoding(this);
20822     BasicType bt = Matcher::vector_element_basic_type(this);
20823     int opc = this->ideal_Opcode();
20824     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20825                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
20826   %}
20827   ins_pipe( pipe_slow );
20828 %}
20829 
20830 // --------------------------------- Signum/CopySign ---------------------------
20831 
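// Scalar semantics of Math.signum, which the signum_fp and vector_signum_*
// helpers compute using the $zero and $one constant operands (illustrative):
//
//   signum(x) = NaN   if x is NaN
//             = x     if x == +0.0 or x == -0.0 (zero keeps its sign)
//             = 1.0   if x > 0
//             = -1.0  if x < 0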
20832 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
20833   match(Set dst (SignumF dst (Binary zero one)));
20834   effect(KILL cr);
20835   format %{ "signumF $dst, $dst" %}
20836   ins_encode %{
20837     int opcode = this->ideal_Opcode();
20838     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20839   %}
20840   ins_pipe( pipe_slow );
20841 %}
20842 
20843 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
20844   match(Set dst (SignumD dst (Binary zero one)));
20845   effect(KILL cr);
20846   format %{ "signumD $dst, $dst" %}
20847   ins_encode %{
20848     int opcode = this->ideal_Opcode();
20849     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20850   %}
20851   ins_pipe( pipe_slow );
20852 %}
20853 
20854 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
20855   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
20856   match(Set dst (SignumVF src (Binary zero one)));
20857   match(Set dst (SignumVD src (Binary zero one)));
20858   effect(TEMP dst, TEMP xtmp1);
20859   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
20860   ins_encode %{
20861     int opcode = this->ideal_Opcode();
20862     int vec_enc = vector_length_encoding(this);
20863     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20864                          $xtmp1$$XMMRegister, vec_enc);
20865   %}
20866   ins_pipe( pipe_slow );
20867 %}
20868 
20869 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
20870   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
20871   match(Set dst (SignumVF src (Binary zero one)));
20872   match(Set dst (SignumVD src (Binary zero one)));
20873   effect(TEMP dst, TEMP ktmp1);
20874   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
20875   ins_encode %{
20876     int opcode = this->ideal_Opcode();
20877     int vec_enc = vector_length_encoding(this);
20878     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20879                           $ktmp1$$KRegister, vec_enc);
20880   %}
20881   ins_pipe( pipe_slow );
20882 %}
20883 
20884 // ---------------------------------------
20885 // For copySign use 0xE4 as writemask for vpternlog
20886 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20887 // C (xmm2) is set to 0x7FFFFFFF
20888 // Wherever xmm2 is 0, we want to pick from B (sign)
20889 // Wherever xmm2 is 1, we want to pick from A (src)
20890 //
20891 // A B C Result
20892 // 0 0 0 0
20893 // 0 0 1 0
20894 // 0 1 0 1
20895 // 0 1 1 0
20896 // 1 0 0 0
20897 // 1 0 1 1
20898 // 1 1 0 1
20899 // 1 1 1 1
20900 //
20901 // Result going from high bit to low bit is 0x11100100 = 0xe4
20902 // ---------------------------------------
20903 
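// A minimal scalar sketch of the same selection, assuming the IEEE-754 float
// bit layout (illustrative only; the instructs below do it in one vpternlog):
//
//   jint copy_sign_bits(jint magnitude, jint sign) {
//     const jint C = 0x7FFFFFFF;              // exponent + mantissa mask
//     return (magnitude & C) | (sign & ~C);   // C==1 -> A (magnitude), C==0 -> B (sign)
//   }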
20904 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
20905   match(Set dst (CopySignF dst src));
20906   effect(TEMP tmp1, TEMP tmp2);
20907   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20908   ins_encode %{
20909     __ movl($tmp2$$Register, 0x7FFFFFFF);
20910     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
20911     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20912   %}
20913   ins_pipe( pipe_slow );
20914 %}
20915 
20916 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
20917   match(Set dst (CopySignD dst (Binary src zero)));
20918   ins_cost(100);
20919   effect(TEMP tmp1, TEMP tmp2);
20920   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20921   ins_encode %{
20922     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
20923     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
20924     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20925   %}
20926   ins_pipe( pipe_slow );
20927 %}
20928 
20929 //----------------------------- CompressBits/ExpandBits ------------------------
20930 
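// CompressBits/ExpandBits map Integer/Long.compress and Integer/Long.expand
// onto the BMI2 pext/pdep instructions. A worked 8-bit example (illustrative):
//
//   src  = 0b10110110, mask = 0b01101100    (mask selects bit positions 2,3,5,6)
//   pext(src, mask) = 0b00000101            (src bits 2,3,5,6 packed into the low bits)
//   pdep(src, mask) = 0b00101000            (low src bits scattered to positions 2,3,5,6)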
20931 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20932   predicate(n->bottom_type()->isa_int());
20933   match(Set dst (CompressBits src mask));
20934   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
20935   ins_encode %{
20936     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
20937   %}
20938   ins_pipe( pipe_slow );
20939 %}
20940 
20941 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20942   predicate(n->bottom_type()->isa_int());
20943   match(Set dst (ExpandBits src mask));
20944   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
20945   ins_encode %{
20946     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
20947   %}
20948   ins_pipe( pipe_slow );
20949 %}
20950 
20951 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20952   predicate(n->bottom_type()->isa_int());
20953   match(Set dst (CompressBits src (LoadI mask)));
20954   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
20955   ins_encode %{
20956     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
20957   %}
20958   ins_pipe( pipe_slow );
20959 %}
20960 
20961 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20962   predicate(n->bottom_type()->isa_int());
20963   match(Set dst (ExpandBits src (LoadI mask)));
20964   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
20965   ins_encode %{
20966     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
20967   %}
20968   ins_pipe( pipe_slow );
20969 %}
20970 
20971 // --------------------------------- Sqrt --------------------------------------
20972 
20973 instruct vsqrtF_reg(vec dst, vec src) %{
20974   match(Set dst (SqrtVF src));
20975   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
20976   ins_encode %{
20977     assert(UseAVX > 0, "required");
20978     int vlen_enc = vector_length_encoding(this);
20979     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
20980   %}
20981   ins_pipe( pipe_slow );
20982 %}
20983 
20984 instruct vsqrtF_mem(vec dst, memory mem) %{
20985   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
20986   match(Set dst (SqrtVF (LoadVector mem)));
20987   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
20988   ins_encode %{
20989     assert(UseAVX > 0, "required");
20990     int vlen_enc = vector_length_encoding(this);
20991     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
20992   %}
20993   ins_pipe( pipe_slow );
20994 %}
20995 
20996 // Floating point vector sqrt
20997 instruct vsqrtD_reg(vec dst, vec src) %{
20998   match(Set dst (SqrtVD src));
20999   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
21000   ins_encode %{
21001     assert(UseAVX > 0, "required");
21002     int vlen_enc = vector_length_encoding(this);
21003     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21004   %}
21005   ins_pipe( pipe_slow );
21006 %}
21007 
21008 instruct vsqrtD_mem(vec dst, memory mem) %{
21009   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21010   match(Set dst (SqrtVD (LoadVector mem)));
21011   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
21012   ins_encode %{
21013     assert(UseAVX > 0, "required");
21014     int vlen_enc = vector_length_encoding(this);
21015     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21016   %}
21017   ins_pipe( pipe_slow );
21018 %}
21019 
21020 // ------------------------------ Shift ---------------------------------------
21021 
// Left and right shift count vectors are the same on x86
// (only the lowest bits of the xmm register are used for the count).
21024 instruct vshiftcnt(vec dst, rRegI cnt) %{
21025   match(Set dst (LShiftCntV cnt));
21026   match(Set dst (RShiftCntV cnt));
21027   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21028   ins_encode %{
21029     __ movdl($dst$$XMMRegister, $cnt$$Register);
21030   %}
21031   ins_pipe( pipe_slow );
21032 %}
21033 
21034 // Byte vector shift
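// x86 has no byte-granular shift instructions, so byte shifts widen each byte
// to a word (sign- or zero-extending depending on the opcode), shift as words,
// mask the results back down to 8 significant bits, and narrow with packuswb.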
21035 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21036   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21037   match(Set dst ( LShiftVB src shift));
21038   match(Set dst ( RShiftVB src shift));
21039   match(Set dst (URShiftVB src shift));
21040   effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
21042   ins_encode %{
21043     assert(UseSSE > 3, "required");
21044     int opcode = this->ideal_Opcode();
21045     bool sign = (opcode != Op_URShiftVB);
21046     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21047     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21048     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21049     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21050     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21051   %}
21052   ins_pipe( pipe_slow );
21053 %}
21054 
21055 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21056   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21057             UseAVX <= 1);
21058   match(Set dst ( LShiftVB src shift));
21059   match(Set dst ( RShiftVB src shift));
21060   match(Set dst (URShiftVB src shift));
21061   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
21063   ins_encode %{
21064     assert(UseSSE > 3, "required");
21065     int opcode = this->ideal_Opcode();
21066     bool sign = (opcode != Op_URShiftVB);
21067     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21068     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21069     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21070     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21071     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21072     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21073     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21074     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21075     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21076   %}
21077   ins_pipe( pipe_slow );
21078 %}
21079 
21080 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21081   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21082             UseAVX > 1);
21083   match(Set dst ( LShiftVB src shift));
21084   match(Set dst ( RShiftVB src shift));
21085   match(Set dst (URShiftVB src shift));
21086   effect(TEMP dst, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
21088   ins_encode %{
21089     int opcode = this->ideal_Opcode();
21090     bool sign = (opcode != Op_URShiftVB);
21091     int vlen_enc = Assembler::AVX_256bit;
21092     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21093     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21094     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21095     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21096     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21097   %}
21098   ins_pipe( pipe_slow );
21099 %}
21100 
21101 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21102   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21103   match(Set dst ( LShiftVB src shift));
21104   match(Set dst ( RShiftVB src shift));
21105   match(Set dst (URShiftVB src shift));
21106   effect(TEMP dst, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
21108   ins_encode %{
21109     assert(UseAVX > 1, "required");
21110     int opcode = this->ideal_Opcode();
21111     bool sign = (opcode != Op_URShiftVB);
21112     int vlen_enc = Assembler::AVX_256bit;
21113     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21114     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21115     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21116     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21117     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21118     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21119     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21120     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21121     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21122   %}
21123   ins_pipe( pipe_slow );
21124 %}
21125 
21126 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21127   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21128   match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
21130   match(Set dst (URShiftVB src shift));
21131   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
21133   ins_encode %{
21134     assert(UseAVX > 2, "required");
21135     int opcode = this->ideal_Opcode();
21136     bool sign = (opcode != Op_URShiftVB);
21137     int vlen_enc = Assembler::AVX_512bit;
21138     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21139     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21140     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21141     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21142     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21143     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21144     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21145     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21146     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21147     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21148     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21149     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21150   %}
21151   ins_pipe( pipe_slow );
21152 %}
21153 
// A shorts-vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts the short value into an int
// with sign extension before shifting. Char vectors are fine, though, since
// chars are unsigned values.
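// For example (scalar Java, illustrative):
//   short s = -1;        // 0xFFFF
//   int   r = s >>> 2;   // s is first sign-extended to 0xFFFFFFFF, so
//                        // r == 0x3FFFFFFF and its low 16 bits are 0xFFFF,
//                        // not the 0x3FFF a pure 16-bit logical shift gives.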
21158 // Shorts/Chars vector left shift
21159 instruct vshiftS(vec dst, vec src, vec shift) %{
21160   predicate(!n->as_ShiftV()->is_var_shift());
21161   match(Set dst ( LShiftVS src shift));
21162   match(Set dst ( RShiftVS src shift));
21163   match(Set dst (URShiftVS src shift));
21164   effect(TEMP dst, USE src, USE shift);
21165   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21166   ins_encode %{
21167     int opcode = this->ideal_Opcode();
21168     if (UseAVX > 0) {
21169       int vlen_enc = vector_length_encoding(this);
21170       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21171     } else {
21172       int vlen = Matcher::vector_length(this);
21173       if (vlen == 2) {
21174         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21175         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21176       } else if (vlen == 4) {
21177         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21178         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21179       } else {
        assert(vlen == 8, "sanity");
21181         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21182         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21183       }
21184     }
21185   %}
21186   ins_pipe( pipe_slow );
21187 %}
21188 
21189 // Integers vector left shift
21190 instruct vshiftI(vec dst, vec src, vec shift) %{
21191   predicate(!n->as_ShiftV()->is_var_shift());
21192   match(Set dst ( LShiftVI src shift));
21193   match(Set dst ( RShiftVI src shift));
21194   match(Set dst (URShiftVI src shift));
21195   effect(TEMP dst, USE src, USE shift);
21196   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21197   ins_encode %{
21198     int opcode = this->ideal_Opcode();
21199     if (UseAVX > 0) {
21200       int vlen_enc = vector_length_encoding(this);
21201       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21202     } else {
21203       int vlen = Matcher::vector_length(this);
21204       if (vlen == 2) {
21205         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21206         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21207       } else {
21208         assert(vlen == 4, "sanity");
21209         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21210         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21211       }
21212     }
21213   %}
21214   ins_pipe( pipe_slow );
21215 %}
21216 
21217 // Integers vector left constant shift
21218 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21219   match(Set dst (LShiftVI src (LShiftCntV shift)));
21220   match(Set dst (RShiftVI src (RShiftCntV shift)));
21221   match(Set dst (URShiftVI src (RShiftCntV shift)));
21222   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21223   ins_encode %{
21224     int opcode = this->ideal_Opcode();
21225     if (UseAVX > 0) {
21226       int vector_len = vector_length_encoding(this);
21227       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21228     } else {
21229       int vlen = Matcher::vector_length(this);
21230       if (vlen == 2) {
21231         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21232         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21233       } else {
21234         assert(vlen == 4, "sanity");
21235         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21236         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21237       }
21238     }
21239   %}
21240   ins_pipe( pipe_slow );
21241 %}
21242 
21243 // Longs vector shift
21244 instruct vshiftL(vec dst, vec src, vec shift) %{
21245   predicate(!n->as_ShiftV()->is_var_shift());
21246   match(Set dst ( LShiftVL src shift));
21247   match(Set dst (URShiftVL src shift));
21248   effect(TEMP dst, USE src, USE shift);
21249   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21250   ins_encode %{
21251     int opcode = this->ideal_Opcode();
21252     if (UseAVX > 0) {
21253       int vlen_enc = vector_length_encoding(this);
21254       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21255     } else {
21256       assert(Matcher::vector_length(this) == 2, "");
21257       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21258       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21259     }
21260   %}
21261   ins_pipe( pipe_slow );
21262 %}
21263 
21264 // Longs vector constant shift
21265 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21266   match(Set dst (LShiftVL src (LShiftCntV shift)));
21267   match(Set dst (URShiftVL src (RShiftCntV shift)));
21268   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21269   ins_encode %{
21270     int opcode = this->ideal_Opcode();
21271     if (UseAVX > 0) {
21272       int vector_len = vector_length_encoding(this);
21273       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21274     } else {
21275       assert(Matcher::vector_length(this) == 2, "");
21276       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21277       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21278     }
21279   %}
21280   ins_pipe( pipe_slow );
21281 %}
21282 
21283 // -------------------ArithmeticRightShift -----------------------------------
21284 // Long vector arithmetic right shift
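// SSE2/AVX2 lack a 64-bit arithmetic right shift, so the pattern below emulates
// it from the logical shift using the identity (n = shift count):
//
//   m = 0x8000000000000000 >>> n
//   (x >> n) == ((x >>> n) ^ m) - m
//
// The XOR/SUB pair sign-extends the vacated high bits when x was negative and
// is a no-op when x was non-negative.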
21285 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21286   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21287   match(Set dst (RShiftVL src shift));
21288   effect(TEMP dst, TEMP tmp);
21289   format %{ "vshiftq $dst,$src,$shift" %}
21290   ins_encode %{
21291     uint vlen = Matcher::vector_length(this);
21292     if (vlen == 2) {
21293       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21294       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21295       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21296       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21297       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21298       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21299     } else {
21300       assert(vlen == 4, "sanity");
21301       assert(UseAVX > 1, "required");
21302       int vlen_enc = Assembler::AVX_256bit;
21303       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21304       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21305       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21306       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21307       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21308     }
21309   %}
21310   ins_pipe( pipe_slow );
21311 %}
21312 
21313 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21314   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21315   match(Set dst (RShiftVL src shift));
21316   format %{ "vshiftq $dst,$src,$shift" %}
21317   ins_encode %{
21318     int vlen_enc = vector_length_encoding(this);
21319     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21320   %}
21321   ins_pipe( pipe_slow );
21322 %}
21323 
21324 // ------------------- Variable Shift -----------------------------
21325 // Byte variable shift
21326 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21327   predicate(Matcher::vector_length(n) <= 8 &&
21328             n->as_ShiftV()->is_var_shift() &&
21329             !VM_Version::supports_avx512bw());
21330   match(Set dst ( LShiftVB src shift));
21331   match(Set dst ( RShiftVB src shift));
21332   match(Set dst (URShiftVB src shift));
21333   effect(TEMP dst, TEMP vtmp);
21334   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21335   ins_encode %{
21336     assert(UseAVX >= 2, "required");
21337 
21338     int opcode = this->ideal_Opcode();
21339     int vlen_enc = Assembler::AVX_128bit;
21340     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21341     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21342   %}
21343   ins_pipe( pipe_slow );
21344 %}
21345 
21346 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21347   predicate(Matcher::vector_length(n) == 16 &&
21348             n->as_ShiftV()->is_var_shift() &&
21349             !VM_Version::supports_avx512bw());
21350   match(Set dst ( LShiftVB src shift));
21351   match(Set dst ( RShiftVB src shift));
21352   match(Set dst (URShiftVB src shift));
21353   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21354   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21355   ins_encode %{
21356     assert(UseAVX >= 2, "required");
21357 
21358     int opcode = this->ideal_Opcode();
21359     int vlen_enc = Assembler::AVX_128bit;
21360     // Shift lower half and get word result in dst
21361     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21362 
21363     // Shift upper half and get word result in vtmp1
21364     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21365     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21366     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21367 
21368     // Merge and down convert the two word results to byte in dst
21369     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21370   %}
21371   ins_pipe( pipe_slow );
21372 %}
21373 
21374 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21375   predicate(Matcher::vector_length(n) == 32 &&
21376             n->as_ShiftV()->is_var_shift() &&
21377             !VM_Version::supports_avx512bw());
21378   match(Set dst ( LShiftVB src shift));
21379   match(Set dst ( RShiftVB src shift));
21380   match(Set dst (URShiftVB src shift));
21381   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21383   ins_encode %{
21384     assert(UseAVX >= 2, "required");
21385 
21386     int opcode = this->ideal_Opcode();
21387     int vlen_enc = Assembler::AVX_128bit;
21388     // Process lower 128 bits and get result in dst
21389     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21390     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21391     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21392     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21393     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21394 
21395     // Process higher 128 bits and get result in vtmp3
21396     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21397     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21398     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21399     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21400     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21401     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21402     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21403 
21404     // Merge the two results in dst
21405     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21406   %}
21407   ins_pipe( pipe_slow );
21408 %}
21409 
21410 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21411   predicate(Matcher::vector_length(n) <= 32 &&
21412             n->as_ShiftV()->is_var_shift() &&
21413             VM_Version::supports_avx512bw());
21414   match(Set dst ( LShiftVB src shift));
21415   match(Set dst ( RShiftVB src shift));
21416   match(Set dst (URShiftVB src shift));
21417   effect(TEMP dst, TEMP vtmp);
21418   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21419   ins_encode %{
21420     assert(UseAVX > 2, "required");
21421 
21422     int opcode = this->ideal_Opcode();
21423     int vlen_enc = vector_length_encoding(this);
21424     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21425   %}
21426   ins_pipe( pipe_slow );
21427 %}
21428 
21429 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21430   predicate(Matcher::vector_length(n) == 64 &&
21431             n->as_ShiftV()->is_var_shift() &&
21432             VM_Version::supports_avx512bw());
21433   match(Set dst ( LShiftVB src shift));
21434   match(Set dst ( RShiftVB src shift));
21435   match(Set dst (URShiftVB src shift));
21436   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21437   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21438   ins_encode %{
21439     assert(UseAVX > 2, "required");
21440 
21441     int opcode = this->ideal_Opcode();
21442     int vlen_enc = Assembler::AVX_256bit;
21443     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21444     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21445     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21446     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21447     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21448   %}
21449   ins_pipe( pipe_slow );
21450 %}
21451 
21452 // Short variable shift
21453 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21454   predicate(Matcher::vector_length(n) <= 8 &&
21455             n->as_ShiftV()->is_var_shift() &&
21456             !VM_Version::supports_avx512bw());
21457   match(Set dst ( LShiftVS src shift));
21458   match(Set dst ( RShiftVS src shift));
21459   match(Set dst (URShiftVS src shift));
21460   effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst,$src,$shift\t! using $vtmp as TEMP" %}
21462   ins_encode %{
21463     assert(UseAVX >= 2, "required");
21464 
21465     int opcode = this->ideal_Opcode();
21466     bool sign = (opcode != Op_URShiftVS);
21467     int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21470     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21471     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21472     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21473     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21474   %}
21475   ins_pipe( pipe_slow );
21476 %}
21477 
21478 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21479   predicate(Matcher::vector_length(n) == 16 &&
21480             n->as_ShiftV()->is_var_shift() &&
21481             !VM_Version::supports_avx512bw());
21482   match(Set dst ( LShiftVS src shift));
21483   match(Set dst ( RShiftVS src shift));
21484   match(Set dst (URShiftVS src shift));
21485   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst,$src,$shift\t! using $vtmp1, $vtmp2 as TEMP" %}
21487   ins_encode %{
21488     assert(UseAVX >= 2, "required");
21489 
21490     int opcode = this->ideal_Opcode();
21491     bool sign = (opcode != Op_URShiftVS);
21492     int vlen_enc = Assembler::AVX_256bit;
21493     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21494     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21495     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21496     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21497     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21498 
21499     // Shift upper half, with result in dst using vtmp1 as TEMP
21500     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21501     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21502     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21503     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21504     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21505     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21506 
21507     // Merge lower and upper half result into dst
21508     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21509     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21510   %}
21511   ins_pipe( pipe_slow );
21512 %}
21513 
21514 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21515   predicate(n->as_ShiftV()->is_var_shift() &&
21516             VM_Version::supports_avx512bw());
21517   match(Set dst ( LShiftVS src shift));
21518   match(Set dst ( RShiftVS src shift));
21519   match(Set dst (URShiftVS src shift));
21520   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21521   ins_encode %{
21522     assert(UseAVX > 2, "required");
21523 
21524     int opcode = this->ideal_Opcode();
21525     int vlen_enc = vector_length_encoding(this);
21526     if (!VM_Version::supports_avx512vl()) {
21527       vlen_enc = Assembler::AVX_512bit;
21528     }
21529     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21530   %}
21531   ins_pipe( pipe_slow );
21532 %}
21533 
// Integer variable shift
21535 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21536   predicate(n->as_ShiftV()->is_var_shift());
21537   match(Set dst ( LShiftVI src shift));
21538   match(Set dst ( RShiftVI src shift));
21539   match(Set dst (URShiftVI src shift));
21540   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21541   ins_encode %{
21542     assert(UseAVX >= 2, "required");
21543 
21544     int opcode = this->ideal_Opcode();
21545     int vlen_enc = vector_length_encoding(this);
21546     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21547   %}
21548   ins_pipe( pipe_slow );
21549 %}
21550 
// Long variable shift
21552 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21553   predicate(n->as_ShiftV()->is_var_shift());
21554   match(Set dst ( LShiftVL src shift));
21555   match(Set dst (URShiftVL src shift));
21556   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21557   ins_encode %{
21558     assert(UseAVX >= 2, "required");
21559 
21560     int opcode = this->ideal_Opcode();
21561     int vlen_enc = vector_length_encoding(this);
21562     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21563   %}
21564   ins_pipe( pipe_slow );
21565 %}
21566 
// Long variable arithmetic right shift
21568 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21569   predicate(Matcher::vector_length(n) <= 4 &&
21570             n->as_ShiftV()->is_var_shift() &&
21571             UseAVX == 2);
21572   match(Set dst (RShiftVL src shift));
21573   effect(TEMP dst, TEMP vtmp);
21574   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21575   ins_encode %{
21576     int opcode = this->ideal_Opcode();
21577     int vlen_enc = vector_length_encoding(this);
21578     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21579                  $vtmp$$XMMRegister);
21580   %}
21581   ins_pipe( pipe_slow );
21582 %}
21583 
21584 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21585   predicate(n->as_ShiftV()->is_var_shift() &&
21586             UseAVX > 2);
21587   match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21589   ins_encode %{
21590     int opcode = this->ideal_Opcode();
21591     int vlen_enc = vector_length_encoding(this);
21592     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21593   %}
21594   ins_pipe( pipe_slow );
21595 %}
21596 
21597 // --------------------------------- AND --------------------------------------
21598 
21599 instruct vand(vec dst, vec src) %{
21600   predicate(UseAVX == 0);
21601   match(Set dst (AndV dst src));
21602   format %{ "pand    $dst,$src\t! and vectors" %}
21603   ins_encode %{
21604     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21605   %}
21606   ins_pipe( pipe_slow );
21607 %}
21608 
21609 instruct vand_reg(vec dst, vec src1, vec src2) %{
21610   predicate(UseAVX > 0);
21611   match(Set dst (AndV src1 src2));
21612   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
21613   ins_encode %{
21614     int vlen_enc = vector_length_encoding(this);
21615     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21616   %}
21617   ins_pipe( pipe_slow );
21618 %}
21619 
21620 instruct vand_mem(vec dst, vec src, memory mem) %{
21621   predicate((UseAVX > 0) &&
21622             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21623   match(Set dst (AndV src (LoadVector mem)));
21624   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
21625   ins_encode %{
21626     int vlen_enc = vector_length_encoding(this);
21627     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21628   %}
21629   ins_pipe( pipe_slow );
21630 %}
21631 
21632 // --------------------------------- OR ---------------------------------------
21633 
21634 instruct vor(vec dst, vec src) %{
21635   predicate(UseAVX == 0);
21636   match(Set dst (OrV dst src));
21637   format %{ "por     $dst,$src\t! or vectors" %}
21638   ins_encode %{
21639     __ por($dst$$XMMRegister, $src$$XMMRegister);
21640   %}
21641   ins_pipe( pipe_slow );
21642 %}
21643 
21644 instruct vor_reg(vec dst, vec src1, vec src2) %{
21645   predicate(UseAVX > 0);
21646   match(Set dst (OrV src1 src2));
21647   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
21648   ins_encode %{
21649     int vlen_enc = vector_length_encoding(this);
21650     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21651   %}
21652   ins_pipe( pipe_slow );
21653 %}
21654 
21655 instruct vor_mem(vec dst, vec src, memory mem) %{
21656   predicate((UseAVX > 0) &&
21657             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21658   match(Set dst (OrV src (LoadVector mem)));
21659   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
21660   ins_encode %{
21661     int vlen_enc = vector_length_encoding(this);
21662     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21663   %}
21664   ins_pipe( pipe_slow );
21665 %}
21666 
21667 // --------------------------------- XOR --------------------------------------
21668 
21669 instruct vxor(vec dst, vec src) %{
21670   predicate(UseAVX == 0);
21671   match(Set dst (XorV dst src));
21672   format %{ "pxor    $dst,$src\t! xor vectors" %}
21673   ins_encode %{
21674     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21675   %}
21676   ins_pipe( pipe_slow );
21677 %}
21678 
21679 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21680   predicate(UseAVX > 0);
21681   match(Set dst (XorV src1 src2));
21682   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
21683   ins_encode %{
21684     int vlen_enc = vector_length_encoding(this);
21685     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21686   %}
21687   ins_pipe( pipe_slow );
21688 %}
21689 
21690 instruct vxor_mem(vec dst, vec src, memory mem) %{
21691   predicate((UseAVX > 0) &&
21692             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21693   match(Set dst (XorV src (LoadVector mem)));
21694   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
21695   ins_encode %{
21696     int vlen_enc = vector_length_encoding(this);
21697     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21698   %}
21699   ins_pipe( pipe_slow );
21700 %}
21701 
21702 // --------------------------------- VectorCast --------------------------------------
21703 
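// VectorCastX2Y nodes value-convert each lane from the source element type to
// the node's destination element type: widening integral casts use vpmovsx*,
// narrowing casts use evpmov* truncation or pack sequences, and int<->FP casts
// use the cvt* conversions, as in the instructs below.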
21704 instruct vcastBtoX(vec dst, vec src) %{
21705   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21706   match(Set dst (VectorCastB2X src));
21707   format %{ "vector_cast_b2x $dst,$src\t!" %}
21708   ins_encode %{
21709     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21710     int vlen_enc = vector_length_encoding(this);
21711     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21712   %}
21713   ins_pipe( pipe_slow );
21714 %}
21715 
21716 instruct vcastBtoD(legVec dst, legVec src) %{
21717   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21718   match(Set dst (VectorCastB2X src));
21719   format %{ "vector_cast_b2x $dst,$src\t!" %}
21720   ins_encode %{
21721     int vlen_enc = vector_length_encoding(this);
21722     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21723   %}
21724   ins_pipe( pipe_slow );
21725 %}
21726 
21727 instruct castStoX(vec dst, vec src) %{
21728   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21729             Matcher::vector_length(n->in(1)) <= 8 && // src
21730             Matcher::vector_element_basic_type(n) == T_BYTE);
21731   match(Set dst (VectorCastS2X src));
21732   format %{ "vector_cast_s2x $dst,$src" %}
21733   ins_encode %{
21734     assert(UseAVX > 0, "required");
21735 
21736     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21737     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21738   %}
21739   ins_pipe( pipe_slow );
21740 %}
21741 
21742 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21743   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21744             Matcher::vector_length(n->in(1)) == 16 && // src
21745             Matcher::vector_element_basic_type(n) == T_BYTE);
21746   effect(TEMP dst, TEMP vtmp);
21747   match(Set dst (VectorCastS2X src));
21748   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21749   ins_encode %{
21750     assert(UseAVX > 0, "required");
21751 
21752     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21753     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21754     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21755     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21756   %}
21757   ins_pipe( pipe_slow );
21758 %}
21759 
21760 instruct vcastStoX_evex(vec dst, vec src) %{
21761   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21762             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21763   match(Set dst (VectorCastS2X src));
21764   format %{ "vector_cast_s2x $dst,$src\t!" %}
21765   ins_encode %{
21766     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21767     int src_vlen_enc = vector_length_encoding(this, $src);
21768     int vlen_enc = vector_length_encoding(this);
21769     switch (to_elem_bt) {
21770       case T_BYTE:
21771         if (!VM_Version::supports_avx512vl()) {
21772           vlen_enc = Assembler::AVX_512bit;
21773         }
21774         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21775         break;
21776       case T_INT:
21777         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21778         break;
21779       case T_FLOAT:
21780         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21781         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21782         break;
21783       case T_LONG:
21784         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21785         break;
21786       case T_DOUBLE: {
21787         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
21788         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
21789         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21790         break;
21791       }
21792       default:
21793         ShouldNotReachHere();
21794     }
21795   %}
21796   ins_pipe( pipe_slow );
21797 %}
21798 
21799 instruct castItoX(vec dst, vec src) %{
21800   predicate(UseAVX <= 2 &&
21801             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
21802             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21803   match(Set dst (VectorCastI2X src));
21804   format %{ "vector_cast_i2x $dst,$src" %}
21805   ins_encode %{
21806     assert(UseAVX > 0, "required");
21807 
21808     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21809     int vlen_enc = vector_length_encoding(this, $src);
21810 
21811     if (to_elem_bt == T_BYTE) {
21812       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21813       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21814       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21815     } else {
21816       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21817       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21818       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21819     }
21820   %}
21821   ins_pipe( pipe_slow );
21822 %}
21823 
21824 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
21825   predicate(UseAVX <= 2 &&
21826             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21827             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21828   match(Set dst (VectorCastI2X src));
21829   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21830   effect(TEMP dst, TEMP vtmp);
21831   ins_encode %{
21832     assert(UseAVX > 0, "required");
21833 
21834     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21835     int vlen_enc = vector_length_encoding(this, $src);
21836 
21837     if (to_elem_bt == T_BYTE) {
21838       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21839       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21840       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21841       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21842     } else {
21843       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21844       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21845       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21846       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21847     }
21848   %}
21849   ins_pipe( pipe_slow );
21850 %}
21851 
21852 instruct vcastItoX_evex(vec dst, vec src) %{
21853   predicate(UseAVX > 2 ||
21854             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21855   match(Set dst (VectorCastI2X src));
21856   format %{ "vector_cast_i2x $dst,$src\t!" %}
21857   ins_encode %{
21858     assert(UseAVX > 0, "required");
21859 
21860     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
21861     int src_vlen_enc = vector_length_encoding(this, $src);
21862     int dst_vlen_enc = vector_length_encoding(this);
21863     switch (dst_elem_bt) {
21864       case T_BYTE:
21865         if (!VM_Version::supports_avx512vl()) {
21866           src_vlen_enc = Assembler::AVX_512bit;
21867         }
21868         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21869         break;
21870       case T_SHORT:
21871         if (!VM_Version::supports_avx512vl()) {
21872           src_vlen_enc = Assembler::AVX_512bit;
21873         }
21874         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21875         break;
21876       case T_FLOAT:
21877         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21878         break;
21879       case T_LONG:
21880         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21881         break;
21882       case T_DOUBLE:
21883         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21884         break;
21885       default:
21886         ShouldNotReachHere();
21887     }
21888   %}
21889   ins_pipe( pipe_slow );
21890 %}
21891 
21892 instruct vcastLtoBS(vec dst, vec src) %{
21893   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21894             UseAVX <= 2);
21895   match(Set dst (VectorCastL2X src));
21896   format %{ "vector_cast_l2x  $dst,$src" %}
21897   ins_encode %{
21898     assert(UseAVX > 0, "required");
21899 
21900     int vlen = Matcher::vector_length_in_bytes(this, $src);
21901     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
21902     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
21903                                                       : ExternalAddress(vector_int_to_short_mask());
21904     if (vlen <= 16) {
21905       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
21906       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21907       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21908     } else {
21909       assert(vlen <= 32, "required");
21910       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
21911       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
21912       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21913       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21914     }
21915     if (to_elem_bt == T_BYTE) {
21916       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21917     }
21918   %}
21919   ins_pipe( pipe_slow );
21920 %}
21921 
21922 instruct vcastLtoX_evex(vec dst, vec src) %{
21923   predicate(UseAVX > 2 ||
21924             (Matcher::vector_element_basic_type(n) == T_INT ||
21925              Matcher::vector_element_basic_type(n) == T_FLOAT ||
21926              Matcher::vector_element_basic_type(n) == T_DOUBLE));
21927   match(Set dst (VectorCastL2X src));
21928   format %{ "vector_cast_l2x  $dst,$src\t!" %}
21929   ins_encode %{
21930     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21931     int vlen = Matcher::vector_length_in_bytes(this, $src);
21932     int vlen_enc = vector_length_encoding(this, $src);
21933     switch (to_elem_bt) {
21934       case T_BYTE:
21935         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21936           vlen_enc = Assembler::AVX_512bit;
21937         }
21938         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21939         break;
21940       case T_SHORT:
21941         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21942           vlen_enc = Assembler::AVX_512bit;
21943         }
21944         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21945         break;
21946       case T_INT:
21947         if (vlen == 8) {
21948           if ($dst$$XMMRegister != $src$$XMMRegister) {
21949             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21950           }
21951         } else if (vlen == 16) {
21952           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
21953         } else if (vlen == 32) {
21954           if (UseAVX > 2) {
21955             if (!VM_Version::supports_avx512vl()) {
21956               vlen_enc = Assembler::AVX_512bit;
21957             }
21958             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21959           } else {
21960             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
21961             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
21962           }
21963         } else { // vlen == 64
21964           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21965         }
21966         break;
21967       case T_FLOAT:
21968         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21969         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21970         break;
21971       case T_DOUBLE:
21972         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21973         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21974         break;
21975 
21976       default: assert(false, "%s", type2name(to_elem_bt));
21977     }
21978   %}
21979   ins_pipe( pipe_slow );
21980 %}
21981 
21982 instruct vcastFtoD_reg(vec dst, vec src) %{
21983   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
21984   match(Set dst (VectorCastF2X src));
21985   format %{ "vector_cast_f2d  $dst,$src\t!" %}
21986   ins_encode %{
21987     int vlen_enc = vector_length_encoding(this);
21988     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21989   %}
21990   ins_pipe( pipe_slow );
21991 %}
21992 
21993 
21994 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
21995   predicate(!VM_Version::supports_avx10_2() &&
21996             !VM_Version::supports_avx512vl() &&
21997             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
21998             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
21999             is_integral_type(Matcher::vector_element_basic_type(n)));
22000   match(Set dst (VectorCastF2X src));
22001   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22002   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22003   ins_encode %{
22004     int vlen_enc = vector_length_encoding(this, $src);
22005     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register when loading
    // addresses wider than 32 bits for the register-indirect addressing mode, since
    // stub constants are part of the code cache and ReservedCodeCacheSize is
    // currently capped at 2G. Targets are free to raise this limit, but a code
    // cache larger than 2G is unreasonable in practice; on the flip side, the cap
    // lets us skip a temporary register allocation, which in the limiting case can
    // prevent spilling in blocks with high register pressure.
22013     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22014                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22015                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22016   %}
22017   ins_pipe( pipe_slow );
22018 %}
22019 
22020 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22021   predicate(!VM_Version::supports_avx10_2() &&
22022             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22023             is_integral_type(Matcher::vector_element_basic_type(n)));
22024   match(Set dst (VectorCastF2X src));
22025   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22026   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22027   ins_encode %{
22028     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22029     if (to_elem_bt == T_LONG) {
22030       int vlen_enc = vector_length_encoding(this);
22031       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22032                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22033                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22034     } else {
22035       int vlen_enc = vector_length_encoding(this, $src);
22036       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22037                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22038                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22039     }
22040   %}
22041   ins_pipe( pipe_slow );
22042 %}
22043 
22044 instruct castFtoX_reg_avx10(vec dst, vec src) %{
22045   predicate(VM_Version::supports_avx10_2() &&
22046             is_integral_type(Matcher::vector_element_basic_type(n)));
22047   match(Set dst (VectorCastF2X src));
22048   format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22049   ins_encode %{
22050     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22051     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22052     __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22053   %}
22054   ins_pipe( pipe_slow );
22055 %}
22056 
22057 instruct castFtoX_mem_avx10(vec dst, memory src) %{
22058   predicate(VM_Version::supports_avx10_2() &&
22059             is_integral_type(Matcher::vector_element_basic_type(n)));
22060   match(Set dst (VectorCastF2X (LoadVector src)));
22061   format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22062   ins_encode %{
22063     int vlen = Matcher::vector_length(this);
22064     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22065     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22066     __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22067   %}
22068   ins_pipe( pipe_slow );
22069 %}
22070 
22071 instruct vcastDtoF_reg(vec dst, vec src) %{
22072   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22073   match(Set dst (VectorCastD2X src));
22074   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22075   ins_encode %{
22076     int vlen_enc = vector_length_encoding(this, $src);
22077     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22078   %}
22079   ins_pipe( pipe_slow );
22080 %}
22081 
22082 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22083   predicate(!VM_Version::supports_avx10_2() &&
22084             !VM_Version::supports_avx512vl() &&
22085             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22086             is_integral_type(Matcher::vector_element_basic_type(n)));
22087   match(Set dst (VectorCastD2X src));
22088   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22089   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22090   ins_encode %{
22091     int vlen_enc = vector_length_encoding(this, $src);
22092     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22093     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22094                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22095                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22096   %}
22097   ins_pipe( pipe_slow );
22098 %}
22099 
22100 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22101   predicate(!VM_Version::supports_avx10_2() &&
22102             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22103             is_integral_type(Matcher::vector_element_basic_type(n)));
22104   match(Set dst (VectorCastD2X src));
22105   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22106   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22107   ins_encode %{
22108     int vlen_enc = vector_length_encoding(this, $src);
22109     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22110     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22111                               ExternalAddress(vector_float_signflip());
22112     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22113                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22114   %}
22115   ins_pipe( pipe_slow );
22116 %}
22117 
22118 instruct castDtoX_reg_avx10(vec dst, vec src) %{
22119   predicate(VM_Version::supports_avx10_2() &&
22120             is_integral_type(Matcher::vector_element_basic_type(n)));
22121   match(Set dst (VectorCastD2X src));
22122   format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22123   ins_encode %{
22124     int vlen_enc = vector_length_encoding(this, $src);
22125     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22126     __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22127   %}
22128   ins_pipe( pipe_slow );
22129 %}
22130 
22131 instruct castDtoX_mem_avx10(vec dst, memory src) %{
22132   predicate(VM_Version::supports_avx10_2() &&
22133             is_integral_type(Matcher::vector_element_basic_type(n)));
22134   match(Set dst (VectorCastD2X (LoadVector src)));
22135   format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22136   ins_encode %{
22137     int vlen = Matcher::vector_length(this);
22138     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22139     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22140     __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22141   %}
22142   ins_pipe( pipe_slow );
22143 %}
22144 
22145 instruct vucast(vec dst, vec src) %{
22146   match(Set dst (VectorUCastB2X src));
22147   match(Set dst (VectorUCastS2X src));
22148   match(Set dst (VectorUCastI2X src));
22149   format %{ "vector_ucast $dst,$src\t!" %}
22150   ins_encode %{
22151     assert(UseAVX > 0, "required");
22152 
22153     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22154     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22155     int vlen_enc = vector_length_encoding(this);
22156     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22157   %}
22158   ins_pipe( pipe_slow );
22159 %}
22160 
22161 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22162   predicate(!VM_Version::supports_avx512vl() &&
22163             Matcher::vector_length_in_bytes(n) < 64 &&
22164             Matcher::vector_element_basic_type(n) == T_INT);
22165   match(Set dst (RoundVF src));
22166   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22167   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22168   ins_encode %{
22169     int vlen_enc = vector_length_encoding(this);
22170     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22171     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22172                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22173                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22174   %}
22175   ins_pipe( pipe_slow );
22176 %}
22177 
22178 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22179   predicate((VM_Version::supports_avx512vl() ||
22180              Matcher::vector_length_in_bytes(n) == 64) &&
22181              Matcher::vector_element_basic_type(n) == T_INT);
22182   match(Set dst (RoundVF src));
22183   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22184   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22185   ins_encode %{
22186     int vlen_enc = vector_length_encoding(this);
22187     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22188     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22189                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22190                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22191   %}
22192   ins_pipe( pipe_slow );
22193 %}
22194 
22195 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22196   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22197   match(Set dst (RoundVD src));
22198   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2,  KILL cr);
22199   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22200   ins_encode %{
22201     int vlen_enc = vector_length_encoding(this);
22202     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22203     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22204                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22205                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22206   %}
22207   ins_pipe( pipe_slow );
22208 %}
22209 
22210 // --------------------------------- VectorMaskCmp --------------------------------------
22211 
22212 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22213   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22214             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22215             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22216             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22217   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22218   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22219   ins_encode %{
22220     int vlen_enc = vector_length_encoding(this, $src1);
22221     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22222     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22223       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22224     } else {
22225       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22226     }
22227   %}
22228   ins_pipe( pipe_slow );
22229 %}
22230 
22231 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22232   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22233             n->bottom_type()->isa_vectmask() == nullptr &&
22234             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22235   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22236   effect(TEMP ktmp);
22237   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22238   ins_encode %{
22239     int vlen_enc = Assembler::AVX_512bit;
22240     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22241     KRegister mask = k0; // The comparison itself is not being masked.
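    // Expand the k-register compare result back into a vector: a masked
    // (zeroing) load of the all-ones constant writes -1 into lanes whose mask
    // bit is set and 0 elsewhere.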
22242     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22243       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22244       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22245     } else {
22246       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22247       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22248     }
22249   %}
22250   ins_pipe( pipe_slow );
22251 %}
22252 
22253 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22254   predicate(n->bottom_type()->isa_vectmask() &&
22255             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22256   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22257   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22258   ins_encode %{
22259     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22260     int vlen_enc = vector_length_encoding(this, $src1);
22261     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22262     KRegister mask = k0; // The comparison itself is not being masked.
22263     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22264       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22265     } else {
22266       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22267     }
22268   %}
22269   ins_pipe( pipe_slow );
22270 %}
22271 
22272 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22273   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22274             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22275             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22276             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22277             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22278             (n->in(2)->get_int() == BoolTest::eq ||
22279              n->in(2)->get_int() == BoolTest::lt ||
22280              n->in(2)->get_int() == BoolTest::gt)); // cond
22281   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22282   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22283   ins_encode %{
22284     int vlen_enc = vector_length_encoding(this, $src1);
22285     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22286     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22287     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22288   %}
22289   ins_pipe( pipe_slow );
22290 %}
22291 
22292 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22293   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22294             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22295             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22296             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22297             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22298             (n->in(2)->get_int() == BoolTest::ne ||
22299              n->in(2)->get_int() == BoolTest::le ||
22300              n->in(2)->get_int() == BoolTest::ge)); // cond
22301   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22302   effect(TEMP dst, TEMP xtmp);
22303   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22304   ins_encode %{
22305     int vlen_enc = vector_length_encoding(this, $src1);
22306     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22307     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22308     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22309   %}
22310   ins_pipe( pipe_slow );
22311 %}
22312 
22313 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22314   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22315             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22316             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22317             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22318             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22319   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22320   effect(TEMP dst, TEMP xtmp);
22321   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22322   ins_encode %{
22323     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22324     int vlen_enc = vector_length_encoding(this, $src1);
22325     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22326     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22327 
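    // There is no unsigned integer compare before AVX-512, so flip the sign
    // bit of both operands and use a signed compare instead; e.g. for bytes,
    // 255 > 1 unsigned becomes 0x7F > 0x81 signed after XOR-ing with 0x80.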
22328     if (vlen_enc == Assembler::AVX_128bit) {
22329       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22330     } else {
22331       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22332     }
22333     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22334     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22335     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22336   %}
22337   ins_pipe( pipe_slow );
22338 %}
22339 
22340 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22341   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22342              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22343              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22344   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22345   effect(TEMP ktmp);
22346   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22347   ins_encode %{
22348     assert(UseAVX > 2, "required");
22349 
22350     int vlen_enc = vector_length_encoding(this, $src1);
22351     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22352     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22353     KRegister mask = k0; // The comparison itself is not being masked.
22354     bool merge = false;
22355     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22356 
22357     switch (src1_elem_bt) {
22358       case T_INT: {
22359         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22360         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22361         break;
22362       }
22363       case T_LONG: {
22364         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22365         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22366         break;
22367       }
22368       default: assert(false, "%s", type2name(src1_elem_bt));
22369     }
22370   %}
22371   ins_pipe( pipe_slow );
22372 %}
22373 
22375 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22376   predicate(n->bottom_type()->isa_vectmask() &&
22377             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22378   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22380   ins_encode %{
22381     assert(UseAVX > 2, "required");
22382     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22383 
22384     int vlen_enc = vector_length_encoding(this, $src1);
22385     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22386     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22387     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22388 
    // The comparison itself is not being masked, so use k0 directly.
22390     switch (src1_elem_bt) {
22391       case T_BYTE: {
22392         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22393         break;
22394       }
22395       case T_SHORT: {
22396         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22397         break;
22398       }
22399       case T_INT: {
22400         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22401         break;
22402       }
22403       case T_LONG: {
22404         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22405         break;
22406       }
22407       default: assert(false, "%s", type2name(src1_elem_bt));
22408     }
22409   %}
22410   ins_pipe( pipe_slow );
22411 %}
22412 
22413 // Extract
22414 
22415 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22416   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22417   match(Set dst (ExtractI src idx));
22418   match(Set dst (ExtractS src idx));
22419   match(Set dst (ExtractB src idx));
22420   format %{ "extractI $dst,$src,$idx\t!" %}
22421   ins_encode %{
22422     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22423 
22424     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22425     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22426   %}
22427   ins_pipe( pipe_slow );
22428 %}
22429 
22430 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22431   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22432             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22433   match(Set dst (ExtractI src idx));
22434   match(Set dst (ExtractS src idx));
22435   match(Set dst (ExtractB src idx));
22436   effect(TEMP vtmp);
22437   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22438   ins_encode %{
22439     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22440 
22441     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22442     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22443     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22444   %}
22445   ins_pipe( pipe_slow );
22446 %}
22447 
22448 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22449   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22450   match(Set dst (ExtractL src idx));
22451   format %{ "extractL $dst,$src,$idx\t!" %}
22452   ins_encode %{
22453     assert(UseSSE >= 4, "required");
22454     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22455 
22456     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22457   %}
22458   ins_pipe( pipe_slow );
22459 %}
22460 
22461 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22462   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22463             Matcher::vector_length(n->in(1)) == 8);  // src
22464   match(Set dst (ExtractL src idx));
22465   effect(TEMP vtmp);
22466   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22467   ins_encode %{
22468     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22469 
22470     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22471     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22472   %}
22473   ins_pipe( pipe_slow );
22474 %}
22475 
22476 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22477   predicate(Matcher::vector_length(n->in(1)) <= 4);
22478   match(Set dst (ExtractF src idx));
22479   effect(TEMP dst, TEMP vtmp);
22480   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22481   ins_encode %{
22482     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22483 
22484     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22485   %}
22486   ins_pipe( pipe_slow );
22487 %}
22488 
22489 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22490   predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22491             Matcher::vector_length(n->in(1)/*src*/) == 16);
22492   match(Set dst (ExtractF src idx));
22493   effect(TEMP vtmp);
22494   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22495   ins_encode %{
22496     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22497 
22498     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22499     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22500   %}
22501   ins_pipe( pipe_slow );
22502 %}
22503 
22504 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22505   predicate(Matcher::vector_length(n->in(1)) == 2); // src
22506   match(Set dst (ExtractD src idx));
22507   format %{ "extractD $dst,$src,$idx\t!" %}
22508   ins_encode %{
22509     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22510 
22511     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22512   %}
22513   ins_pipe( pipe_slow );
22514 %}
22515 
22516 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22517   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22518             Matcher::vector_length(n->in(1)) == 8);  // src
22519   match(Set dst (ExtractD src idx));
22520   effect(TEMP vtmp);
22521   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22522   ins_encode %{
22523     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22524 
22525     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22526     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22527   %}
22528   ins_pipe( pipe_slow );
22529 %}
22530 
22531 // --------------------------------- Vector Blend --------------------------------------
22532 
22533 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22534   predicate(UseAVX == 0);
22535   match(Set dst (VectorBlend (Binary dst src) mask));
22536   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
22537   effect(TEMP tmp);
22538   ins_encode %{
22539     assert(UseSSE >= 4, "required");
22540 
22541     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22542       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22543     }
22544     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22545   %}
22546   ins_pipe( pipe_slow );
22547 %}
22548 
22549 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22550   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22551             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22552             Matcher::vector_length_in_bytes(n) <= 32 &&
22553             is_integral_type(Matcher::vector_element_basic_type(n)));
22554   match(Set dst (VectorBlend (Binary src1 src2) mask));
22555   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22556   ins_encode %{
22557     int vlen_enc = vector_length_encoding(this);
22558     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22559   %}
22560   ins_pipe( pipe_slow );
22561 %}
22562 
22563 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22564   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22565             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22566             Matcher::vector_length_in_bytes(n) <= 32 &&
22567             !is_integral_type(Matcher::vector_element_basic_type(n)));
22568   match(Set dst (VectorBlend (Binary src1 src2) mask));
22569   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22570   ins_encode %{
22571     int vlen_enc = vector_length_encoding(this);
22572     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22573   %}
22574   ins_pipe( pipe_slow );
22575 %}
22576 
22577 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22578   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22579             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22580             Matcher::vector_length_in_bytes(n) <= 32);
22581   match(Set dst (VectorBlend (Binary src1 src2) mask));
22582   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22583   effect(TEMP vtmp, TEMP dst);
22584   ins_encode %{
22585     int vlen_enc = vector_length_encoding(this);
22586     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22587     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22588     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
22589   %}
22590   ins_pipe( pipe_slow );
22591 %}
22592 
22593 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22594   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22595             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22596   match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22598   effect(TEMP ktmp);
22599   ins_encode %{
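    // Turn the vector mask into a k-register by comparing it element-wise
    // against all-ones, then let the masked blend select lanes from $src2
    // where the corresponding mask bit is set.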
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
22602     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22603     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22604   %}
22605   ins_pipe( pipe_slow );
22606 %}
22607 
22609 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22610   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22611             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22612              VM_Version::supports_avx512bw()));
22613   match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22615   ins_encode %{
22616     int vlen_enc = vector_length_encoding(this);
22617     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22618     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22619   %}
22620   ins_pipe( pipe_slow );
22621 %}
22622 
22623 // --------------------------------- ABS --------------------------------------
22624 // a = |a|
22625 instruct vabsB_reg(vec dst, vec src) %{
22626   match(Set dst (AbsVB  src));
22627   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22628   ins_encode %{
22629     uint vlen = Matcher::vector_length(this);
22630     if (vlen <= 16) {
22631       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22632     } else {
22633       int vlen_enc = vector_length_encoding(this);
22634       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22635     }
22636   %}
22637   ins_pipe( pipe_slow );
22638 %}
22639 
22640 instruct vabsS_reg(vec dst, vec src) %{
22641   match(Set dst (AbsVS  src));
22642   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22643   ins_encode %{
22644     uint vlen = Matcher::vector_length(this);
22645     if (vlen <= 8) {
22646       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22647     } else {
22648       int vlen_enc = vector_length_encoding(this);
22649       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22650     }
22651   %}
22652   ins_pipe( pipe_slow );
22653 %}
22654 
22655 instruct vabsI_reg(vec dst, vec src) %{
22656   match(Set dst (AbsVI  src));
22657   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22658   ins_encode %{
22659     uint vlen = Matcher::vector_length(this);
22660     if (vlen <= 4) {
22661       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22662     } else {
22663       int vlen_enc = vector_length_encoding(this);
22664       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22665     }
22666   %}
22667   ins_pipe( pipe_slow );
22668 %}
22669 
22670 instruct vabsL_reg(vec dst, vec src) %{
22671   match(Set dst (AbsVL  src));
22672   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22673   ins_encode %{
22674     assert(UseAVX > 2, "required");
22675     int vlen_enc = vector_length_encoding(this);
22676     if (!VM_Version::supports_avx512vl()) {
22677       vlen_enc = Assembler::AVX_512bit;
22678     }
22679     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22680   %}
22681   ins_pipe( pipe_slow );
22682 %}
22683 
22684 // --------------------------------- ABSNEG --------------------------------------
22685 
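// Abs clears the sign bit by AND-ing with a sign-mask constant; Neg flips it
// by XOR-ing with a sign-flip constant. The [mask] in the formats below
// refers to that memory-resident constant.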
22686 instruct vabsnegF(vec dst, vec src) %{
22687   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22688   match(Set dst (AbsVF src));
22689   match(Set dst (NegVF src));
22690   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22691   ins_cost(150);
22692   ins_encode %{
22693     int opcode = this->ideal_Opcode();
22694     int vlen = Matcher::vector_length(this);
22695     if (vlen == 2) {
22696       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22697     } else {
22698       assert(vlen == 8 || vlen == 16, "required");
22699       int vlen_enc = vector_length_encoding(this);
22700       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22701     }
22702   %}
22703   ins_pipe( pipe_slow );
22704 %}
22705 
22706 instruct vabsneg4F(vec dst) %{
22707   predicate(Matcher::vector_length(n) == 4);
22708   match(Set dst (AbsVF dst));
22709   match(Set dst (NegVF dst));
22710   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22711   ins_cost(150);
22712   ins_encode %{
22713     int opcode = this->ideal_Opcode();
22714     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22715   %}
22716   ins_pipe( pipe_slow );
22717 %}
22718 
22719 instruct vabsnegD(vec dst, vec src) %{
22720   match(Set dst (AbsVD  src));
22721   match(Set dst (NegVD  src));
22722   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22723   ins_encode %{
22724     int opcode = this->ideal_Opcode();
22725     uint vlen = Matcher::vector_length(this);
22726     if (vlen == 2) {
22727       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22728     } else {
22729       int vlen_enc = vector_length_encoding(this);
22730       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22731     }
22732   %}
22733   ins_pipe( pipe_slow );
22734 %}
22735 
22736 //------------------------------------- VectorTest --------------------------------------------
22737 
22738 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22739   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22740   match(Set cr (VectorTest src1 src2));
22741   effect(TEMP vtmp);
22742   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
22743   ins_encode %{
22744     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22745     int vlen = Matcher::vector_length_in_bytes(this, $src1);
22746     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22747   %}
22748   ins_pipe( pipe_slow );
22749 %}
22750 
22751 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
22752   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
22753   match(Set cr (VectorTest src1 src2));
22754   format %{ "vptest_ge16  $src1, $src2\n\t" %}
22755   ins_encode %{
22756     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22757     int vlen = Matcher::vector_length_in_bytes(this, $src1);
22758     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
22759   %}
22760   ins_pipe( pipe_slow );
22761 %}
22762 
22763 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22764   predicate((Matcher::vector_length(n->in(1)) < 8 ||
22765              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22766             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
22767   match(Set cr (VectorTest src1 src2));
22768   effect(TEMP tmp);
22769   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
22770   ins_encode %{
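    // All-true holds iff every one of the low 'masklen' mask bits is set,
    // e.g. for masklen == 4 the test reduces to (k & 0xF) == 0xF.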
22771     uint masklen = Matcher::vector_length(this, $src1);
22772     __ kmovwl($tmp$$Register, $src1$$KRegister);
22773     __ andl($tmp$$Register, (1 << masklen) - 1);
22774     __ cmpl($tmp$$Register, (1 << masklen) - 1);
22775   %}
22776   ins_pipe( pipe_slow );
22777 %}
22778 
22779 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22780   predicate((Matcher::vector_length(n->in(1)) < 8 ||
22781              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22782             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
22783   match(Set cr (VectorTest src1 src2));
22784   effect(TEMP tmp);
22785   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
22786   ins_encode %{
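    // Any-true holds iff at least one of the low 'masklen' bits is set; the
    // AND masks out stale high bits and leaves ZF clear exactly in that case.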
22787     uint masklen = Matcher::vector_length(this, $src1);
22788     __ kmovwl($tmp$$Register, $src1$$KRegister);
22789     __ andl($tmp$$Register, (1 << masklen) - 1);
22790   %}
22791   ins_pipe( pipe_slow );
22792 %}
22793 
22794 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
22795   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
22796             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
22797   match(Set cr (VectorTest src1 src2));
22798   format %{ "ktest_ge8  $src1, $src2\n\t" %}
22799   ins_encode %{
22800     uint masklen = Matcher::vector_length(this, $src1);
22801     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
22802   %}
22803   ins_pipe( pipe_slow );
22804 %}
22805 
22806 //------------------------------------- LoadMask --------------------------------------------
22807 
22808 instruct loadMask(legVec dst, legVec src) %{
22809   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
22810   match(Set dst (VectorLoadMask src));
22811   effect(TEMP dst);
22812   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
22813   ins_encode %{
22814     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
22815     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22816     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
22817   %}
22818   ins_pipe( pipe_slow );
22819 %}
22820 
22821 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
22822   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
22823   match(Set dst (VectorLoadMask src));
22824   effect(TEMP xtmp);
22825   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
22826   ins_encode %{
22827     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22828                         true, Assembler::AVX_512bit);
22829   %}
22830   ins_pipe( pipe_slow );
22831 %}
22832 
22833 instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
22834   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
22835   match(Set dst (VectorLoadMask src));
22836   effect(TEMP xtmp);
22837   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
22838   ins_encode %{
22839     int vlen_enc = vector_length_encoding(in(1));
22840     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22841                         false, vlen_enc);
22842   %}
22843   ins_pipe( pipe_slow );
22844 %}
22845 
22846 //------------------------------------- StoreMask --------------------------------------------
22847 
22848 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
22849   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22850   match(Set dst (VectorStoreMask src size));
22851   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22852   ins_encode %{
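    // Mask lanes arrive as 0 or -1 (all bits set); the byte-wise absolute
    // value canonicalizes each -1 into the boolean 1 expected by a mask store.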
22853     int vlen = Matcher::vector_length(this);
22854     if (vlen <= 16 && UseAVX <= 2) {
22855       assert(UseSSE >= 3, "required");
22856       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22857     } else {
22858       assert(UseAVX > 0, "required");
22859       int src_vlen_enc = vector_length_encoding(this, $src);
22860       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22861     }
22862   %}
22863   ins_pipe( pipe_slow );
22864 %}
22865 
22866 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
22867   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22868   match(Set dst (VectorStoreMask src size));
22869   effect(TEMP_DEF dst, TEMP xtmp);
22870   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22871   ins_encode %{
22872     int vlen_enc = Assembler::AVX_128bit;
22873     int vlen = Matcher::vector_length(this);
22874     if (vlen <= 8) {
22875       assert(UseSSE >= 3, "required");
22876       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22877       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22878       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22879     } else {
22880       assert(UseAVX > 0, "required");
22881       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22882       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22883       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22884     }
22885   %}
22886   ins_pipe( pipe_slow );
22887 %}
22888 
22889 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
22890   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22891   match(Set dst (VectorStoreMask src size));
22892   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22893   effect(TEMP_DEF dst, TEMP xtmp);
22894   ins_encode %{
22895     int vlen_enc = Assembler::AVX_128bit;
22896     int vlen = Matcher::vector_length(this);
22897     if (vlen <= 4) {
22898       assert(UseSSE >= 3, "required");
22899       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22900       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22901       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22902       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22903     } else {
22904       assert(UseAVX > 0, "required");
22905       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22906       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22907       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22908       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22909       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22910     }
22911   %}
22912   ins_pipe( pipe_slow );
22913 %}
22914 
22915 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
22916   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
22917   match(Set dst (VectorStoreMask src size));
22918   effect(TEMP_DEF dst, TEMP xtmp);
22919   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22920   ins_encode %{
22921     assert(UseSSE >= 3, "required");
22922     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22923     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
22924     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
22925     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22926     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22927   %}
22928   ins_pipe( pipe_slow );
22929 %}
22930 
22931 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
22932   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
22933   match(Set dst (VectorStoreMask src size));
22934   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
22935   effect(TEMP_DEF dst, TEMP vtmp);
22936   ins_encode %{
22937     int vlen_enc = Assembler::AVX_128bit;
22938     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
22939     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22940     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
22941     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22942     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22943     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22944     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22945   %}
22946   ins_pipe( pipe_slow );
22947 %}
22948 
22949 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
22950   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22951   match(Set dst (VectorStoreMask src size));
22952   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22953   ins_encode %{
22954     int src_vlen_enc = vector_length_encoding(this, $src);
22955     int dst_vlen_enc = vector_length_encoding(this);
22956     if (!VM_Version::supports_avx512vl()) {
22957       src_vlen_enc = Assembler::AVX_512bit;
22958     }
22959     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22960     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22961   %}
22962   ins_pipe( pipe_slow );
22963 %}
22964 
22965 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
22966   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22967   match(Set dst (VectorStoreMask src size));
22968   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22969   ins_encode %{
22970     int src_vlen_enc = vector_length_encoding(this, $src);
22971     int dst_vlen_enc = vector_length_encoding(this);
22972     if (!VM_Version::supports_avx512vl()) {
22973       src_vlen_enc = Assembler::AVX_512bit;
22974     }
22975     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22976     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22977   %}
22978   ins_pipe( pipe_slow );
22979 %}
22980 
22981 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
22982   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
22983   match(Set dst (VectorStoreMask mask size));
22984   effect(TEMP_DEF dst);
22985   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
22986   ins_encode %{
22987     assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
22988     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
22989                  false, Assembler::AVX_512bit, noreg);
22990     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
22991   %}
22992   ins_pipe( pipe_slow );
22993 %}
22994 
22995 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
22996   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
22997   match(Set dst (VectorStoreMask mask size));
22998   effect(TEMP_DEF dst);
22999   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23000   ins_encode %{
23001     int dst_vlen_enc = vector_length_encoding(this);
23002     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23003     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23004   %}
23005   ins_pipe( pipe_slow );
23006 %}
23007 
23008 instruct vmaskcast_evex(kReg dst) %{
23009   match(Set dst (VectorMaskCast dst));
23010   ins_cost(0);
23011   format %{ "vector_mask_cast $dst" %}
23012   ins_encode %{
23013     // empty
23014   %}
23015   ins_pipe(empty);
23016 %}
23017 
23018 instruct vmaskcast(vec dst) %{
23019   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23020   match(Set dst (VectorMaskCast dst));
23021   ins_cost(0);
23022   format %{ "vector_mask_cast $dst" %}
23023   ins_encode %{
23024     // empty
23025   %}
23026   ins_pipe(empty);
23027 %}
23028 
23029 instruct vmaskcast_avx(vec dst, vec src) %{
23030   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23031   match(Set dst (VectorMaskCast src));
23032   format %{ "vector_mask_cast $dst, $src" %}
23033   ins_encode %{
23034     int vlen = Matcher::vector_length(this);
23035     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23036     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23037     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23038   %}
23039   ins_pipe(pipe_slow);
23040 %}
23041 
23042 //-------------------------------- Load Iota Indices ----------------------------------
23043 
23044 instruct loadIotaIndices(vec dst, immI_0 src) %{
23045   match(Set dst (VectorLoadConst src));
23046   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23047   ins_encode %{
23048      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23049      BasicType bt = Matcher::vector_element_basic_type(this);
23050      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23051   %}
23052   ins_pipe( pipe_slow );
23053 %}
23054 
23055 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23056   match(Set dst (PopulateIndex src1 src2));
23057   effect(TEMP dst, TEMP vtmp);
23058   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23059   ins_encode %{
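     // Computes dst[i] = $src1 + i by broadcasting the scalar and adding the
     // iota (0, 1, 2, ...) constant vector.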
23060      assert($src2$$constant == 1, "required");
23061      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23062      int vlen_enc = vector_length_encoding(this);
23063      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23064      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23065      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23066      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23067   %}
23068   ins_pipe( pipe_slow );
23069 %}
23070 
23071 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23072   match(Set dst (PopulateIndex src1 src2));
23073   effect(TEMP dst, TEMP vtmp);
23074   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23075   ins_encode %{
23076      assert($src2$$constant == 1, "required");
23077      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23078      int vlen_enc = vector_length_encoding(this);
23079      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23080      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23081      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23082      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23083   %}
23084   ins_pipe( pipe_slow );
23085 %}
23086 
23087 //-------------------------------- Rearrange ----------------------------------
23088 
23089 // LoadShuffle/Rearrange for Byte
23090 instruct rearrangeB(vec dst, vec shuffle) %{
23091   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23092             Matcher::vector_length(n) < 32);
23093   match(Set dst (VectorRearrange dst shuffle));
23094   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23095   ins_encode %{
23096     assert(UseSSE >= 4, "required");
23097     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23098   %}
23099   ins_pipe( pipe_slow );
23100 %}
23101 
23102 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23103   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23104             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23105   match(Set dst (VectorRearrange src shuffle));
23106   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23107   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23108   ins_encode %{
23109     assert(UseAVX >= 2, "required");
23110     // Swap src into vtmp1
23111     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23112     // Shuffle swapped src to get entries from other 128 bit lane
23113     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23114     // Shuffle original src to get entries from self 128 bit lane
23115     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23116     // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23117     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23118     // Perform the blend
23119     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23120   %}
23121   ins_pipe( pipe_slow );
23122 %}
23123 
23125 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23126   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23127             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23128   match(Set dst (VectorRearrange src shuffle));
23129   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23130   format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23131   ins_encode %{
23132     int vlen_enc = vector_length_encoding(this);
23133     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23134                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23135                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23136   %}
23137   ins_pipe( pipe_slow );
23138 %}
23139 
23140 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23141   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23142             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23143   match(Set dst (VectorRearrange src shuffle));
23144   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23145   ins_encode %{
23146     int vlen_enc = vector_length_encoding(this);
23147     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23148   %}
23149   ins_pipe( pipe_slow );
23150 %}
23151 
23152 // LoadShuffle/Rearrange for Short
23153 
23154 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23155   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23156             !VM_Version::supports_avx512bw());
23157   match(Set dst (VectorLoadShuffle src));
23158   effect(TEMP dst, TEMP vtmp);
23159   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23160   ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask,
    // since only a byte shuffle instruction is available on these platforms
23163     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23164     if (UseAVX == 0) {
23165       assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle index by two to get the byte index
23167       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23168       __ psllw($vtmp$$XMMRegister, 1);
23169 
23170       // Duplicate to create 2 copies of byte index
23171       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23172       __ psllw($dst$$XMMRegister, 8);
23173       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23174 
      // Add one to every second byte to get the alternate byte index
23176       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23177       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23178     } else {
23179       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23180       int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle index by two to get the byte index
23182       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23183 
23184       // Duplicate to create 2 copies of byte index
23185       __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister,  8, vlen_enc);
23186       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23187 
      // Add one to every second byte to get the alternate byte index
23189       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23190     }
23191   %}
23192   ins_pipe( pipe_slow );
23193 %}
23194 
23195 instruct rearrangeS(vec dst, vec shuffle) %{
23196   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23197             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23198   match(Set dst (VectorRearrange dst shuffle));
23199   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23200   ins_encode %{
23201     assert(UseSSE >= 4, "required");
23202     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23203   %}
23204   ins_pipe( pipe_slow );
23205 %}
23206 
23207 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23208   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23209             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23210   match(Set dst (VectorRearrange src shuffle));
23211   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23212   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23213   ins_encode %{
23214     assert(UseAVX >= 2, "required");
    // Swap the two 128-bit lanes of src into vtmp1
23216     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to get entries from the other 128-bit lane
23218     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to get entries from its own 128-bit lane
23220     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit of entries whose shuffle index selects the other lane
23222     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23223     // Perform the blend
23224     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23225   %}
23226   ins_pipe( pipe_slow );
23227 %}
23228 
23229 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23230   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23231             VM_Version::supports_avx512bw());
23232   match(Set dst (VectorRearrange src shuffle));
23233   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23234   ins_encode %{
23235     int vlen_enc = vector_length_encoding(this);
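    // Without AVX512VL, vpermw is only available in the 512-bit form,
    // so widen the operation; the unused upper lanes are ignored.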
23236     if (!VM_Version::supports_avx512vl()) {
23237       vlen_enc = Assembler::AVX_512bit;
23238     }
23239     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23240   %}
23241   ins_pipe( pipe_slow );
23242 %}
23243 
23244 // LoadShuffle/Rearrange for Integer and Float
23245 
23246 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23247   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23248             Matcher::vector_length(n) == 4 && UseAVX == 0);
23249   match(Set dst (VectorLoadShuffle src));
23250   effect(TEMP dst, TEMP vtmp);
23251   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23252   ins_encode %{
23253     assert(UseSSE >= 4, "required");
23254 
    // Create a byte shuffle mask from the int shuffle mask,
    // since only a byte shuffle instruction is available on these platforms
23257 
    // Duplicate each shuffle index into both words of its lane and multiply by 4
23259     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23260     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23261     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23262     __ psllw($vtmp$$XMMRegister, 2);
23263 
    // Duplicate again to create 4 copies of the byte index
23265     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23266     __ psllw($dst$$XMMRegister, 8);
23267     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23268 
    // Add 0,1,2,3 to the duplicated indices to address all four bytes of each int
23270     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23271     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23272   %}
23273   ins_pipe( pipe_slow );
23274 %}
23275 
23276 instruct rearrangeI(vec dst, vec shuffle) %{
23277   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23278             UseAVX == 0);
23279   match(Set dst (VectorRearrange dst shuffle));
23280   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23281   ins_encode %{
23282     assert(UseSSE >= 4, "required");
23283     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23284   %}
23285   ins_pipe( pipe_slow );
23286 %}
23287 
23288 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23289   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23290             UseAVX > 0);
23291   match(Set dst (VectorRearrange src shuffle));
23292   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23293   ins_encode %{
23294     int vlen_enc = vector_length_encoding(this);
23295     BasicType bt = Matcher::vector_element_basic_type(this);
23296     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23297   %}
23298   ins_pipe( pipe_slow );
23299 %}
23300 
23301 // LoadShuffle/Rearrange for Long and Double
23302 
23303 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23304   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23305             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23306   match(Set dst (VectorLoadShuffle src));
23307   effect(TEMP dst, TEMP vtmp);
23308   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23309   ins_encode %{
23310     assert(UseAVX >= 2, "required");
23311 
23312     int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask,
    // since only a double word shuffle instruction is available on these platforms
23315 
    // Multiply each shuffle index by two to get the double word index
23317     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23318 
    // Duplicate the double word index into both halves of each lane
23320     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23321     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23322 
    // Add one to every second double word to get the alternate double word index
23324     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23325   %}
23326   ins_pipe( pipe_slow );
23327 %}
23328 
23329 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23330   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23331             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23332   match(Set dst (VectorRearrange src shuffle));
23333   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23334   ins_encode %{
23335     assert(UseAVX >= 2, "required");
23336 
23337     int vlen_enc = vector_length_encoding(this);
23338     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23339   %}
23340   ins_pipe( pipe_slow );
23341 %}
23342 
23343 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23344   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23345             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23346   match(Set dst (VectorRearrange src shuffle));
23347   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23348   ins_encode %{
23349     assert(UseAVX > 2, "required");
23350 
23351     int vlen_enc = vector_length_encoding(this);
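    // The variable-index form of vpermq is only defined for 256-bit and
    // wider operands, so widen a 128-bit operation; the result's upper
    // lanes are ignored.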
23352     if (vlen_enc == Assembler::AVX_128bit) {
23353       vlen_enc = Assembler::AVX_256bit;
23354     }
23355     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23356   %}
23357   ins_pipe( pipe_slow );
23358 %}
23359 
23360 // --------------------------------- FMA --------------------------------------
23361 // a * b + c
23362 
23363 instruct vfmaF_reg(vec a, vec b, vec c) %{
23364   match(Set c (FmaVF  c (Binary a b)));
23365   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23366   ins_cost(150);
23367   ins_encode %{
23368     assert(UseFMA, "not enabled");
23369     int vlen_enc = vector_length_encoding(this);
23370     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23371   %}
23372   ins_pipe( pipe_slow );
23373 %}
23374 
23375 instruct vfmaF_mem(vec a, memory b, vec c) %{
23376   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23377   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23378   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23379   ins_cost(150);
23380   ins_encode %{
23381     assert(UseFMA, "not enabled");
23382     int vlen_enc = vector_length_encoding(this);
23383     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23384   %}
23385   ins_pipe( pipe_slow );
23386 %}
23387 
23388 instruct vfmaD_reg(vec a, vec b, vec c) %{
23389   match(Set c (FmaVD  c (Binary a b)));
23390   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23391   ins_cost(150);
23392   ins_encode %{
23393     assert(UseFMA, "not enabled");
23394     int vlen_enc = vector_length_encoding(this);
23395     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23396   %}
23397   ins_pipe( pipe_slow );
23398 %}
23399 
23400 instruct vfmaD_mem(vec a, memory b, vec c) %{
23401   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23402   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23403   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23404   ins_cost(150);
23405   ins_encode %{
23406     assert(UseFMA, "not enabled");
23407     int vlen_enc = vector_length_encoding(this);
23408     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23409   %}
23410   ins_pipe( pipe_slow );
23411 %}
23412 
23413 // --------------------------------- Vector Multiply Add --------------------------------------
23414 
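// MulAddVS2VI multiplies adjacent pairs of signed 16-bit elements and sums
// each pair into a 32-bit lane, i.e. for every int lane i:
//   dst[i] = src1[2i] * src2[2i] + src1[2i+1] * src2[2i+1]
// which is exactly the semantics of pmaddwd.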
23415 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23416   predicate(UseAVX == 0);
23417   match(Set dst (MulAddVS2VI dst src1));
23418   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23419   ins_encode %{
23420     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23421   %}
23422   ins_pipe( pipe_slow );
23423 %}
23424 
23425 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23426   predicate(UseAVX > 0);
23427   match(Set dst (MulAddVS2VI src1 src2));
23428   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23429   ins_encode %{
23430     int vlen_enc = vector_length_encoding(this);
23431     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23432   %}
23433   ins_pipe( pipe_slow );
23434 %}
23435 
23436 // --------------------------------- Vector Multiply Add Add ----------------------------------
23437 
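// On AVX512_VNNI targets the pmaddwd feeding an integer add can be fused into
// a single vpdpwssd, which accumulates the pairwise products directly:
//   dst[i] += src1[2i] * src2[2i] + src1[2i+1] * src2[2i+1]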
23438 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23439   predicate(VM_Version::supports_avx512_vnni());
23440   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23441   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23442   ins_encode %{
23443     assert(UseAVX > 2, "required");
23444     int vlen_enc = vector_length_encoding(this);
23445     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23446   %}
  ins_cost(10);
  ins_pipe( pipe_slow );
23449 %}
23450 
23451 // --------------------------------- PopCount --------------------------------------
23452 
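// AVX512_VPOPCNTDQ supplies vpopcntd/vpopcntq and AVX512_BITALG supplies
// vpopcntb/vpopcntw; is_vector_popcount_predicate selects targets where these
// direct forms apply. The AVX fallback below presumably counts bits inside
// vector_popcount_integral with the classic pshufb nibble-lookup technique.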
23453 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23454   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23455   match(Set dst (PopCountVI src));
23456   match(Set dst (PopCountVL src));
23457   format %{ "vector_popcount_integral $dst, $src" %}
23458   ins_encode %{
23460     int vlen_enc = vector_length_encoding(this, $src);
23461     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23462     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23463   %}
23464   ins_pipe( pipe_slow );
23465 %}
23466 
23467 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23468   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23469   match(Set dst (PopCountVI src mask));
23470   match(Set dst (PopCountVL src mask));
23471   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23472   ins_encode %{
23473     int vlen_enc = vector_length_encoding(this, $src);
23474     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23475     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23476     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23477   %}
23478   ins_pipe( pipe_slow );
23479 %}
23480 
23481 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23482   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23483   match(Set dst (PopCountVI src));
23484   match(Set dst (PopCountVL src));
23485   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23486   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23487   ins_encode %{
23489     int vlen_enc = vector_length_encoding(this, $src);
23490     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23491     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23492                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23493   %}
23494   ins_pipe( pipe_slow );
23495 %}
23496 
23497 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23498 
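// There is no vector tzcnt instruction, so the helpers below in essence derive
// the trailing-zero count from the leading-zero count of the isolated lowest
// set bit: ntz(x) = (nbits - 1) - lz(x & -x), with ntz(0) = nbits.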
23499 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23500   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23501                                               Matcher::vector_length_in_bytes(n->in(1))));
23502   match(Set dst (CountTrailingZerosV src));
23503   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23504   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23506   ins_encode %{
23507     int vlen_enc = vector_length_encoding(this, $src);
23508     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23509     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23510                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23511   %}
23512   ins_pipe( pipe_slow );
23513 %}
23514 
23515 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23516   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23517             VM_Version::supports_avx512cd() &&
23518             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23519   match(Set dst (CountTrailingZerosV src));
23520   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23521   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23523   ins_encode %{
23524     int vlen_enc = vector_length_encoding(this, $src);
23525     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23526     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23527                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23528   %}
23529   ins_pipe( pipe_slow );
23530 %}
23531 
23532 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23533   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23534   match(Set dst (CountTrailingZerosV src));
23535   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23536   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23538   ins_encode %{
23539     int vlen_enc = vector_length_encoding(this, $src);
23540     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23541     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23542                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23543                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23544   %}
23545   ins_pipe( pipe_slow );
23546 %}
23547 
23548 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23549   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23550   match(Set dst (CountTrailingZerosV src));
23551   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23552   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23553   ins_encode %{
23554     int vlen_enc = vector_length_encoding(this, $src);
23555     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23556     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23557                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23558   %}
23559   ins_pipe( pipe_slow );
23560 %}
23563 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23564 
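// vpternlogd evaluates an arbitrary three-input boolean function per bit: the
// three source bits form a 3-bit index into the 8-bit truth table $func. For
// example, func == 0x96 computes A ^ B ^ C and func == 0xE8 computes the
// majority function (A & B) | (A & C) | (B & C).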
23565 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23566   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23567   effect(TEMP dst);
23568   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23569   ins_encode %{
23570     int vector_len = vector_length_encoding(this);
23571     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23572   %}
23573   ins_pipe( pipe_slow );
23574 %}
23575 
23576 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23577   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23578   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23579   effect(TEMP dst);
23580   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23581   ins_encode %{
23582     int vector_len = vector_length_encoding(this);
23583     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23584   %}
23585   ins_pipe( pipe_slow );
23586 %}
23587 
23588 // --------------------------------- Rotation Operations ----------------------------------
23589 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23590   match(Set dst (RotateLeftV src shift));
23591   match(Set dst (RotateRightV src shift));
23592   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23593   ins_encode %{
23594     int opcode      = this->ideal_Opcode();
23595     int vector_len  = vector_length_encoding(this);
23596     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23597     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23598   %}
23599   ins_pipe( pipe_slow );
23600 %}
23601 
23602 instruct vprorate(vec dst, vec src, vec shift) %{
23603   match(Set dst (RotateLeftV src shift));
23604   match(Set dst (RotateRightV src shift));
23605   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23606   ins_encode %{
23607     int opcode      = this->ideal_Opcode();
23608     int vector_len  = vector_length_encoding(this);
23609     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23610     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23611   %}
23612   ins_pipe( pipe_slow );
23613 %}
23614 
23615 // ---------------------------------- Masked Operations ------------------------------------
23616 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23617   predicate(!n->in(3)->bottom_type()->isa_vectmask());
23618   match(Set dst (LoadVectorMasked mem mask));
23619   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23620   ins_encode %{
23621     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23622     int vlen_enc = vector_length_encoding(this);
23623     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23624   %}
23625   ins_pipe( pipe_slow );
23626 %}
23629 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23630   predicate(n->in(3)->bottom_type()->isa_vectmask());
23631   match(Set dst (LoadVectorMasked mem mask));
23632   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23633   ins_encode %{
23634     BasicType elmType =  this->bottom_type()->is_vect()->element_basic_type();
23635     int vector_len = vector_length_encoding(this);
23636     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23637   %}
23638   ins_pipe( pipe_slow );
23639 %}
23640 
23641 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23642   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23643   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23644   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23645   ins_encode %{
23646     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23647     int vlen_enc = vector_length_encoding(src_node);
23648     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23649     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23650   %}
23651   ins_pipe( pipe_slow );
23652 %}
23653 
23654 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23655   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23656   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23657   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23658   ins_encode %{
23659     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23660     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23661     int vlen_enc = vector_length_encoding(src_node);
23662     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23663   %}
23664   ins_pipe( pipe_slow );
23665 %}
23666 
23667 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23668   match(Set addr (VerifyVectorAlignment addr mask));
23669   effect(KILL cr);
23670   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23671   ins_encode %{
23672     Label Lskip;
23673     // check if masked bits of addr are zero
23674     __ testq($addr$$Register, $mask$$constant);
23675     __ jccb(Assembler::equal, Lskip);
23676     __ stop("verify_vector_alignment found a misaligned vector memory access");
23677     __ bind(Lskip);
23678   %}
23679   ins_pipe(pipe_slow);
23680 %}
23681 
23682 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23683   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23684   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23685   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23686   ins_encode %{
23687     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23688     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23689 
23690     Label DONE;
23691     int vlen_enc = vector_length_encoding(this, $src1);
23692     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23693 
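    // ktmp1 gets a 1 for every masked lane whose elements compare equal and
    // ktmp2 is the complement of the mask, so their union is all-ones (kortest
    // sets the carry flag) iff every active lane matched, leaving dst == -1.
    // Otherwise the first mismatch index is tzcnt of the inverted compare mask.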
23694     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23695     __ mov64($dst$$Register, -1L);
23696     __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23697     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23698     __ jccb(Assembler::carrySet, DONE);
23699     __ kmovql($dst$$Register, $ktmp1$$KRegister);
23700     __ notq($dst$$Register);
23701     __ tzcntq($dst$$Register, $dst$$Register);
23702     __ bind(DONE);
23703   %}
23704   ins_pipe( pipe_slow );
23705 %}
23708 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23709   match(Set dst (VectorMaskGen len));
23710   effect(TEMP temp, KILL cr);
23711   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23712   ins_encode %{
23713     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23714   %}
23715   ins_pipe( pipe_slow );
23716 %}
23717 
23718 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23719   match(Set dst (VectorMaskGen len));
23720   format %{ "vector_mask_gen $len \t! vector mask generator" %}
23721   effect(TEMP temp);
23722   ins_encode %{
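    // e.g. $len == 5 gives 0xFFFFFFFFFFFFFFFF >> 59 == 0x1F,
    // a mask with the five lowest bits set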
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23724     __ kmovql($dst$$KRegister, $temp$$Register);
23725   %}
23726   ins_pipe( pipe_slow );
23727 %}
23728 
23729 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23730   predicate(n->in(1)->bottom_type()->isa_vectmask());
23731   match(Set dst (VectorMaskToLong mask));
23732   effect(TEMP dst, KILL cr);
23733   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23734   ins_encode %{
23735     int opcode = this->ideal_Opcode();
23736     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23737     int mask_len = Matcher::vector_length(this, $mask);
23738     int mask_size = mask_len * type2aelembytes(mbt);
23739     int vlen_enc = vector_length_encoding(this, $mask);
23740     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23741                              $dst$$Register, mask_len, mask_size, vlen_enc);
23742   %}
23743   ins_pipe( pipe_slow );
23744 %}
23745 
23746 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23747   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23748   match(Set dst (VectorMaskToLong mask));
23749   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23750   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23751   ins_encode %{
23752     int opcode = this->ideal_Opcode();
23753     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23754     int mask_len = Matcher::vector_length(this, $mask);
23755     int vlen_enc = vector_length_encoding(this, $mask);
23756     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23757                              $dst$$Register, mask_len, mbt, vlen_enc);
23758   %}
23759   ins_pipe( pipe_slow );
23760 %}
23761 
23762 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
23763   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23764   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
23765   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
23766   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23767   ins_encode %{
23768     int opcode = this->ideal_Opcode();
23769     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23770     int mask_len = Matcher::vector_length(this, $mask);
23771     int vlen_enc = vector_length_encoding(this, $mask);
23772     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23773                              $dst$$Register, mask_len, mbt, vlen_enc);
23774   %}
23775   ins_pipe( pipe_slow );
23776 %}
23777 
23778 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23779   predicate(n->in(1)->bottom_type()->isa_vectmask());
23780   match(Set dst (VectorMaskTrueCount mask));
23781   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23782   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
23783   ins_encode %{
23784     int opcode = this->ideal_Opcode();
23785     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23786     int mask_len = Matcher::vector_length(this, $mask);
23787     int mask_size = mask_len * type2aelembytes(mbt);
23788     int vlen_enc = vector_length_encoding(this, $mask);
23789     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23790                              $tmp$$Register, mask_len, mask_size, vlen_enc);
23791   %}
23792   ins_pipe( pipe_slow );
23793 %}
23794 
23795 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23796   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23797   match(Set dst (VectorMaskTrueCount mask));
23798   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23799   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23800   ins_encode %{
23801     int opcode = this->ideal_Opcode();
23802     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23803     int mask_len = Matcher::vector_length(this, $mask);
23804     int vlen_enc = vector_length_encoding(this, $mask);
23805     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23806                              $tmp$$Register, mask_len, mbt, vlen_enc);
23807   %}
23808   ins_pipe( pipe_slow );
23809 %}
23810 
23811 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23812   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23813   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
23814   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23815   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23816   ins_encode %{
23817     int opcode = this->ideal_Opcode();
23818     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23819     int mask_len = Matcher::vector_length(this, $mask);
23820     int vlen_enc = vector_length_encoding(this, $mask);
23821     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23822                              $tmp$$Register, mask_len, mbt, vlen_enc);
23823   %}
23824   ins_pipe( pipe_slow );
23825 %}
23826 
23827 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23828   predicate(n->in(1)->bottom_type()->isa_vectmask());
23829   match(Set dst (VectorMaskFirstTrue mask));
23830   match(Set dst (VectorMaskLastTrue mask));
23831   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23832   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
23833   ins_encode %{
23834     int opcode = this->ideal_Opcode();
23835     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23836     int mask_len = Matcher::vector_length(this, $mask);
23837     int mask_size = mask_len * type2aelembytes(mbt);
23838     int vlen_enc = vector_length_encoding(this, $mask);
23839     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23840                              $tmp$$Register, mask_len, mask_size, vlen_enc);
23841   %}
23842   ins_pipe( pipe_slow );
23843 %}
23844 
23845 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23846   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23847   match(Set dst (VectorMaskFirstTrue mask));
23848   match(Set dst (VectorMaskLastTrue mask));
23849   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23850   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23851   ins_encode %{
23852     int opcode = this->ideal_Opcode();
23853     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23854     int mask_len = Matcher::vector_length(this, $mask);
23855     int vlen_enc = vector_length_encoding(this, $mask);
23856     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23857                              $tmp$$Register, mask_len, mbt, vlen_enc);
23858   %}
23859   ins_pipe( pipe_slow );
23860 %}
23861 
23862 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23863   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23864   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
23865   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
23866   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23867   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23868   ins_encode %{
23869     int opcode = this->ideal_Opcode();
23870     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23871     int mask_len = Matcher::vector_length(this, $mask);
23872     int vlen_enc = vector_length_encoding(this, $mask);
23873     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23874                              $tmp$$Register, mask_len, mbt, vlen_enc);
23875   %}
23876   ins_pipe( pipe_slow );
23877 %}
23878 
23879 // --------------------------------- Compress/Expand Operations ---------------------------
23880 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
23881   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
23882   match(Set dst (CompressV src mask));
23883   match(Set dst (ExpandV src mask));
23884   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
23886   ins_encode %{
23887     int opcode = this->ideal_Opcode();
23888     int vlen_enc = vector_length_encoding(this);
23889     BasicType bt  = Matcher::vector_element_basic_type(this);
23890     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
23891                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
23892   %}
23893   ins_pipe( pipe_slow );
23894 %}
23895 
23896 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
23897   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
23898   match(Set dst (CompressV src mask));
23899   match(Set dst (ExpandV src mask));
23900   format %{ "vector_compress_expand $dst, $src, $mask" %}
23901   ins_encode %{
23902     int opcode = this->ideal_Opcode();
23903     int vector_len = vector_length_encoding(this);
23904     BasicType bt  = Matcher::vector_element_basic_type(this);
23905     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
23906   %}
23907   ins_pipe( pipe_slow );
23908 %}
23909 
23910 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
23911   match(Set dst (CompressM mask));
23912   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
23913   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
23914   ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "required");
23916     int mask_len = Matcher::vector_length(this);
23917     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
23918   %}
23919   ins_pipe( pipe_slow );
23920 %}
23921 
23922 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
23923 
23924 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
23925   predicate(!VM_Version::supports_gfni());
23926   match(Set dst (ReverseV src));
23927   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
23929   ins_encode %{
23930     int vec_enc = vector_length_encoding(this);
23931     BasicType bt = Matcher::vector_element_basic_type(this);
23932     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23933                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
23934   %}
23935   ins_pipe( pipe_slow );
23936 %}
23937 
23938 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
23939   predicate(VM_Version::supports_gfni());
23940   match(Set dst (ReverseV src));
23941   effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
23943   ins_encode %{
23944     int vec_enc = vector_length_encoding(this);
23945     BasicType bt  = Matcher::vector_element_basic_type(this);
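    // 0x8040201008040201 is the 8x8 bit matrix that, applied via
    // vgf2p8affineqb, reverses the bit order within each byte.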
23946     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
23947     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
23948                                $xtmp$$XMMRegister);
23949   %}
23950   ins_pipe( pipe_slow );
23951 %}
23952 
23953 instruct vreverse_byte_reg(vec dst, vec src) %{
23954   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
23955   match(Set dst (ReverseBytesV src));
23956   effect(TEMP dst);
23957   format %{ "vector_reverse_byte $dst, $src" %}
23958   ins_encode %{
23959     int vec_enc = vector_length_encoding(this);
23960     BasicType bt = Matcher::vector_element_basic_type(this);
23961     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
23962   %}
23963   ins_pipe( pipe_slow );
23964 %}
23965 
23966 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
23967   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
23968   match(Set dst (ReverseBytesV src));
23969   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
23971   ins_encode %{
23972     int vec_enc = vector_length_encoding(this);
23973     BasicType bt = Matcher::vector_element_basic_type(this);
23974     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23975                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
23976   %}
23977   ins_pipe( pipe_slow );
23978 %}
23979 
23980 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
23981 
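// AVX512CD provides vplzcntd/vplzcntq directly for int and long elements; the
// other patterns below synthesize the count for subword types and pre-AVX512
// targets.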
23982 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
23983   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23984                                               Matcher::vector_length_in_bytes(n->in(1))));
23985   match(Set dst (CountLeadingZerosV src));
23986   format %{ "vector_count_leading_zeros $dst, $src" %}
23987   ins_encode %{
23988      int vlen_enc = vector_length_encoding(this, $src);
23989      BasicType bt = Matcher::vector_element_basic_type(this, $src);
23990      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23991                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
23992   %}
23993   ins_pipe( pipe_slow );
23994 %}
23995 
23996 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
23997   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23998                                               Matcher::vector_length_in_bytes(n->in(1))));
23999   match(Set dst (CountLeadingZerosV src mask));
24000   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24001   ins_encode %{
24002     int vlen_enc = vector_length_encoding(this, $src);
24003     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24004     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24005     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24006                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24007   %}
24008   ins_pipe( pipe_slow );
24009 %}
24010 
24011 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24012   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24013             VM_Version::supports_avx512cd() &&
24014             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24015   match(Set dst (CountLeadingZerosV src));
24016   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24018   ins_encode %{
24019     int vlen_enc = vector_length_encoding(this, $src);
24020     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24021     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24022                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24023   %}
24024   ins_pipe( pipe_slow );
24025 %}
24026 
24027 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24028   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24029   match(Set dst (CountLeadingZerosV src));
24030   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24032   ins_encode %{
24033     int vlen_enc = vector_length_encoding(this, $src);
24034     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24035     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24036                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24037                                        $rtmp$$Register, true, vlen_enc);
24038   %}
24039   ins_pipe( pipe_slow );
24040 %}
24041 
24042 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24043   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24044             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24045   match(Set dst (CountLeadingZerosV src));
24046   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24047   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24048   ins_encode %{
24049     int vlen_enc = vector_length_encoding(this, $src);
24050     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24051     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24052                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24053   %}
24054   ins_pipe( pipe_slow );
24055 %}
24056 
24057 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24058   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24059             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24060   match(Set dst (CountLeadingZerosV src));
24061   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24062   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24063   ins_encode %{
24064     int vlen_enc = vector_length_encoding(this, $src);
24065     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24066     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24067                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24068   %}
24069   ins_pipe( pipe_slow );
24070 %}
24071 
24072 // ---------------------------------- Vector Masked Operations ------------------------------------
24073 
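// The masked arithmetic patterns below all funnel through evmasked_op, which
// emits the EVEX-encoded form of the ideal opcode with $mask as the write
// mask; the 'true' argument selects merge (rather than zero) masking, so
// lanes with a clear mask bit keep the previous $dst value.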
24074 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24075   match(Set dst (AddVB (Binary dst src2) mask));
24076   match(Set dst (AddVS (Binary dst src2) mask));
24077   match(Set dst (AddVI (Binary dst src2) mask));
24078   match(Set dst (AddVL (Binary dst src2) mask));
24079   match(Set dst (AddVF (Binary dst src2) mask));
24080   match(Set dst (AddVD (Binary dst src2) mask));
24081   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24082   ins_encode %{
24083     int vlen_enc = vector_length_encoding(this);
24084     BasicType bt = Matcher::vector_element_basic_type(this);
24085     int opc = this->ideal_Opcode();
24086     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24087                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24088   %}
24089   ins_pipe( pipe_slow );
24090 %}
24091 
24092 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24093   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24094   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24095   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24096   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24097   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24098   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24099   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24100   ins_encode %{
24101     int vlen_enc = vector_length_encoding(this);
24102     BasicType bt = Matcher::vector_element_basic_type(this);
24103     int opc = this->ideal_Opcode();
24104     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24105                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24106   %}
24107   ins_pipe( pipe_slow );
24108 %}
24109 
24110 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24111   match(Set dst (XorV (Binary dst src2) mask));
24112   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24113   ins_encode %{
24114     int vlen_enc = vector_length_encoding(this);
24115     BasicType bt = Matcher::vector_element_basic_type(this);
24116     int opc = this->ideal_Opcode();
24117     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24118                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24119   %}
24120   ins_pipe( pipe_slow );
24121 %}
24122 
24123 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24124   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24125   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24126   ins_encode %{
24127     int vlen_enc = vector_length_encoding(this);
24128     BasicType bt = Matcher::vector_element_basic_type(this);
24129     int opc = this->ideal_Opcode();
24130     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24131                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24132   %}
24133   ins_pipe( pipe_slow );
24134 %}
24135 
24136 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24137   match(Set dst (OrV (Binary dst src2) mask));
24138   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24139   ins_encode %{
24140     int vlen_enc = vector_length_encoding(this);
24141     BasicType bt = Matcher::vector_element_basic_type(this);
24142     int opc = this->ideal_Opcode();
24143     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24144                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24145   %}
24146   ins_pipe( pipe_slow );
24147 %}
24148 
24149 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24150   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24151   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24152   ins_encode %{
24153     int vlen_enc = vector_length_encoding(this);
24154     BasicType bt = Matcher::vector_element_basic_type(this);
24155     int opc = this->ideal_Opcode();
24156     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24157                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24158   %}
24159   ins_pipe( pipe_slow );
24160 %}
24161 
24162 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24163   match(Set dst (AndV (Binary dst src2) mask));
24164   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24165   ins_encode %{
24166     int vlen_enc = vector_length_encoding(this);
24167     BasicType bt = Matcher::vector_element_basic_type(this);
24168     int opc = this->ideal_Opcode();
24169     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24170                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24171   %}
24172   ins_pipe( pipe_slow );
24173 %}
24174 
24175 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24176   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24177   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24178   ins_encode %{
24179     int vlen_enc = vector_length_encoding(this);
24180     BasicType bt = Matcher::vector_element_basic_type(this);
24181     int opc = this->ideal_Opcode();
24182     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24183                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24184   %}
24185   ins_pipe( pipe_slow );
24186 %}
24187 
24188 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24189   match(Set dst (SubVB (Binary dst src2) mask));
24190   match(Set dst (SubVS (Binary dst src2) mask));
24191   match(Set dst (SubVI (Binary dst src2) mask));
24192   match(Set dst (SubVL (Binary dst src2) mask));
24193   match(Set dst (SubVF (Binary dst src2) mask));
24194   match(Set dst (SubVD (Binary dst src2) mask));
24195   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24196   ins_encode %{
24197     int vlen_enc = vector_length_encoding(this);
24198     BasicType bt = Matcher::vector_element_basic_type(this);
24199     int opc = this->ideal_Opcode();
24200     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24201                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24202   %}
24203   ins_pipe( pipe_slow );
24204 %}
24205 
24206 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24207   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24208   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24209   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24210   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24211   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24212   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24213   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24214   ins_encode %{
24215     int vlen_enc = vector_length_encoding(this);
24216     BasicType bt = Matcher::vector_element_basic_type(this);
24217     int opc = this->ideal_Opcode();
24218     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24219                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24220   %}
24221   ins_pipe( pipe_slow );
24222 %}
24223 
24224 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24225   match(Set dst (MulVS (Binary dst src2) mask));
24226   match(Set dst (MulVI (Binary dst src2) mask));
24227   match(Set dst (MulVL (Binary dst src2) mask));
24228   match(Set dst (MulVF (Binary dst src2) mask));
24229   match(Set dst (MulVD (Binary dst src2) mask));
24230   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24231   ins_encode %{
24232     int vlen_enc = vector_length_encoding(this);
24233     BasicType bt = Matcher::vector_element_basic_type(this);
24234     int opc = this->ideal_Opcode();
24235     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24236                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24237   %}
24238   ins_pipe( pipe_slow );
24239 %}
24240 
24241 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24242   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24243   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24244   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24245   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24246   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24247   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24248   ins_encode %{
24249     int vlen_enc = vector_length_encoding(this);
24250     BasicType bt = Matcher::vector_element_basic_type(this);
24251     int opc = this->ideal_Opcode();
24252     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24253                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24254   %}
24255   ins_pipe( pipe_slow );
24256 %}
24257 
24258 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24259   match(Set dst (SqrtVF dst mask));
24260   match(Set dst (SqrtVD dst mask));
24261   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24262   ins_encode %{
24263     int vlen_enc = vector_length_encoding(this);
24264     BasicType bt = Matcher::vector_element_basic_type(this);
24265     int opc = this->ideal_Opcode();
24266     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24267                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24268   %}
24269   ins_pipe( pipe_slow );
24270 %}
24271 
24272 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24273   match(Set dst (DivVF (Binary dst src2) mask));
24274   match(Set dst (DivVD (Binary dst src2) mask));
24275   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24276   ins_encode %{
24277     int vlen_enc = vector_length_encoding(this);
24278     BasicType bt = Matcher::vector_element_basic_type(this);
24279     int opc = this->ideal_Opcode();
24280     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24281                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24282   %}
24283   ins_pipe( pipe_slow );
24284 %}
24285 
24286 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24287   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24288   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24289   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24290   ins_encode %{
24291     int vlen_enc = vector_length_encoding(this);
24292     BasicType bt = Matcher::vector_element_basic_type(this);
24293     int opc = this->ideal_Opcode();
24294     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24295                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24296   %}
24297   ins_pipe( pipe_slow );
24298 %}
24299 
24300 
24301 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24302   match(Set dst (RotateLeftV (Binary dst shift) mask));
24303   match(Set dst (RotateRightV (Binary dst shift) mask));
24304   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24305   ins_encode %{
24306     int vlen_enc = vector_length_encoding(this);
24307     BasicType bt = Matcher::vector_element_basic_type(this);
24308     int opc = this->ideal_Opcode();
24309     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24310                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24311   %}
24312   ins_pipe( pipe_slow );
24313 %}
24314 
24315 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24316   match(Set dst (RotateLeftV (Binary dst src2) mask));
24317   match(Set dst (RotateRightV (Binary dst src2) mask));
24318   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24319   ins_encode %{
24320     int vlen_enc = vector_length_encoding(this);
24321     BasicType bt = Matcher::vector_element_basic_type(this);
24322     int opc = this->ideal_Opcode();
24323     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24324                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24325   %}
24326   ins_pipe( pipe_slow );
24327 %}
24328 
24329 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24330   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24331   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24332   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24333   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24334   ins_encode %{
24335     int vlen_enc = vector_length_encoding(this);
24336     BasicType bt = Matcher::vector_element_basic_type(this);
24337     int opc = this->ideal_Opcode();
24338     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24339                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24340   %}
24341   ins_pipe( pipe_slow );
24342 %}
24343 
24344 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24345   predicate(!n->as_ShiftV()->is_var_shift());
24346   match(Set dst (LShiftVS (Binary dst src2) mask));
24347   match(Set dst (LShiftVI (Binary dst src2) mask));
24348   match(Set dst (LShiftVL (Binary dst src2) mask));
24349   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24350   ins_encode %{
24351     int vlen_enc = vector_length_encoding(this);
24352     BasicType bt = Matcher::vector_element_basic_type(this);
24353     int opc = this->ideal_Opcode();
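    // The trailing 'false' selects the uniform-count shift form; the
    // variable (per-element) shift variant below passes 'true' instead.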
24354     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24355                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24356   %}
24357   ins_pipe( pipe_slow );
24358 %}
24359 
24360 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24361   predicate(n->as_ShiftV()->is_var_shift());
24362   match(Set dst (LShiftVS (Binary dst src2) mask));
24363   match(Set dst (LShiftVI (Binary dst src2) mask));
24364   match(Set dst (LShiftVL (Binary dst src2) mask));
24365   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24366   ins_encode %{
24367     int vlen_enc = vector_length_encoding(this);
24368     BasicType bt = Matcher::vector_element_basic_type(this);
24369     int opc = this->ideal_Opcode();
24370     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24371                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24372   %}
24373   ins_pipe( pipe_slow );
24374 %}
24375 
24376 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24377   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24378   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24379   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24380   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24381   ins_encode %{
24382     int vlen_enc = vector_length_encoding(this);
24383     BasicType bt = Matcher::vector_element_basic_type(this);
24384     int opc = this->ideal_Opcode();
24385     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24386                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24387   %}
24388   ins_pipe( pipe_slow );
24389 %}
24390 
24391 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24392   predicate(!n->as_ShiftV()->is_var_shift());
24393   match(Set dst (RShiftVS (Binary dst src2) mask));
24394   match(Set dst (RShiftVI (Binary dst src2) mask));
24395   match(Set dst (RShiftVL (Binary dst src2) mask));
24396   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24397   ins_encode %{
24398     int vlen_enc = vector_length_encoding(this);
24399     BasicType bt = Matcher::vector_element_basic_type(this);
24400     int opc = this->ideal_Opcode();
24401     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24402                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24403   %}
24404   ins_pipe( pipe_slow );
24405 %}
24406 
24407 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24408   predicate(n->as_ShiftV()->is_var_shift());
24409   match(Set dst (RShiftVS (Binary dst src2) mask));
24410   match(Set dst (RShiftVI (Binary dst src2) mask));
24411   match(Set dst (RShiftVL (Binary dst src2) mask));
24412   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24413   ins_encode %{
24414     int vlen_enc = vector_length_encoding(this);
24415     BasicType bt = Matcher::vector_element_basic_type(this);
24416     int opc = this->ideal_Opcode();
24417     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24418                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24419   %}
24420   ins_pipe( pipe_slow );
24421 %}
24422 
24423 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24424   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24425   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24426   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24427   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24428   ins_encode %{
24429     int vlen_enc = vector_length_encoding(this);
24430     BasicType bt = Matcher::vector_element_basic_type(this);
24431     int opc = this->ideal_Opcode();
24432     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24433                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24434   %}
24435   ins_pipe( pipe_slow );
24436 %}
24437 
24438 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24439   predicate(!n->as_ShiftV()->is_var_shift());
24440   match(Set dst (URShiftVS (Binary dst src2) mask));
24441   match(Set dst (URShiftVI (Binary dst src2) mask));
24442   match(Set dst (URShiftVL (Binary dst src2) mask));
24443   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24444   ins_encode %{
24445     int vlen_enc = vector_length_encoding(this);
24446     BasicType bt = Matcher::vector_element_basic_type(this);
24447     int opc = this->ideal_Opcode();
24448     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24449                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24450   %}
24451   ins_pipe( pipe_slow );
24452 %}
24453 
24454 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24455   predicate(n->as_ShiftV()->is_var_shift());
24456   match(Set dst (URShiftVS (Binary dst src2) mask));
24457   match(Set dst (URShiftVI (Binary dst src2) mask));
24458   match(Set dst (URShiftVL (Binary dst src2) mask));
24459   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24460   ins_encode %{
24461     int vlen_enc = vector_length_encoding(this);
24462     BasicType bt = Matcher::vector_element_basic_type(this);
24463     int opc = this->ideal_Opcode();
24464     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24465                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24466   %}
24467   ins_pipe( pipe_slow );
24468 %}
24469 
24470 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24471   match(Set dst (MaxV (Binary dst src2) mask));
24472   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24473   ins_encode %{
24474     int vlen_enc = vector_length_encoding(this);
24475     BasicType bt = Matcher::vector_element_basic_type(this);
24476     int opc = this->ideal_Opcode();
24477     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24478                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24479   %}
24480   ins_pipe( pipe_slow );
24481 %}
24482 
24483 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24484   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24485   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24486   ins_encode %{
24487     int vlen_enc = vector_length_encoding(this);
24488     BasicType bt = Matcher::vector_element_basic_type(this);
24489     int opc = this->ideal_Opcode();
24490     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24491                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24492   %}
24493   ins_pipe( pipe_slow );
24494 %}
24495 
24496 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24497   match(Set dst (MinV (Binary dst src2) mask));
24498   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24499   ins_encode %{
24500     int vlen_enc = vector_length_encoding(this);
24501     BasicType bt = Matcher::vector_element_basic_type(this);
24502     int opc = this->ideal_Opcode();
24503     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24504                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24505   %}
24506   ins_pipe( pipe_slow );
24507 %}
24508 
24509 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24510   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24511   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24512   ins_encode %{
24513     int vlen_enc = vector_length_encoding(this);
24514     BasicType bt = Matcher::vector_element_basic_type(this);
24515     int opc = this->ideal_Opcode();
24516     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24517                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24518   %}
24519   ins_pipe( pipe_slow );
24520 %}
24521 
24522 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24523   match(Set dst (VectorRearrange (Binary dst src2) mask));
24524   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24525   ins_encode %{
24526     int vlen_enc = vector_length_encoding(this);
24527     BasicType bt = Matcher::vector_element_basic_type(this);
24528     int opc = this->ideal_Opcode();
24529     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24530                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24531   %}
24532   ins_pipe( pipe_slow );
24533 %}
24534 
24535 instruct vabs_masked(vec dst, kReg mask) %{
24536   match(Set dst (AbsVB dst mask));
24537   match(Set dst (AbsVS dst mask));
24538   match(Set dst (AbsVI dst mask));
24539   match(Set dst (AbsVL dst mask));
24540   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24541   ins_encode %{
24542     int vlen_enc = vector_length_encoding(this);
24543     BasicType bt = Matcher::vector_element_basic_type(this);
24544     int opc = this->ideal_Opcode();
24545     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24546                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24547   %}
24548   ins_pipe( pipe_slow );
24549 %}
24550 
24551 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24552   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24553   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24554   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24555   ins_encode %{
    assert(UseFMA, "needs FMA instruction support");
24557     int vlen_enc = vector_length_encoding(this);
24558     BasicType bt = Matcher::vector_element_basic_type(this);
24559     int opc = this->ideal_Opcode();
24560     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24561                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24562   %}
24563   ins_pipe( pipe_slow );
24564 %}
24565 
24566 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24567   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24568   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24569   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24570   ins_encode %{
    assert(UseFMA, "needs FMA instruction support");
24572     int vlen_enc = vector_length_encoding(this);
24573     BasicType bt = Matcher::vector_element_basic_type(this);
24574     int opc = this->ideal_Opcode();
24575     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24576                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24577   %}
24578   ins_pipe( pipe_slow );
24579 %}
24580 
24581 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24582   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24583   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24584   ins_encode %{
24585     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24586     int vlen_enc = vector_length_encoding(this, $src1);
24587     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24588 
    // Dispatch the masked comparison on the element basic type.
24590     switch (src1_elem_bt) {
24591       case T_BYTE: {
24592         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24593         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24594         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24595         break;
24596       }
24597       case T_SHORT: {
24598         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24599         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24600         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24601         break;
24602       }
24603       case T_INT: {
24604         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24605         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24606         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24607         break;
24608       }
24609       case T_LONG: {
24610         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24611         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24612         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24613         break;
24614       }
24615       case T_FLOAT: {
24616         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24617         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24618         break;
24619       }
24620       case T_DOUBLE: {
24621         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24622         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24623         break;
24624       }
24625       default: assert(false, "%s", type2name(src1_elem_bt)); break;
24626     }
24627   %}
24628   ins_pipe( pipe_slow );
24629 %}
24630 
24631 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24632   predicate(Matcher::vector_length(n) <= 32);
24633   match(Set dst (MaskAll src));
24634   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24635   ins_encode %{
24636     int mask_len = Matcher::vector_length(this);
24637     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24638   %}
24639   ins_pipe( pipe_slow );
24640 %}
24641 
24642 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24643   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24644   match(Set dst (XorVMask src (MaskAll cnt)));
24645   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24646   format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
24647   ins_encode %{
24648     uint masklen = Matcher::vector_length(this);
24649     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24650   %}
24651   ins_pipe( pipe_slow );
24652 %}
24653 
24654 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24655   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24656             (Matcher::vector_length(n) == 16) ||
24657             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24658   match(Set dst (XorVMask src (MaskAll cnt)));
24659   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24660   ins_encode %{
24661     uint masklen = Matcher::vector_length(this);
24662     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24663   %}
24664   ins_pipe( pipe_slow );
24665 %}
24666 
24667 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24668   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24669   match(Set dst (VectorLongToMask src));
24670   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24671   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24672   ins_encode %{
24673     int mask_len = Matcher::vector_length(this);
24674     int vec_enc  = vector_length_encoding(mask_len);
24675     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24676                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24677   %}
24678   ins_pipe( pipe_slow );
24679 %}
24680 
24681 
24682 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24683   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24684   match(Set dst (VectorLongToMask src));
24685   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24686   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
24687   ins_encode %{
24688     int mask_len = Matcher::vector_length(this);
24689     assert(mask_len <= 32, "invalid mask length");
24690     int vec_enc  = vector_length_encoding(mask_len);
24691     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24692                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24693   %}
24694   ins_pipe( pipe_slow );
24695 %}
24696 
24697 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24698   predicate(n->bottom_type()->isa_vectmask());
24699   match(Set dst (VectorLongToMask src));
24700   format %{ "long_to_mask_evex $dst, $src\t!" %}
24701   ins_encode %{
24702     __ kmov($dst$$KRegister, $src$$Register);
24703   %}
24704   ins_pipe( pipe_slow );
24705 %}
24706 
24707 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24708   match(Set dst (AndVMask src1 src2));
24709   match(Set dst (OrVMask src1 src2));
24710   match(Set dst (XorVMask src1 src2));
24711   effect(TEMP kscratch);
24712   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24713   ins_encode %{
24714     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24715     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24716     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24717     uint masklen = Matcher::vector_length(this);
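    // Byte-granular mask instructions (kandb/korb/kxorb) require AVX512DQ, so
    // without it sub-16-bit mask operations are widened to word (16-bit) width.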
24718     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24719     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24720   %}
24721   ins_pipe( pipe_slow );
24722 %}
24723 
24724 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24725   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24726   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24727   ins_encode %{
24728     int vlen_enc = vector_length_encoding(this);
24729     BasicType bt = Matcher::vector_element_basic_type(this);
24730     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24731                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24732   %}
24733   ins_pipe( pipe_slow );
24734 %}
24735 
24736 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24737   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24738   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24739   ins_encode %{
24740     int vlen_enc = vector_length_encoding(this);
24741     BasicType bt = Matcher::vector_element_basic_type(this);
24742     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24743                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24744   %}
24745   ins_pipe( pipe_slow );
24746 %}
24747 
24748 instruct castMM(kReg dst)
24749 %{
24750   match(Set dst (CastVV dst));
24751 
24752   size(0);
24753   format %{ "# castVV of $dst" %}
24754   ins_encode(/* empty encoding */);
24755   ins_cost(0);
24756   ins_pipe(empty);
24757 %}
24758 
24759 instruct castVV(vec dst)
24760 %{
24761   match(Set dst (CastVV dst));
24762 
24763   size(0);
24764   format %{ "# castVV of $dst" %}
24765   ins_encode(/* empty encoding */);
24766   ins_cost(0);
24767   ins_pipe(empty);
24768 %}
24769 
24770 instruct castVVLeg(legVec dst)
24771 %{
24772   match(Set dst (CastVV dst));
24773 
24774   size(0);
24775   format %{ "# castVV of $dst" %}
24776   ins_encode(/* empty encoding */);
24777   ins_cost(0);
24778   ins_pipe(empty);
24779 %}
24780 
24781 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
24782 %{
24783   match(Set dst (IsInfiniteF src));
24784   effect(TEMP ktmp, KILL cr);
24785   format %{ "float_class_check $dst, $src" %}
24786   ins_encode %{
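    // Class mask 0x18 selects the +infinity (0x08) and -infinity (0x10) classes.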
24787     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24788     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24789   %}
24790   ins_pipe(pipe_slow);
24791 %}
24792 
24793 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
24794 %{
24795   match(Set dst (IsInfiniteD src));
24796   effect(TEMP ktmp, KILL cr);
24797   format %{ "double_class_check $dst, $src" %}
24798   ins_encode %{
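    // Same ±infinity class mask (0x18) as the float variant above.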
24799     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24800     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24801   %}
24802   ins_pipe(pipe_slow);
24803 %}
24804 
24805 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
24806 %{
24807   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24808             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24809   match(Set dst (SaturatingAddV src1 src2));
24810   match(Set dst (SaturatingSubV src1 src2));
24811   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24812   ins_encode %{
24813     int vlen_enc = vector_length_encoding(this);
24814     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24815     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24816                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24817   %}
24818   ins_pipe(pipe_slow);
24819 %}
24820 
24821 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
24822 %{
24823   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24824             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24825   match(Set dst (SaturatingAddV src1 src2));
24826   match(Set dst (SaturatingSubV src1 src2));
24827   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24828   ins_encode %{
24829     int vlen_enc = vector_length_encoding(this);
24830     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24831     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24832                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24833   %}
24834   ins_pipe(pipe_slow);
24835 %}
24836 
24837 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
24838 %{
24839   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24840             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24841             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24842   match(Set dst (SaturatingAddV src1 src2));
24843   match(Set dst (SaturatingSubV src1 src2));
24844   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
24845   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
24846   ins_encode %{
24847     int vlen_enc = vector_length_encoding(this);
24848     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24849     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24850                                         $src1$$XMMRegister, $src2$$XMMRegister,
24851                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24852                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
24853   %}
24854   ins_pipe(pipe_slow);
24855 %}
24856 
24857 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
24858 %{
24859   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24860             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24861             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24862   match(Set dst (SaturatingAddV src1 src2));
24863   match(Set dst (SaturatingSubV src1 src2));
24864   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
24865   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
24866   ins_encode %{
24867     int vlen_enc = vector_length_encoding(this);
24868     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24869     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24870                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24871                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
24872   %}
24873   ins_pipe(pipe_slow);
24874 %}
24875 
24876 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
24877 %{
24878   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24879             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24880             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24881   match(Set dst (SaturatingAddV src1 src2));
24882   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
24883   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
24884   ins_encode %{
24885     int vlen_enc = vector_length_encoding(this);
24886     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24887     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24888                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24889   %}
24890   ins_pipe(pipe_slow);
24891 %}
24892 
24893 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
24894 %{
24895   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24896             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24897             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24898   match(Set dst (SaturatingAddV src1 src2));
24899   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24900   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24901   ins_encode %{
24902     int vlen_enc = vector_length_encoding(this);
24903     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24904     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24905                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
24906   %}
24907   ins_pipe(pipe_slow);
24908 %}
24909 
24910 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
24911 %{
24912   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24913             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24914             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24915   match(Set dst (SaturatingSubV src1 src2));
24916   effect(TEMP ktmp);
24917   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
24918   ins_encode %{
24919     int vlen_enc = vector_length_encoding(this);
24920     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24921     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24922                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24923   %}
24924   ins_pipe(pipe_slow);
24925 %}
24926 
24927 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
24928 %{
24929   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24930             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24931             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24932   match(Set dst (SaturatingSubV src1 src2));
24933   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24934   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
24935   ins_encode %{
24936     int vlen_enc = vector_length_encoding(this);
24937     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24938     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24939                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
24940   %}
24941   ins_pipe(pipe_slow);
24942 %}
24943 
24944 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
24945 %{
24946   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24947             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24948   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
24949   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
24950   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24951   ins_encode %{
24952     int vlen_enc = vector_length_encoding(this);
24953     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24954     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24955                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
24956   %}
24957   ins_pipe(pipe_slow);
24958 %}
24959 
24960 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
24961 %{
24962   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24963             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24964   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
24965   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
24966   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24967   ins_encode %{
24968     int vlen_enc = vector_length_encoding(this);
24969     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24970     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24971                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
24972   %}
24973   ins_pipe(pipe_slow);
24974 %}
24975 
24976 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
24977   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24978             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24979   match(Set dst (SaturatingAddV (Binary dst src) mask));
24980   match(Set dst (SaturatingSubV (Binary dst src) mask));
24981   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
24982   ins_encode %{
24983     int vlen_enc = vector_length_encoding(this);
24984     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24985     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
24986                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
24987   %}
24988   ins_pipe( pipe_slow );
24989 %}
24990 
24991 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
24992   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24993             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24994   match(Set dst (SaturatingAddV (Binary dst src) mask));
24995   match(Set dst (SaturatingSubV (Binary dst src) mask));
24996   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
24997   ins_encode %{
24998     int vlen_enc = vector_length_encoding(this);
24999     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25000     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25001                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25002   %}
25003   ins_pipe( pipe_slow );
25004 %}
25005 
25006 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25007   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25008             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25009   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25010   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25011   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25012   ins_encode %{
25013     int vlen_enc = vector_length_encoding(this);
25014     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25015     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25016                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25017   %}
25018   ins_pipe( pipe_slow );
25019 %}
25020 
25021 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25022   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25023             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25024   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25025   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25026   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25027   ins_encode %{
25028     int vlen_enc = vector_length_encoding(this);
25029     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25030     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25031                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25032   %}
25033   ins_pipe( pipe_slow );
25034 %}
25035 
25036 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25037 %{
25038   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25039   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25040   ins_encode %{
25041     int vlen_enc = vector_length_encoding(this);
25042     BasicType bt = Matcher::vector_element_basic_type(this);
25043     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25044   %}
25045   ins_pipe(pipe_slow);
25046 %}
25047 
25048 instruct reinterpretS2HF(regF dst, rRegI src)
25049 %{
25050   match(Set dst (ReinterpretS2HF src));
25051   format %{ "vmovw $dst, $src" %}
25052   ins_encode %{
25053     __ vmovw($dst$$XMMRegister, $src$$Register);
25054   %}
25055   ins_pipe(pipe_slow);
25056 %}
25057 
25058 instruct reinterpretHF2S(rRegI dst, regF src)
25059 %{
25060   match(Set dst (ReinterpretHF2S src));
25061   format %{ "vmovw $dst, $src" %}
25062   ins_encode %{
25063     __ vmovw($dst$$Register, $src$$XMMRegister);
25064   %}
25065   ins_pipe(pipe_slow);
25066 %}
25067 
25068 instruct convF2HFAndS2HF(regF dst, regF src)
25069 %{
25070   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25071   format %{ "convF2HFAndS2HF $dst, $src" %}
25072   ins_encode %{
25073     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25074   %}
25075   ins_pipe(pipe_slow);
25076 %}
25077 
25078 instruct convHF2SAndHF2F(regF dst, regF src)
25079 %{
25080   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25081   format %{ "convHF2SAndHF2F $dst, $src" %}
25082   ins_encode %{
25083     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25084   %}
25085   ins_pipe(pipe_slow);
25086 %}
25087 
25088 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25089 %{
25090   match(Set dst (SqrtHF src));
25091   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25092   ins_encode %{
25093     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25094   %}
25095   ins_pipe(pipe_slow);
25096 %}
25097 
25098 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25099 %{
25100   match(Set dst (AddHF src1 src2));
25101   match(Set dst (DivHF src1 src2));
25102   match(Set dst (MulHF src1 src2));
25103   match(Set dst (SubHF src1 src2));
25104   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25105   ins_encode %{
25106     int opcode = this->ideal_Opcode();
25107     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25108   %}
25109   ins_pipe(pipe_slow);
25110 %}
25111 
25112 instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2)
25113 %{
25114   predicate(VM_Version::supports_avx10_2());
25115   match(Set dst (MaxHF src1 src2));
25116   match(Set dst (MinHF src1 src2));
25117   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25118   ins_encode %{
25119     int function = this->ideal_Opcode() == Op_MinHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25120     __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25121   %}
25122   ins_pipe( pipe_slow );
25123 %}
25124 
25125 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25126 %{
25127   predicate(!VM_Version::supports_avx10_2());
25128   match(Set dst (MaxHF src1 src2));
25129   match(Set dst (MinHF src1 src2));
25130   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25131   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25132   ins_encode %{
25133     int opcode = this->ideal_Opcode();
25134     __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25135                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25136   %}
25137   ins_pipe( pipe_slow );
25138 %}
25139 
25140 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25141 %{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2" %}
25145   ins_encode %{
25146     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25147   %}
25148   ins_pipe( pipe_slow );
25149 %}
25150 
25151 
25152 instruct vector_sqrt_HF_reg(vec dst, vec src)
25153 %{
25154   match(Set dst (SqrtVHF src));
25155   format %{ "vector_sqrt_fp16 $dst, $src" %}
25156   ins_encode %{
25157     int vlen_enc = vector_length_encoding(this);
25158     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25159   %}
25160   ins_pipe(pipe_slow);
25161 %}
25162 
25163 instruct vector_sqrt_HF_mem(vec dst, memory src)
25164 %{
25165   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25166   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25167   ins_encode %{
25168     int vlen_enc = vector_length_encoding(this);
25169     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25170   %}
25171   ins_pipe(pipe_slow);
25172 %}
25173 
25174 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25175 %{
25176   match(Set dst (AddVHF src1 src2));
25177   match(Set dst (DivVHF src1 src2));
25178   match(Set dst (MulVHF src1 src2));
25179   match(Set dst (SubVHF src1 src2));
25180   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25181   ins_encode %{
25182     int vlen_enc = vector_length_encoding(this);
25183     int opcode = this->ideal_Opcode();
25184     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25185   %}
25186   ins_pipe(pipe_slow);
25187 %}
25188 
25189 
25190 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25191 %{
25192   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25193   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25194   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25195   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25196   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25197   ins_encode %{
25198     int vlen_enc = vector_length_encoding(this);
25199     int opcode = this->ideal_Opcode();
25200     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25201   %}
25202   ins_pipe(pipe_slow);
25203 %}
25204 
25205 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25206 %{
25207   match(Set dst (FmaVHF src2 (Binary dst src1)));
25208   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25209   ins_encode %{
25210     int vlen_enc = vector_length_encoding(this);
25211     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25212   %}
25213   ins_pipe( pipe_slow );
25214 %}
25215 
25216 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25217 %{
25218   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25219   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25220   ins_encode %{
25221     int vlen_enc = vector_length_encoding(this);
25222     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25223   %}
25224   ins_pipe( pipe_slow );
25225 %}
25226 
25227 instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2)
25228 %{
25229   predicate(VM_Version::supports_avx10_2());
25230   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25231   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25232   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25233   ins_encode %{
25234     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25236     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25237   %}
25238   ins_pipe( pipe_slow );
25239 %}
25240 
25241 instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2)
25242 %{
25243   predicate(VM_Version::supports_avx10_2());
25244   match(Set dst (MinVHF src1 src2));
25245   match(Set dst (MaxVHF src1 src2));
25246   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25247   ins_encode %{
25248     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25250     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25251   %}
25252   ins_pipe( pipe_slow );
25253 %}
25254 
25255 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25256 %{
25257   predicate(!VM_Version::supports_avx10_2());
25258   match(Set dst (MinVHF src1 src2));
25259   match(Set dst (MaxVHF src1 src2));
25260   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25261   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25262   ins_encode %{
25263     int vlen_enc = vector_length_encoding(this);
25264     int opcode = this->ideal_Opcode();
25265     __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25266                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25267   %}
25268   ins_pipe( pipe_slow );
25269 %}
25270 
25271 //----------PEEPHOLE RULES-----------------------------------------------------
25272 // These must follow all instruction definitions as they use the names
25273 // defined in the instructions definitions.
25274 //
25275 // peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate holds
25277 //
25278 // peepmatch ( root_instr_name [preceding_instruction]* );
25279 //
25280 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file and have the signature
// // MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...),
// // the arguments being the basic block, the current node index inside the
// // block, the register allocator, a function that when invoked creates the
// // replacement node defined in peepreplace, and the rule numbers of the nodes
// // appearing in the corresponding peepmatch; the procedure returns true if
// // successful, else false
25289 //
25290 // peepconstraint %{
25291 // (instruction_number.operand_name relational_op instruction_number.operand_name
25292 //  [, ...] );
// // instruction numbers are zero-based, in left-to-right order of peepmatch
25294 //
25295 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25296 // // provide an instruction_number.operand_name for each operand that appears
25297 // // in the replacement instruction's match rule
25298 //
25299 // ---------VM FLAGS---------------------------------------------------------
25300 //
25301 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25302 //
25303 // Each peephole rule is given an identifying number starting with zero and
25304 // increasing by one in the order seen by the parser.  An individual peephole
25305 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25306 // on the command-line.
25307 //
25308 // ---------CURRENT LIMITATIONS----------------------------------------------
25309 //
25310 // Only transformations inside a basic block (do we need more for peephole)
25311 //
25312 // ---------EXAMPLE----------------------------------------------------------
25313 //
25314 // // pertinent parts of existing instructions in architecture description
25315 // instruct movI(rRegI dst, rRegI src)
25316 // %{
25317 //   match(Set dst (CopyI src));
25318 // %}
25319 //
25320 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25321 // %{
25322 //   match(Set dst (AddI dst src));
25323 //   effect(KILL cr);
25324 // %}
25325 //
25326 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25327 // %{
25328 //   match(Set dst (AddI dst src));
25329 // %}
25330 //
25331 // 1. Simple replacement
25332 // - Only match adjacent instructions in same basic block
25333 // - Only equality constraints
25334 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25335 // - Only one replacement instruction
25336 //
25337 // // Change (inc mov) to lea
25338 // peephole %{
25339 //   // lea should only be emitted when beneficial
25340 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25341 //   // increment preceded by register-register move
25342 //   peepmatch ( incI_rReg movI );
25343 //   // require that the destination register of the increment
25344 //   // match the destination register of the move
25345 //   peepconstraint ( 0.dst == 1.dst );
25346 //   // construct a replacement instruction that sets
25347 //   // the destination to ( move's source register + one )
25348 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25349 // %}
25350 //
25351 // 2. Procedural replacement
// - More flexible in finding relevant nodes
25353 // - More flexible constraints
25354 // - More flexible transformations
25355 // - May utilise architecture-dependent API more effectively
// - Currently limited to one replacement instruction due to adlc parsing capabilities
25357 //
25358 // // Change (inc mov) to lea
25359 // peephole %{
25360 //   // lea should only be emitted when beneficial
25361 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25362 //   // the rule numbers of these nodes inside are passed into the function below
25363 //   peepmatch ( incI_rReg movI );
25364 //   // the method that takes the responsibility of transformation
25365 //   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that creates this node
//   // when invoked is passed into the procedure above
25368 //   peepreplace ( leaI_rReg_immI() );
25369 // %}
25370 
// These instructions are not matched by the matcher but are used by the peephole rules
25372 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25373 %{
25374   predicate(false);
25375   match(Set dst (AddI src1 src2));
25376   format %{ "leal    $dst, [$src1 + $src2]" %}
25377   ins_encode %{
25378     Register dst = $dst$$Register;
25379     Register src1 = $src1$$Register;
25380     Register src2 = $src2$$Register;
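    // An address with rbp or r13 as the base always needs a displacement byte
    // (mod == 0b00 with that base encoding is reserved), so swap the operands
    // to keep the lea encoding short.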
25381     if (src1 != rbp && src1 != r13) {
25382       __ leal(dst, Address(src1, src2, Address::times_1));
25383     } else {
25384       assert(src2 != rbp && src2 != r13, "");
25385       __ leal(dst, Address(src2, src1, Address::times_1));
25386     }
25387   %}
25388   ins_pipe(ialu_reg_reg);
25389 %}
25390 
25391 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25392 %{
25393   predicate(false);
25394   match(Set dst (AddI src1 src2));
25395   format %{ "leal    $dst, [$src1 + $src2]" %}
25396   ins_encode %{
25397     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25398   %}
25399   ins_pipe(ialu_reg_reg);
25400 %}
25401 
25402 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25403 %{
25404   predicate(false);
25405   match(Set dst (LShiftI src shift));
25406   format %{ "leal    $dst, [$src << $shift]" %}
25407   ins_encode %{
25408     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25409     Register src = $src$$Register;
25410     if (scale == Address::times_2 && src != rbp && src != r13) {
25411       __ leal($dst$$Register, Address(src, src, Address::times_1));
25412     } else {
25413       __ leal($dst$$Register, Address(noreg, src, scale));
25414     }
25415   %}
25416   ins_pipe(ialu_reg_reg);
25417 %}
25418 
25419 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25420 %{
25421   predicate(false);
25422   match(Set dst (AddL src1 src2));
25423   format %{ "leaq    $dst, [$src1 + $src2]" %}
25424   ins_encode %{
25425     Register dst = $dst$$Register;
25426     Register src1 = $src1$$Register;
25427     Register src2 = $src2$$Register;
25428     if (src1 != rbp && src1 != r13) {
25429       __ leaq(dst, Address(src1, src2, Address::times_1));
25430     } else {
25431       assert(src2 != rbp && src2 != r13, "");
25432       __ leaq(dst, Address(src2, src1, Address::times_1));
25433     }
25434   %}
25435   ins_pipe(ialu_reg_reg);
25436 %}
25437 
25438 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25439 %{
25440   predicate(false);
25441   match(Set dst (AddL src1 src2));
25442   format %{ "leaq    $dst, [$src1 + $src2]" %}
25443   ins_encode %{
25444     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25445   %}
25446   ins_pipe(ialu_reg_reg);
25447 %}
25448 
25449 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25450 %{
25451   predicate(false);
25452   match(Set dst (LShiftL src shift));
25453   format %{ "leaq    $dst, [$src << $shift]" %}
25454   ins_encode %{
25455     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25456     Register src = $src$$Register;
25457     if (scale == Address::times_2 && src != rbp && src != r13) {
25458       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25459     } else {
25460       __ leaq($dst$$Register, Address(noreg, src, scale));
25461     }
25462   %}
25463   ins_pipe(ialu_reg_reg);
25464 %}
25465 
// These peephole rules replace mov + I pairs (where I is one of {add, inc,
// dec, sal}) with a single lea instruction. The {add, sal} rules are
// beneficial on processors with at least partial ALU support for lea
// (VM_Version::supports_fast_2op_lea()), whereas the {inc, dec} rules are
// generally only beneficial on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaI_rReg_rReg_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salI_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI2_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaL_rReg_rReg_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salL_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immI2_peep());
%}

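// These peephole rules hand leaP* instructions to lea_remove_redundant, which
// elides the lea when its address computation turns out to be redundant.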
peephole
%{
  peepmatch (leaPCompressedOopOffset);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP8Narrow);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP32Narrow);
  peepprocedure (lea_remove_redundant);
%}

// These peephole rules match instructions that set flags and are followed by
// a testI/L_reg. The test instruction is redundant when the downstream
// instructions (such as JCC or CMOV) only use flags that were already set by
// the preceding instruction.
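//
// For example, in the illustrative sequence
//
//   andl  rax, rbx
//   testl rax, rax
//   je    done
//
// the testl can be removed: andl already sets ZF and SF according to its
// result, and ZF is all the je consumes.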

// int variant
peephole
%{
  peepmatch (testI_reg);
  peepprocedure (test_may_remove);
%}

// long variant
peephole
%{
  peepmatch (testL_reg);
  peepprocedure (test_may_remove);
%}


//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.