//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
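//
// For example, the definition of RAX below,
//   reg_def RAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// reads as: save-on-call under both the compiled-Java and C calling
// conventions, spilled with LoadI/StoreI, and encoded as bit-pattern 0
// in the instruction's register fields.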

// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers)

// Previously set RBX, RSI, and RDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on RSI and RDI as SOE registers.

reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.
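//
// For example, chunk0 below puts the scratch registers R10 and R11
// first, while RSP, which participates in every call sequence, comes
// last.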

alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   R16,         R16_H,
                   R17,         R17_H,
                   R18,         R18_H,
                   R19,         R19_H,
                   R20,         R20_H,
                   R21,         R21_H,
                   R22,         R22_H,
                   R23,         R23_H,
                   R24,         R24_H,
                   R25,         R25_H,
                   R26,         R26_H,
                   R27,         R27_H,
                   R28,         R28_H,
                   R29,         R29_H,
                   R30,         R30_H,
                   R31,         R31_H,
                   RSP,         RSP_H);

// XMM registers.  512-bit registers, i.e. 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used by the SSE4.2 intrinsics, the array copy
// stubs and superword operations (see the UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//      XMM8-XMM15 must be encoded with REX (VEX when UseAVX is set)
// For EVEX-enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX when UseAVX is set).
//
// Linux ABI:   no registers preserved across function calls;
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls;
//              XMM0-XMM3 might hold parameters
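//
// For example, a Float in XMM0 occupies word (a) only, and a Double
// occupies words (a)-(b) (XMM0 and XMM0b below); 512-bit vector
// operations can use all sixteen words, XMM0 through XMM0p.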

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

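// The classes below are defined with a %{ ... %} body: each returns a
// register mask computed at runtime (the _*_REG_mask values are
// populated elsewhere, during matcher initialization), so membership
// can depend on VM flags and CPU features rather than being fixed here.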
// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1027                    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
 1055 // flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
// Class for pre-evex float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
 1079 // Class for evex float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
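
// reg_class_dynamic selects between its two underlying register classes at
// runtime: the first (evex) class is used when the predicate holds, otherwise
// the second (legacy, XMM0-XMM15 only) class.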
 1112 
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1115 
// Class for pre-evex double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
 1134 // Class for evex double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
// Class for pre-evex 32bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
 1189 // Class for evex 32bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre-evex 64bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for evex 64bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre-evex 128bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for evex 128bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre-evex 256bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for evex 256bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
// Class for evex 512bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
// Class for restricted 512bit vector registers (XMM0-XMM15 only)
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
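
// Returns true when the CastLL node's long type bounds are either unbounded
// (min_jlong/max_jlong) or representable as signed 32-bit immediates, so any
// checks derived from the bounds can be encoded with imm32 operands.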
 1464 
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
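
// Standard HotSpot shorthand: '__' abbreviates 'masm->' in the emission code
// that follows.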
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
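
// With compressed oops enabled, R12 holds the narrow-oop heap base and must
// be excluded from the allocatable masks built in reg_mask_init() below.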
 1524 
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
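  // r16-r31 are the Intel APX extended GPRs (EGPRs); they are removed from
  // the masks below whenever UseAPX is disabled.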
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
 1626 static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() &&
         (C->max_vector_size() > 16 || C->clear_upper_avx());  // Generate vzeroupper
 1628 }
 1629 
 1630 static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0;  // vzeroupper
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
  int offset = 5; // call rel32 (opcode 0xE8 + disp32): 5 bytes from start of call to where the return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
  int offset = 15; // 10-byte movq imm64 + 5-byte call rel32: 15 bytes from start of call sequence to where the return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
// The 4-byte displacement of the call instruction needs to be 4-byte aligned
// to ensure that it does not span a cache line and can be patched atomically.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
// The 4-byte displacement of the call instruction needs to be 4-byte aligned
// to ensure that it does not span a cache line and can be patched atomically.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;
 1678 }
 1679 
 1680 // This could be in MacroAssembler but it's fairly C2 specific
 1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1682   Label exit;
 1683   __ jccb(Assembler::noParity, exit);
 1684   __ pushf();
 1685   //
 1686   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1687   // zero OF,AF,SF for NaN values.
 1688   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1689   // values returns 'less than' result (CF is set).
 1690   // Leave the rest of flags unchanged.
 1691   //
 1692   //    7 6 5 4 3 2 1 0
 1693   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1694   //    0 0 1 0 1 0 1 1   (0x2B)
 1695   //
 1696   __ andq(Address(rsp, 0), 0xffffff2b);
 1697   __ popf();
 1698   __ bind(exit);
 1699 }
 1700 
 1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
 1702   Label done;
 1703   __ movl(dst, -1);
 1704   __ jcc(Assembler::parity, done);
 1705   __ jcc(Assembler::below, done);
 1706   __ setcc(Assembler::notEqual, dst);
 1707   __ bind(done);
 1708 }
 1709 
 1710 // Math.min()    # Math.max()
 1711 // --------------------------
 1712 // ucomis[s/d]   #
 1713 // ja   -> b     # a
 1714 // jp   -> NaN   # NaN
 1715 // jb   -> a     # b
 1716 // je            #
 1717 // |-jz -> a | b # a & b
 1718 // |    -> a     #
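// For equal inputs the sign of zero still matters: min(-0.0, +0.0) must be
// -0.0 and max(-0.0, +0.0) must be +0.0, hence the vpor/vpand on the 'zero'
// path below.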
 1719 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1720                             XMMRegister a, XMMRegister b,
 1721                             XMMRegister xmmt, Register rt,
 1722                             bool min, bool single) {
 1723 
 1724   Label nan, zero, below, above, done;
 1725 
 1726   if (single)
 1727     __ ucomiss(a, b);
 1728   else
 1729     __ ucomisd(a, b);
 1730 
 1731   if (dst->encoding() != (min ? b : a)->encoding())
 1732     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1733   else
 1734     __ jccb(Assembler::above, done);
 1735 
 1736   __ jccb(Assembler::parity, nan);  // PF=1
 1737   __ jccb(Assembler::below, below); // CF=1
 1738 
 1739   // equal
 1740   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1741   if (single) {
 1742     __ ucomiss(a, xmmt);
 1743     __ jccb(Assembler::equal, zero);
 1744 
 1745     __ movflt(dst, a);
 1746     __ jmp(done);
 1747   }
 1748   else {
 1749     __ ucomisd(a, xmmt);
 1750     __ jccb(Assembler::equal, zero);
 1751 
 1752     __ movdbl(dst, a);
 1753     __ jmp(done);
 1754   }
 1755 
 1756   __ bind(zero);
 1757   if (min)
 1758     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1759   else
 1760     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1761 
 1762   __ jmp(done);
 1763 
 1764   __ bind(above);
 1765   if (single)
 1766     __ movflt(dst, min ? b : a);
 1767   else
 1768     __ movdbl(dst, min ? b : a);
 1769 
 1770   __ jmp(done);
 1771 
 1772   __ bind(nan);
 1773   if (single) {
 1774     __ movl(rt, 0x7fc00000); // Float.NaN
 1775     __ movdl(dst, rt);
 1776   }
 1777   else {
 1778     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1779     __ movdq(dst, rt);
 1780   }
 1781   __ jmp(done);
 1782 
 1783   __ bind(below);
 1784   if (single)
 1785     __ movflt(dst, min ? a : b);
 1786   else
 1787     __ movdbl(dst, min ? a : b);
 1788 
 1789   __ bind(done);
 1790 }
 1791 
 1792 //=============================================================================
 1793 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1794 
 1795 int ConstantTable::calculate_table_base_offset() const {
 1796   return 0;  // absolute addressing, no offset
 1797 }
 1798 
 1799 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1800 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1801   ShouldNotReachHere();
 1802 }
 1803 
 1804 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1805   // Empty encoding
 1806 }
 1807 
 1808 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1809   return 0;
 1810 }
 1811 
 1812 #ifndef PRODUCT
 1813 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1814   st->print("# MachConstantBaseNode (empty encoding)");
 1815 }
 1816 #endif
 1817 
 1818 
 1819 //=============================================================================
 1820 #ifndef PRODUCT
 1821 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1822   Compile* C = ra_->C;
 1823 
 1824   int framesize = C->output()->frame_size_in_bytes();
 1825   int bangsize = C->output()->bang_size_in_bytes();
 1826   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1827   // Remove wordSize for return addr which is already pushed.
 1828   framesize -= wordSize;
 1829 
 1830   if (C->output()->need_stack_bang(bangsize)) {
 1831     framesize -= wordSize;
 1832     st->print("# stack bang (%d bytes)", bangsize);
 1833     st->print("\n\t");
 1834     st->print("pushq   rbp\t# Save rbp");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
    }
 1839     if (framesize) {
 1840       st->print("\n\t");
 1841       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1842     }
 1843   } else {
 1844     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1845     st->print("\n\t");
 1846     framesize -= wordSize;
 1847     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1848     if (PreserveFramePointer) {
 1849       st->print("\n\t");
 1850       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1851       if (framesize > 0) {
 1852         st->print("\n\t");
 1853         st->print("addq    rbp, #%d", framesize);
 1854       }
 1855     }
 1856   }
 1857 
 1858   if (VerifyStackAtCalls) {
 1859     st->print("\n\t");
 1860     framesize -= wordSize;
 1861     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1862 #ifdef ASSERT
 1863     st->print("\n\t");
 1864     st->print("# stack alignment check");
 1865 #endif
 1866   }
 1867   if (C->stub_function() != nullptr) {
 1868     st->print("\n\t");
 1869     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1870     st->print("\n\t");
 1871     st->print("je      fast_entry\t");
 1872     st->print("\n\t");
 1873     st->print("call    #nmethod_entry_barrier_stub\t");
 1874     st->print("\n\tfast_entry:");
 1875   }
 1876   st->cr();
 1877 }
 1878 #endif
 1879 
 1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1881   Compile* C = ra_->C;
 1882 
 1883   int framesize = C->output()->frame_size_in_bytes();
 1884   int bangsize = C->output()->bang_size_in_bytes();
 1885 
 1886   if (C->clinit_barrier_on_entry()) {
 1887     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1888     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1889 
 1890     Label L_skip_barrier;
 1891     Register klass = rscratch1;
 1892 
 1893     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1894     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1895 
 1896     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1897 
 1898     __ bind(L_skip_barrier);
 1899   }
 1900 
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
 1902 
 1903   C->output()->set_frame_complete(__ offset());
 1904 
 1905   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because uses of the constant
    // table might be emitted before the MachConstantBaseNode itself.
 1908     ConstantTable& constant_table = C->output()->constant_table();
 1909     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1910   }
 1911 }
 1912 
 1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1914 {
 1915   return MachNode::size(ra_); // too many variables; just compute it
 1916                               // the hard way
 1917 }
 1918 
 1919 int MachPrologNode::reloc() const
 1920 {
 1921   return 0; // a large enough number
 1922 }
 1923 
 1924 //=============================================================================
 1925 #ifndef PRODUCT
 1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1927 {
 1928   Compile* C = ra_->C;
 1929   if (generate_vzeroupper(C)) {
 1930     st->print("vzeroupper");
 1931     st->cr(); st->print("\t");
 1932   }
 1933 
 1934   int framesize = C->output()->frame_size_in_bytes();
 1935   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1936   // Remove word for return adr already pushed
 1937   // and RBP
 1938   framesize -= 2*wordSize;
 1939 
 1940   if (framesize) {
 1941     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1942     st->print("\t");
 1943   }
 1944 
 1945   st->print_cr("popq    rbp");
 1946   if (do_polling() && C->is_method_compilation()) {
 1947     st->print("\t");
 1948     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1949                  "ja      #safepoint_stub\t"
 1950                  "# Safepoint: poll for GC");
 1951   }
 1952 }
 1953 #endif
 1954 
 1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1956 {
 1957   Compile* C = ra_->C;
 1958 
 1959   if (generate_vzeroupper(C)) {
 1960     // Clear upper bits of YMM registers when current compiled code uses
 1961     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1962     __ vzeroupper();
 1963   }
 1964 
 1965   int framesize = C->output()->frame_size_in_bytes();
 1966   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1967   // Remove word for return adr already pushed
 1968   // and RBP
 1969   framesize -= 2*wordSize;
 1970 
 1971   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1972 
 1973   if (framesize) {
 1974     __ addq(rsp, framesize);
 1975   }
 1976 
 1977   __ popq(rbp);
 1978 
 1979   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1980     __ reserved_stack_check();
 1981   }
 1982 
 1983   if (do_polling() && C->is_method_compilation()) {
 1984     Label dummy_label;
 1985     Label* code_stub = &dummy_label;
 1986     if (!C->output()->in_scratch_emit_size()) {
 1987       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1988       C->output()->add_stub(stub);
 1989       code_stub = &stub->entry();
 1990     }
 1991     __ relocate(relocInfo::poll_return_type);
 1992     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1993   }
 1994 }
 1995 
 1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1997 {
 1998   return MachNode::size(ra_); // too many variables; just compute it
 1999                               // the hard way
 2000 }
 2001 
 2002 int MachEpilogNode::reloc() const
 2003 {
 2004   return 2; // a large enough number
 2005 }
 2006 
 2007 const Pipeline* MachEpilogNode::pipeline() const
 2008 {
 2009   return MachNode::pipeline_class();
 2010 }
 2011 
 2012 //=============================================================================
 2013 
 2014 enum RC {
 2015   rc_bad,
 2016   rc_int,
 2017   rc_kreg,
 2018   rc_float,
 2019   rc_stack
 2020 };
 2021 
 2022 static enum RC rc_class(OptoReg::Name reg)
 2023 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
 2025 
 2026   if (OptoReg::is_stack(reg)) return rc_stack;
 2027 
 2028   VMReg r = OptoReg::as_VMReg(reg);
 2029 
 2030   if (r->is_Register()) return rc_int;
 2031 
 2032   if (r->is_KRegister()) return rc_kreg;
 2033 
 2034   assert(r->is_XMMRegister(), "must be");
 2035   return rc_float;
 2036 }
 2037 
 2038 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2039 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2040                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2041 
 2042 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2043                      int stack_offset, int reg, uint ireg, outputStream* st);
 2044 
 2045 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2046                                       int dst_offset, uint ireg, outputStream* st) {
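  // A stack-to-stack vector move has no free register to go through, so the
  // code either uses push/pop pairs (8- and 16-byte moves) or borrows rax
  // (4-byte) / xmm0 (32- and 64-byte), preserving the borrowed register in
  // the scratch slot just below RSP.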
 2047   if (masm) {
 2048     switch (ireg) {
 2049     case Op_VecS:
 2050       __ movq(Address(rsp, -8), rax);
 2051       __ movl(rax, Address(rsp, src_offset));
 2052       __ movl(Address(rsp, dst_offset), rax);
 2053       __ movq(rax, Address(rsp, -8));
 2054       break;
 2055     case Op_VecD:
 2056       __ pushq(Address(rsp, src_offset));
 2057       __ popq (Address(rsp, dst_offset));
 2058       break;
 2059     case Op_VecX:
 2060       __ pushq(Address(rsp, src_offset));
 2061       __ popq (Address(rsp, dst_offset));
 2062       __ pushq(Address(rsp, src_offset+8));
 2063       __ popq (Address(rsp, dst_offset+8));
 2064       break;
 2065     case Op_VecY:
 2066       __ vmovdqu(Address(rsp, -32), xmm0);
 2067       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2068       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2069       __ vmovdqu(xmm0, Address(rsp, -32));
 2070       break;
 2071     case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2); // vector_len 2 == Assembler::AVX_512bit
 2073       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 2074       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 2075       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 2076       break;
 2077     default:
 2078       ShouldNotReachHere();
 2079     }
 2080 #ifndef PRODUCT
 2081   } else {
 2082     switch (ireg) {
 2083     case Op_VecS:
 2084       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2085                 "movl    rax, [rsp + #%d]\n\t"
 2086                 "movl    [rsp + #%d], rax\n\t"
 2087                 "movq    rax, [rsp - #8]",
 2088                 src_offset, dst_offset);
 2089       break;
 2090     case Op_VecD:
 2091       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2092                 "popq    [rsp + #%d]",
 2093                 src_offset, dst_offset);
 2094       break;
 2095      case Op_VecX:
 2096       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2097                 "popq    [rsp + #%d]\n\t"
 2098                 "pushq   [rsp + #%d]\n\t"
 2099                 "popq    [rsp + #%d]",
 2100                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2101       break;
 2102     case Op_VecY:
 2103       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2104                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2105                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2106                 "vmovdqu xmm0, [rsp - #32]",
 2107                 src_offset, dst_offset);
 2108       break;
 2109     case Op_VecZ:
 2110       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2111                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2112                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2113                 "vmovdqu xmm0, [rsp - #64]",
 2114                 src_offset, dst_offset);
 2115       break;
 2116     default:
 2117       ShouldNotReachHere();
 2118     }
 2119 #endif
 2120   }
 2121 }
 2122 
 2123 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2124                                        PhaseRegAlloc* ra_,
 2125                                        bool do_size,
 2126                                        outputStream* st) const {
 2127   assert(masm != nullptr || st  != nullptr, "sanity");
 2128   // Get registers to move
 2129   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2130   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2131   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2132   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2133 
 2134   enum RC src_second_rc = rc_class(src_second);
 2135   enum RC src_first_rc = rc_class(src_first);
 2136   enum RC dst_second_rc = rc_class(dst_second);
 2137   enum RC dst_first_rc = rc_class(dst_first);
 2138 
 2139   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2140          "must move at least 1 register" );
 2141 
 2142   if (src_first == dst_first && src_second == dst_second) {
 2143     // Self copy, no move
 2144     return 0;
 2145   }
 2146   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2147     uint ireg = ideal_reg();
 2148     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
 2151       // mem -> mem
 2152       int src_offset = ra_->reg2offset(src_first);
 2153       int dst_offset = ra_->reg2offset(dst_first);
 2154       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
 2156       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
 2158       int stack_offset = ra_->reg2offset(dst_first);
 2159       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
 2161       int stack_offset = ra_->reg2offset(src_first);
 2162       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2163     } else {
 2164       ShouldNotReachHere();
 2165     }
 2166     return 0;
 2167   }
 2168   if (src_first_rc == rc_stack) {
 2169     // mem ->
 2170     if (dst_first_rc == rc_stack) {
 2171       // mem -> mem
 2172       assert(src_second != dst_first, "overlap");
 2173       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2174           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2175         // 64-bit
 2176         int src_offset = ra_->reg2offset(src_first);
 2177         int dst_offset = ra_->reg2offset(dst_first);
 2178         if (masm) {
 2179           __ pushq(Address(rsp, src_offset));
 2180           __ popq (Address(rsp, dst_offset));
 2181 #ifndef PRODUCT
 2182         } else {
 2183           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2184                     "popq    [rsp + #%d]",
 2185                      src_offset, dst_offset);
 2186 #endif
 2187         }
 2188       } else {
 2189         // 32-bit
 2190         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2191         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2192         // No pushl/popl, so:
 2193         int src_offset = ra_->reg2offset(src_first);
 2194         int dst_offset = ra_->reg2offset(dst_first);
 2195         if (masm) {
 2196           __ movq(Address(rsp, -8), rax);
 2197           __ movl(rax, Address(rsp, src_offset));
 2198           __ movl(Address(rsp, dst_offset), rax);
 2199           __ movq(rax, Address(rsp, -8));
 2200 #ifndef PRODUCT
 2201         } else {
 2202           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2203                     "movl    rax, [rsp + #%d]\n\t"
 2204                     "movl    [rsp + #%d], rax\n\t"
 2205                     "movq    rax, [rsp - #8]",
 2206                      src_offset, dst_offset);
 2207 #endif
 2208         }
 2209       }
 2210       return 0;
 2211     } else if (dst_first_rc == rc_int) {
 2212       // mem -> gpr
 2213       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2214           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2215         // 64-bit
 2216         int offset = ra_->reg2offset(src_first);
 2217         if (masm) {
 2218           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2219 #ifndef PRODUCT
 2220         } else {
 2221           st->print("movq    %s, [rsp + #%d]\t# spill",
 2222                      Matcher::regName[dst_first],
 2223                      offset);
 2224 #endif
 2225         }
 2226       } else {
 2227         // 32-bit
 2228         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2229         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2230         int offset = ra_->reg2offset(src_first);
 2231         if (masm) {
 2232           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2233 #ifndef PRODUCT
 2234         } else {
 2235           st->print("movl    %s, [rsp + #%d]\t# spill",
 2236                      Matcher::regName[dst_first],
 2237                      offset);
 2238 #endif
 2239         }
 2240       }
 2241       return 0;
 2242     } else if (dst_first_rc == rc_float) {
      // mem -> xmm
 2244       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2245           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2246         // 64-bit
 2247         int offset = ra_->reg2offset(src_first);
 2248         if (masm) {
 2249           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2250 #ifndef PRODUCT
 2251         } else {
 2252           st->print("%s  %s, [rsp + #%d]\t# spill",
 2253                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2254                      Matcher::regName[dst_first],
 2255                      offset);
 2256 #endif
 2257         }
 2258       } else {
 2259         // 32-bit
 2260         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2261         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2262         int offset = ra_->reg2offset(src_first);
 2263         if (masm) {
 2264           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2265 #ifndef PRODUCT
 2266         } else {
 2267           st->print("movss   %s, [rsp + #%d]\t# spill",
 2268                      Matcher::regName[dst_first],
 2269                      offset);
 2270 #endif
 2271         }
 2272       }
 2273       return 0;
 2274     } else if (dst_first_rc == rc_kreg) {
 2275       // mem -> kreg
 2276       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2277           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2278         // 64-bit
 2279         int offset = ra_->reg2offset(src_first);
 2280         if (masm) {
 2281           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2282 #ifndef PRODUCT
 2283         } else {
 2284           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2285                      Matcher::regName[dst_first],
 2286                      offset);
 2287 #endif
 2288         }
 2289       }
 2290       return 0;
 2291     }
 2292   } else if (src_first_rc == rc_int) {
 2293     // gpr ->
 2294     if (dst_first_rc == rc_stack) {
 2295       // gpr -> mem
 2296       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2297           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2298         // 64-bit
 2299         int offset = ra_->reg2offset(dst_first);
 2300         if (masm) {
 2301           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2302 #ifndef PRODUCT
 2303         } else {
 2304           st->print("movq    [rsp + #%d], %s\t# spill",
 2305                      offset,
 2306                      Matcher::regName[src_first]);
 2307 #endif
 2308         }
 2309       } else {
 2310         // 32-bit
 2311         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2312         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2313         int offset = ra_->reg2offset(dst_first);
 2314         if (masm) {
 2315           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2316 #ifndef PRODUCT
 2317         } else {
 2318           st->print("movl    [rsp + #%d], %s\t# spill",
 2319                      offset,
 2320                      Matcher::regName[src_first]);
 2321 #endif
 2322         }
 2323       }
 2324       return 0;
 2325     } else if (dst_first_rc == rc_int) {
 2326       // gpr -> gpr
 2327       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2328           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2329         // 64-bit
 2330         if (masm) {
 2331           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2332                   as_Register(Matcher::_regEncode[src_first]));
 2333 #ifndef PRODUCT
 2334         } else {
 2335           st->print("movq    %s, %s\t# spill",
 2336                      Matcher::regName[dst_first],
 2337                      Matcher::regName[src_first]);
 2338 #endif
 2339         }
 2340         return 0;
 2341       } else {
 2342         // 32-bit
 2343         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2344         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2345         if (masm) {
 2346           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2347                   as_Register(Matcher::_regEncode[src_first]));
 2348 #ifndef PRODUCT
 2349         } else {
 2350           st->print("movl    %s, %s\t# spill",
 2351                      Matcher::regName[dst_first],
 2352                      Matcher::regName[src_first]);
 2353 #endif
 2354         }
 2355         return 0;
 2356       }
 2357     } else if (dst_first_rc == rc_float) {
 2358       // gpr -> xmm
 2359       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2360           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2361         // 64-bit
 2362         if (masm) {
 2363           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2364 #ifndef PRODUCT
 2365         } else {
 2366           st->print("movdq   %s, %s\t# spill",
 2367                      Matcher::regName[dst_first],
 2368                      Matcher::regName[src_first]);
 2369 #endif
 2370         }
 2371       } else {
 2372         // 32-bit
 2373         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2374         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2375         if (masm) {
 2376           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2377 #ifndef PRODUCT
 2378         } else {
 2379           st->print("movdl   %s, %s\t# spill",
 2380                      Matcher::regName[dst_first],
 2381                      Matcher::regName[src_first]);
 2382 #endif
 2383         }
 2384       }
 2385       return 0;
 2386     } else if (dst_first_rc == rc_kreg) {
 2387       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2388           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2389         // 64-bit
 2390         if (masm) {
 2391           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
 2398         }
 2399       }
 2400       Unimplemented();
 2401       return 0;
 2402     }
 2403   } else if (src_first_rc == rc_float) {
 2404     // xmm ->
 2405     if (dst_first_rc == rc_stack) {
 2406       // xmm -> mem
 2407       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2408           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2409         // 64-bit
 2410         int offset = ra_->reg2offset(dst_first);
 2411         if (masm) {
 2412           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2413 #ifndef PRODUCT
 2414         } else {
 2415           st->print("movsd   [rsp + #%d], %s\t# spill",
 2416                      offset,
 2417                      Matcher::regName[src_first]);
 2418 #endif
 2419         }
 2420       } else {
 2421         // 32-bit
 2422         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2423         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2424         int offset = ra_->reg2offset(dst_first);
 2425         if (masm) {
 2426           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2427 #ifndef PRODUCT
 2428         } else {
 2429           st->print("movss   [rsp + #%d], %s\t# spill",
 2430                      offset,
 2431                      Matcher::regName[src_first]);
 2432 #endif
 2433         }
 2434       }
 2435       return 0;
 2436     } else if (dst_first_rc == rc_int) {
 2437       // xmm -> gpr
 2438       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2439           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2440         // 64-bit
 2441         if (masm) {
 2442           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2443 #ifndef PRODUCT
 2444         } else {
 2445           st->print("movdq   %s, %s\t# spill",
 2446                      Matcher::regName[dst_first],
 2447                      Matcher::regName[src_first]);
 2448 #endif
 2449         }
 2450       } else {
 2451         // 32-bit
 2452         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2453         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2454         if (masm) {
 2455           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2456 #ifndef PRODUCT
 2457         } else {
 2458           st->print("movdl   %s, %s\t# spill",
 2459                      Matcher::regName[dst_first],
 2460                      Matcher::regName[src_first]);
 2461 #endif
 2462         }
 2463       }
 2464       return 0;
 2465     } else if (dst_first_rc == rc_float) {
 2466       // xmm -> xmm
 2467       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2468           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2469         // 64-bit
 2470         if (masm) {
 2471           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2472 #ifndef PRODUCT
 2473         } else {
 2474           st->print("%s  %s, %s\t# spill",
 2475                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2476                      Matcher::regName[dst_first],
 2477                      Matcher::regName[src_first]);
 2478 #endif
 2479         }
 2480       } else {
 2481         // 32-bit
 2482         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2483         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2484         if (masm) {
 2485           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2486 #ifndef PRODUCT
 2487         } else {
 2488           st->print("%s  %s, %s\t# spill",
 2489                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2490                      Matcher::regName[dst_first],
 2491                      Matcher::regName[src_first]);
 2492 #endif
 2493         }
 2494       }
 2495       return 0;
 2496     } else if (dst_first_rc == rc_kreg) {
 2497       assert(false, "Illegal spilling");
 2498       return 0;
 2499     }
 2500   } else if (src_first_rc == rc_kreg) {
 2501     if (dst_first_rc == rc_stack) {
      // kreg -> mem
 2503       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2504           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2505         // 64-bit
 2506         int offset = ra_->reg2offset(dst_first);
 2507         if (masm) {
 2508           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2509 #ifndef PRODUCT
 2510         } else {
          st->print("kmovq   [rsp + #%d], %s\t# spill",
 2512                      offset,
 2513                      Matcher::regName[src_first]);
 2514 #endif
 2515         }
 2516       }
 2517       return 0;
 2518     } else if (dst_first_rc == rc_int) {
 2519       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2520           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2521         // 64-bit
 2522         if (masm) {
 2523           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2524 #ifndef PRODUCT
 2525         } else {
          st->print("kmovq   %s, %s\t# spill",
 2527                      Matcher::regName[dst_first],
 2528                      Matcher::regName[src_first]);
 2529 #endif
 2530         }
 2531       }
 2532       Unimplemented();
 2533       return 0;
 2534     } else if (dst_first_rc == rc_kreg) {
 2535       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2536           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2537         // 64-bit
 2538         if (masm) {
 2539           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2540 #ifndef PRODUCT
 2541         } else {
          st->print("kmovq   %s, %s\t# spill",
 2543                      Matcher::regName[dst_first],
 2544                      Matcher::regName[src_first]);
 2545 #endif
 2546         }
 2547       }
 2548       return 0;
 2549     } else if (dst_first_rc == rc_float) {
 2550       assert(false, "Illegal spill");
 2551       return 0;
 2552     }
 2553   }
 2554 
  assert(false, "unexpected register class combination");
 2556   Unimplemented();
 2557   return 0;
 2558 }
 2559 
 2560 #ifndef PRODUCT
 2561 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2562   implementation(nullptr, ra_, false, st);
 2563 }
 2564 #endif
 2565 
 2566 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2567   implementation(masm, ra_, false, nullptr);
 2568 }
 2569 
 2570 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2571   return MachNode::size(ra_);
 2572 }
 2573 
 2574 //=============================================================================
 2575 #ifndef PRODUCT
 2576 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2577 {
 2578   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2579   int reg = ra_->get_reg_first(this);
 2580   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2581             Matcher::regName[reg], offset);
 2582 }
 2583 #endif
 2584 
 2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2586 {
 2587   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2588   int reg = ra_->get_encode(this);
 2589 
 2590   __ lea(as_Register(reg), Address(rsp, offset));
 2591 }
 2592 
 2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2594 {
 2595   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
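  // The emitted instruction is lea reg, [rsp + offset]: opcode, ModRM and SIB
  // bytes plus a 1- or 4-byte displacement, preceded by a 1-byte REX prefix,
  // or by the 2-byte REX2 prefix when the destination is an extended register.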
 2596   if (ra_->get_encode(this) > 15) {
 2597     return (offset < 0x80) ? 6 : 9; // REX2
 2598   } else {
 2599     return (offset < 0x80) ? 5 : 8; // REX
 2600   }
 2601 }
 2602 
 2603 //=============================================================================
 2604 #ifndef PRODUCT
 2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2606 {
 2607   if (UseCompressedClassPointers) {
 2608     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2609     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2610   } else {
    st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2612     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2613   }
 2614   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2615 }
 2616 #endif
 2617 
 2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2619 {
 2620   __ ic_check(InteriorEntryAlignment);
 2621 }
 2622 
 2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2624 {
 2625   return MachNode::size(ra_); // too many variables; just compute it
 2626                               // the hard way
 2627 }
 2628 
 2629 
 2630 //=============================================================================
 2631 
 2632 bool Matcher::supports_vector_calling_convention(void) {
 2633   return EnableVectorSupport;
 2634 }
 2635 
 2636 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2637   assert(EnableVectorSupport, "sanity");
 2638   int lo = XMM0_num;
 2639   int hi = XMM0b_num;
 2640   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2641   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2642   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2643   return OptoRegPair(hi, lo);
 2644 }
 2645 
 2646 // Is this branch offset short enough that a short branch can be used?
 2647 //
 2648 // NOTE: If the platform does not provide any short branch variants, then
 2649 //       this method should return false for offset 0.
 2650 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 2654   offset -= br_size;
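  // For example, a 2-byte short jump to the instruction immediately after it
  // is passed offset == 2, which becomes a displacement of 0 here and
  // trivially fits the rel8 ranges checked below.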
 2655 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less.
 2658   if (rule == jmpConUCF2_rule)
 2659     return (-126 <= offset && offset <= 125);
 2660   return (-128 <= offset && offset <= 127);
 2661 }
 2662 
 2663 // Return whether or not this register is ever used as an argument.
 2664 // This function is used on startup to build the trampoline stubs in
 2665 // generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
 2667 // available to the callee.
 2668 bool Matcher::can_be_java_arg(int reg)
 2669 {
 2670   return
 2671     reg ==  RDI_num || reg == RDI_H_num ||
 2672     reg ==  RSI_num || reg == RSI_H_num ||
 2673     reg ==  RDX_num || reg == RDX_H_num ||
 2674     reg ==  RCX_num || reg == RCX_H_num ||
 2675     reg ==   R8_num || reg ==  R8_H_num ||
 2676     reg ==   R9_num || reg ==  R9_H_num ||
 2677     reg ==  R12_num || reg == R12_H_num ||
 2678     reg == XMM0_num || reg == XMM0b_num ||
 2679     reg == XMM1_num || reg == XMM1b_num ||
 2680     reg == XMM2_num || reg == XMM2b_num ||
 2681     reg == XMM3_num || reg == XMM3b_num ||
 2682     reg == XMM4_num || reg == XMM4b_num ||
 2683     reg == XMM5_num || reg == XMM5b_num ||
 2684     reg == XMM6_num || reg == XMM6b_num ||
 2685     reg == XMM7_num || reg == XMM7b_num;
 2686 }
 2687 
 2688 bool Matcher::is_spillable_arg(int reg)
 2689 {
 2690   return can_be_java_arg(reg);
 2691 }
 2692 
 2693 uint Matcher::int_pressure_limit()
 2694 {
 2695   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2696 }
 2697 
 2698 uint Matcher::float_pressure_limit()
 2699 {
  // After experimenting with different values, the following default threshold
  // was found to work best for LCM's register pressure scheduling on x64.
 2702   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2703   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2704   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2705 }
 2706 
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  // In 64-bit mode, code that uses multiplication when the divisor is
  // constant is faster than the hardware DIV instruction (it uses MulHiL).
 2711   return false;
 2712 }
 2713 
 2714 // Register for DIVI projection of divmodI
 2715 const RegMask& Matcher::divI_proj_mask() {
 2716   return INT_RAX_REG_mask();
 2717 }
 2718 
 2719 // Register for MODI projection of divmodI
 2720 const RegMask& Matcher::modI_proj_mask() {
 2721   return INT_RDX_REG_mask();
 2722 }
 2723 
 2724 // Register for DIVL projection of divmodL
 2725 const RegMask& Matcher::divL_proj_mask() {
 2726   return LONG_RAX_REG_mask();
 2727 }
 2728 
 2729 // Register for MODL projection of divmodL
 2730 const RegMask& Matcher::modL_proj_mask() {
 2731   return LONG_RDX_REG_mask();
 2732 }
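
// x86 idiv leaves the quotient in RAX and the remainder in RDX, which is why
// the projection masks above are pinned to those registers.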
 2733 
 2734 %}
 2735 
 2736 source_hpp %{
 2737 // Header information of the source block.
 2738 // Method declarations/definitions which are used outside
 2739 // the ad-scope can conveniently be defined here.
 2740 //
 2741 // To keep related declarations/definitions/uses close together,
 2742 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 2743 
 2744 #include "runtime/vm_version.hpp"
 2745 
 2746 class NativeJump;
 2747 
 2748 class CallStubImpl {
 2749 
 2750   //--------------------------------------------------------------
 2751   //---<  Used for optimization in Compile::shorten_branches  >---
 2752   //--------------------------------------------------------------
 2753 
 2754  public:
 2755   // Size of call trampoline stub.
 2756   static uint size_call_trampoline() {
 2757     return 0; // no call trampolines on this platform
 2758   }
 2759 
 2760   // number of relocations needed by a call trampoline stub
 2761   static uint reloc_call_trampoline() {
 2762     return 0; // no call trampolines on this platform
 2763   }
 2764 };
 2765 
 2766 class HandlerImpl {
 2767 
 2768  public:
 2769 
 2770   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2771 
 2772   static uint size_deopt_handler() {
 2773     // one call and one jmp.
 2774     return 7;
 2775   }
 2776 };
 2777 
 2778 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
 2780     case  4: // fall-through
 2781     case  8: // fall-through
 2782     case 16: return Assembler::AVX_128bit;
 2783     case 32: return Assembler::AVX_256bit;
 2784     case 64: return Assembler::AVX_512bit;
 2785 
 2786     default: {
 2787       ShouldNotReachHere();
 2788       return Assembler::AVX_NoVec;
 2789     }
 2790   }
 2791 }
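// Note that 4- and 8-byte vectors fall through to the 128-bit (XMM) encoding
// above; the unused upper lanes of the register are ignored.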
 2792 
 2793 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2794   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2795 }
 2796 
 2797 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2798   uint def_idx = use->operand_index(opnd);
 2799   Node* def = use->in(def_idx);
 2800   return vector_length_encoding(def);
 2801 }
 2802 
 2803 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2804   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2805          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2806 }
 2807 
 2808 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2809   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2810            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2811 }
 2812 
 2813 class Node::PD {
 2814 public:
 2815   enum NodeFlags {
 2816     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2817     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2818     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2819     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2820     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2821     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2822     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2823     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2824     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2825     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2826     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2827     _last_flag                = Flag_clears_sign_flag
 2828   };
 2829 };
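
// Flag_intel_jcc_erratum tags machnodes that may need padding to avoid the
// Intel JCC erratum (see pd_perform_mach_node_analysis and compute_padding in
// the source block below); the remaining flags record which EFLAGS condition
// bits a node sets or clears.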
 2830 
 2831 %} // end source_hpp
 2832 
 2833 source %{
 2834 
 2835 #include "opto/addnode.hpp"
 2836 #include "c2_intelJccErratum_x86.hpp"
 2837 
 2838 void PhaseOutput::pd_perform_mach_node_analysis() {
 2839   if (VM_Version::has_intel_jcc_erratum()) {
 2840     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2841     _buf_sizes._code += extra_padding;
 2842   }
 2843 }
 2844 
 2845 int MachNode::pd_alignment_required() const {
 2846   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2847     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2848     return IntelJccErratum::largest_jcc_size() + 1;
 2849   } else {
 2850     return 1;
 2851   }
 2852 }
 2853 
 2854 int MachNode::compute_padding(int current_offset) const {
 2855   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2856     Compile* C = Compile::current();
 2857     PhaseOutput* output = C->output();
 2858     Block* block = output->block();
 2859     int index = output->index();
 2860     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2861   } else {
 2862     return 0;
 2863   }
 2864 }
 2865 
 2866 // Emit deopt handler code.
 2867 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2868 
 2869   // Note that the code buffer's insts_mark is always relative to insts.
 2870   // That's why we must use the macroassembler to generate a handler.
 2871   address base = __ start_a_stub(size_deopt_handler());
 2872   if (base == nullptr) {
 2873     ciEnv::current()->record_failure("CodeCache is full");
 2874     return 0;  // CodeBuffer::expand failed
 2875   }
 2876   int offset = __ offset();
 2877 
 2878   Label start;
 2879   __ bind(start);
 2880 
 2881   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2882 
 2883   int entry_offset = __ offset();
 2884 
 2885   __ jmp(start);
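  // Altogether: a 5-byte call (E8 rel32) plus a 2-byte short jmp back to the
  // call, filling the 7 bytes that size_deopt_handler() reserves.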
 2886 
 2887   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2888   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2889          "out of bounds read in post-call NOP check");
 2890   __ end_a_stub();
 2891   return entry_offset;
 2892 }
 2893 
 2894 static Assembler::Width widthForType(BasicType bt) {
 2895   if (bt == T_BYTE) {
 2896     return Assembler::B;
 2897   } else if (bt == T_SHORT) {
 2898     return Assembler::W;
 2899   } else if (bt == T_INT) {
 2900     return Assembler::D;
 2901   } else {
 2902     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2903     return Assembler::Q;
 2904   }
 2905 }
 2906 
 2907 //=============================================================================
 2908 
 2909   // Float masks come from different places depending on platform.
 2910   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
 2911   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
 2912   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
 2913   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 2914   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
 2915   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
 2916   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 2917   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 2918   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
 2919   static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 2920   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 2921   static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 2922   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
 2923   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
 2924   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
 2925   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 2926   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
 2927   static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
 2928   static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
 2929 
 2930 //=============================================================================
 2931 bool Matcher::match_rule_supported(int opcode) {
 2932   if (!has_match_rule(opcode)) {
 2933     return false; // no match rule present
 2934   }
 2935   switch (opcode) {
 2936     case Op_AbsVL:
 2937     case Op_StoreVectorScatter:
 2938       if (UseAVX < 3) {
 2939         return false;
 2940       }
 2941       break;
 2942     case Op_PopCountI:
 2943     case Op_PopCountL:
 2944       if (!UsePopCountInstruction) {
 2945         return false;
 2946       }
 2947       break;
 2948     case Op_PopCountVI:
 2949       if (UseAVX < 2) {
 2950         return false;
 2951       }
 2952       break;
 2953     case Op_CompressV:
 2954     case Op_ExpandV:
 2955     case Op_PopCountVL:
 2956       if (UseAVX < 2) {
 2957         return false;
 2958       }
 2959       break;
 2960     case Op_MulVI:
 2961       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 2962         return false;
 2963       }
 2964       break;
 2965     case Op_MulVL:
 2966       if (UseSSE < 4) { // only with SSE4_1 or AVX
 2967         return false;
 2968       }
 2969       break;
 2970     case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
 2972         return false;
 2973       }
 2974       break;
 2975     case Op_AbsVB:
 2976     case Op_AbsVS:
 2977     case Op_AbsVI:
 2978     case Op_AddReductionVI:
 2979     case Op_AndReductionV:
 2980     case Op_OrReductionV:
 2981     case Op_XorReductionV:
 2982       if (UseSSE < 3) { // requires at least SSSE3
 2983         return false;
 2984       }
 2985       break;
 2986     case Op_MaxHF:
 2987     case Op_MinHF:
 2988       if (!VM_Version::supports_avx512vlbw()) {
 2989         return false;
 2990       }  // fallthrough
 2991     case Op_AddHF:
 2992     case Op_DivHF:
 2993     case Op_FmaHF:
 2994     case Op_MulHF:
 2995     case Op_ReinterpretS2HF:
 2996     case Op_ReinterpretHF2S:
 2997     case Op_SubHF:
 2998     case Op_SqrtHF:
 2999       if (!VM_Version::supports_avx512_fp16()) {
 3000         return false;
 3001       }
 3002       break;
 3003     case Op_VectorLoadShuffle:
 3004     case Op_VectorRearrange:
 3005     case Op_MulReductionVI:
 3006       if (UseSSE < 4) { // requires at least SSE4
 3007         return false;
 3008       }
 3009       break;
 3010     case Op_IsInfiniteF:
 3011     case Op_IsInfiniteD:
 3012       if (!VM_Version::supports_avx512dq()) {
 3013         return false;
 3014       }
 3015       break;
 3016     case Op_SqrtVD:
 3017     case Op_SqrtVF:
 3018     case Op_VectorMaskCmp:
 3019     case Op_VectorCastB2X:
 3020     case Op_VectorCastS2X:
 3021     case Op_VectorCastI2X:
 3022     case Op_VectorCastL2X:
 3023     case Op_VectorCastF2X:
 3024     case Op_VectorCastD2X:
 3025     case Op_VectorUCastB2X:
 3026     case Op_VectorUCastS2X:
 3027     case Op_VectorUCastI2X:
 3028     case Op_VectorMaskCast:
 3029       if (UseAVX < 1) { // enabled for AVX only
 3030         return false;
 3031       }
 3032       break;
 3033     case Op_PopulateIndex:
 3034       if (UseAVX < 2) {
 3035         return false;
 3036       }
 3037       break;
 3038     case Op_RoundVF:
 3039       if (UseAVX < 2) { // enabled for AVX2 only
 3040         return false;
 3041       }
 3042       break;
 3043     case Op_RoundVD:
 3044       if (UseAVX < 3) {
 3045         return false;  // enabled for AVX3 only
 3046       }
 3047       break;
 3048     case Op_CompareAndSwapL:
 3049     case Op_CompareAndSwapP:
 3050       break;
    case Op_StrIndexOf:
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
 3061     case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
 3063         return false;
 3064       }
 3065       break;
 3066     case Op_MulVB:
 3067     case Op_LShiftVB:
 3068     case Op_RShiftVB:
 3069     case Op_URShiftVB:
 3070     case Op_VectorInsert:
 3071     case Op_VectorLoadMask:
 3072     case Op_VectorStoreMask:
 3073     case Op_VectorBlend:
 3074       if (UseSSE < 4) {
 3075         return false;
 3076       }
 3077       break;
 3078     case Op_MaxD:
 3079     case Op_MaxF:
 3080     case Op_MinD:
 3081     case Op_MinF:
 3082       if (UseAVX < 1) { // enabled for AVX only
 3083         return false;
 3084       }
 3085       break;
 3086     case Op_CacheWB:
 3087     case Op_CacheWBPreSync:
 3088     case Op_CacheWBPostSync:
 3089       if (!VM_Version::supports_data_cache_line_flush()) {
 3090         return false;
 3091       }
 3092       break;
 3093     case Op_ExtractB:
 3094     case Op_ExtractL:
 3095     case Op_ExtractI:
 3096     case Op_RoundDoubleMode:
 3097       if (UseSSE < 4) {
 3098         return false;
 3099       }
 3100       break;
 3101     case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
 3103         return false; // 128bit vroundpd is not available
 3104       }
 3105       break;
 3106     case Op_LoadVectorGather:
 3107     case Op_LoadVectorGatherMasked:
 3108       if (UseAVX < 2) {
 3109         return false;
 3110       }
 3111       break;
 3112     case Op_FmaF:
 3113     case Op_FmaD:
 3114     case Op_FmaVD:
 3115     case Op_FmaVF:
 3116       if (!UseFMA) {
 3117         return false;
 3118       }
 3119       break;
 3120     case Op_MacroLogicV:
 3121       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3122         return false;
 3123       }
 3124       break;
 3125 
 3126     case Op_VectorCmpMasked:
 3127     case Op_VectorMaskGen:
 3128       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3129         return false;
 3130       }
 3131       break;
 3132     case Op_VectorMaskFirstTrue:
 3133     case Op_VectorMaskLastTrue:
 3134     case Op_VectorMaskTrueCount:
 3135     case Op_VectorMaskToLong:
      if (UseAVX < 1) {
        return false;
      }
 3139       break;
 3140     case Op_RoundF:
 3141     case Op_RoundD:
 3142       break;
 3143     case Op_CopySignD:
 3144     case Op_CopySignF:
      if (UseAVX < 3) {
 3146         return false;
 3147       }
 3148       if (!VM_Version::supports_avx512vl()) {
 3149         return false;
 3150       }
 3151       break;
 3152     case Op_CompressBits:
 3153     case Op_ExpandBits:
 3154       if (!VM_Version::supports_bmi2()) {
 3155         return false;
 3156       }
 3157       break;
 3158     case Op_CompressM:
 3159       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3160         return false;
 3161       }
 3162       break;
 3163     case Op_ConvF2HF:
 3164     case Op_ConvHF2F:
 3165       if (!VM_Version::supports_float16()) {
 3166         return false;
 3167       }
 3168       break;
 3169     case Op_VectorCastF2HF:
 3170     case Op_VectorCastHF2F:
 3171       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3172         return false;
 3173       }
 3174       break;
 3175   }
 3176   return true;  // Match rules are supported by default.
 3177 }
 3178 
 3179 //------------------------------------------------------------------------
 3180 
 3181 static inline bool is_pop_count_instr_target(BasicType bt) {
 3182   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3183          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3184 }
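// (Same predicate as is_vector_popcount_predicate in the source_hpp block.)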
 3185 
 3186 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3187   return match_rule_supported_vector(opcode, vlen, bt);
 3188 }
 3189 
// Identify extra cases for which we might want to provide match rules for
// vector nodes and other intrinsics guarded with vector length (vlen) and
// element type (bt).
 3192 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3193   if (!match_rule_supported(opcode)) {
 3194     return false;
 3195   }
 3196   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3197   //   * SSE2 supports 128bit vectors for all types;
 3198   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3199   //   * AVX2 supports 256bit vectors for all types;
 3200   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3201   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3202   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3203   // And MaxVectorSize is taken into account as well.
 3204   if (!vector_size_supported(bt, vlen)) {
 3205     return false;
 3206   }
 3207   // Special cases which require vector length follow:
 3208   //   * implementation limitations
 3209   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3210   //   * 128bit vroundpd instruction is present only in AVX1
 3211   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3212   switch (opcode) {
 3213     case Op_MaxVHF:
 3214     case Op_MinVHF:
 3215       if (!VM_Version::supports_avx512bw()) {
 3216         return false;
 3217       }
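      // fallthrough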
 3218     case Op_AddVHF:
 3219     case Op_DivVHF:
 3220     case Op_FmaVHF:
 3221     case Op_MulVHF:
 3222     case Op_SubVHF:
 3223     case Op_SqrtVHF:
 3224       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3225         return false;
 3226       }
 3227       if (!VM_Version::supports_avx512_fp16()) {
 3228         return false;
 3229       }
 3230       break;
 3231     case Op_AbsVF:
 3232     case Op_NegVF:
      if ((vlen == 16) && !VM_Version::supports_avx512dq()) {
 3234         return false; // 512bit vandps and vxorps are not available
 3235       }
 3236       break;
 3237     case Op_AbsVD:
 3238     case Op_NegVD:
      if ((vlen == 8) && !VM_Version::supports_avx512dq()) {
 3240         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3241       }
 3242       break;
 3243     case Op_RotateRightV:
 3244     case Op_RotateLeftV:
 3245       if (bt != T_INT && bt != T_LONG) {
 3246         return false;
 3247       } // fallthrough
 3248     case Op_MacroLogicV:
 3249       if (!VM_Version::supports_evex() ||
 3250           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3251         return false;
 3252       }
 3253       break;
 3254     case Op_ClearArray:
 3255     case Op_VectorMaskGen:
 3256     case Op_VectorCmpMasked:
 3257       if (!VM_Version::supports_avx512bw()) {
 3258         return false;
 3259       }
 3260       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3261         return false;
 3262       }
 3263       break;
 3264     case Op_LoadVectorMasked:
 3265     case Op_StoreVectorMasked:
 3266       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3267         return false;
 3268       }
 3269       break;
 3270     case Op_UMinV:
 3271     case Op_UMaxV:
 3272       if (UseAVX == 0) {
 3273         return false;
 3274       }
 3275       break;
 3276     case Op_MaxV:
 3277     case Op_MinV:
 3278       if (UseSSE < 4 && is_integral_type(bt)) {
 3279         return false;
 3280       }
      if (bt == T_FLOAT || bt == T_DOUBLE) {
        // Float/Double intrinsics are enabled for AVX family currently.
        if (UseAVX == 0) {
          return false;
        }
        if (UseAVX > 2 && !VM_Version::supports_avx512dq() && size_in_bits == 512) { // 512-bit Float/Double intrinsics need AVX512DQ
          return false;
        }
      }
 3290       break;
 3291     case Op_CallLeafVector:
 3292       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3293         return false;
 3294       }
 3295       break;
 3296     case Op_AddReductionVI:
 3297       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3298         return false;
 3299       }
 3300       // fallthrough
 3301     case Op_AndReductionV:
 3302     case Op_OrReductionV:
 3303     case Op_XorReductionV:
 3304       if (is_subword_type(bt) && (UseSSE < 4)) {
 3305         return false;
 3306       }
 3307       break;
 3308     case Op_MinReductionV:
 3309     case Op_MaxReductionV:
 3310       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3311         return false;
 3312       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3313         return false;
 3314       }
 3315       // Float/Double intrinsics enabled for AVX family.
 3316       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3317         return false;
 3318       }
 3319       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3320         return false;
 3321       }
 3322       break;
 3323     case Op_VectorTest:
 3324       if (UseSSE < 4) {
 3325         return false; // Implementation limitation
 3326       } else if (size_in_bits < 32) {
 3327         return false; // Implementation limitation
 3328       }
 3329       break;
 3330     case Op_VectorLoadShuffle:
 3331     case Op_VectorRearrange:
      if (vlen == 2) {
 3333         return false; // Implementation limitation due to how shuffle is loaded
 3334       } else if (size_in_bits == 256 && UseAVX < 2) {
 3335         return false; // Implementation limitation
 3336       }
 3337       break;
 3338     case Op_VectorLoadMask:
 3339     case Op_VectorMaskCast:
 3340       if (size_in_bits == 256 && UseAVX < 2) {
 3341         return false; // Implementation limitation
 3342       }
 3343       // fallthrough
 3344     case Op_VectorStoreMask:
 3345       if (vlen == 2) {
 3346         return false; // Implementation limitation
 3347       }
 3348       break;
 3349     case Op_PopulateIndex:
 3350       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3351         return false;
 3352       }
 3353       break;
 3354     case Op_VectorCastB2X:
 3355     case Op_VectorCastS2X:
 3356     case Op_VectorCastI2X:
 3357       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3358         return false;
 3359       }
 3360       break;
 3361     case Op_VectorCastL2X:
 3362       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3363         return false;
 3364       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3365         return false;
 3366       }
 3367       break;
 3368     case Op_VectorCastF2X: {
        // As per JLS section 5.1.3, narrowing conversions to sub-word types
        // happen after an intermediate conversion to integer, and the special
        // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
 3372         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3373         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3374           return false;
 3375         }
 3376       }
 3377       // fallthrough
 3378     case Op_VectorCastD2X:
 3379       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3380         return false;
 3381       }
 3382       break;
 3383     case Op_VectorCastF2HF:
 3384     case Op_VectorCastHF2F:
 3385       if (!VM_Version::supports_f16c() &&
 3386          ((!VM_Version::supports_evex() ||
 3387          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3388         return false;
 3389       }
 3390       break;
 3391     case Op_RoundVD:
 3392       if (!VM_Version::supports_avx512dq()) {
 3393         return false;
 3394       }
 3395       break;
 3396     case Op_MulReductionVI:
 3397       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3398         return false;
 3399       }
 3400       break;
 3401     case Op_LoadVectorGatherMasked:
 3402       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3403         return false;
 3404       }
 3405       if (is_subword_type(bt) &&
 3406          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3407           (size_in_bits < 64)                                      ||
 3408           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3409         return false;
 3410       }
 3411       break;
 3412     case Op_StoreVectorScatterMasked:
 3413     case Op_StoreVectorScatter:
 3414       if (is_subword_type(bt)) {
 3415         return false;
 3416       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3417         return false;
 3418       }
 3419       // fallthrough
 3420     case Op_LoadVectorGather:
 3421       if (!is_subword_type(bt) && size_in_bits == 64) {
 3422         return false;
 3423       }
 3424       if (is_subword_type(bt) && size_in_bits < 64) {
 3425         return false;
 3426       }
 3427       break;
 3428     case Op_SaturatingAddV:
 3429     case Op_SaturatingSubV:
 3430       if (UseAVX < 1) {
 3431         return false; // Implementation limitation
 3432       }
 3433       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3434         return false;
 3435       }
 3436       break;
 3437     case Op_SelectFromTwoVector:
      if (size_in_bits < 128) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false;
      }
      if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
        return false;
      }
      break;
 3454     case Op_MaskAll:
 3455       if (!VM_Version::supports_evex()) {
 3456         return false;
 3457       }
 3458       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3459         return false;
 3460       }
 3461       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3462         return false;
 3463       }
 3464       break;
 3465     case Op_VectorMaskCmp:
 3466       if (vlen < 2 || size_in_bits < 32) {
 3467         return false;
 3468       }
 3469       break;
 3470     case Op_CompressM:
 3471       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3472         return false;
 3473       }
 3474       break;
 3475     case Op_CompressV:
 3476     case Op_ExpandV:
 3477       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3478         return false;
 3479       }
      if (size_in_bits < 128) {
 3481         return false;
 3482       }
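      // fallthrough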
 3483     case Op_VectorLongToMask:
 3484       if (UseAVX < 1) {
 3485         return false;
 3486       }
 3487       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3488         return false;
 3489       }
 3490       break;
 3491     case Op_SignumVD:
 3492     case Op_SignumVF:
 3493       if (UseAVX < 1) {
 3494         return false;
 3495       }
 3496       break;
 3497     case Op_PopCountVI:
 3498     case Op_PopCountVL: {
 3499         if (!is_pop_count_instr_target(bt) &&
 3500             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3501           return false;
 3502         }
 3503       }
 3504       break;
 3505     case Op_ReverseV:
 3506     case Op_ReverseBytesV:
 3507       if (UseAVX < 2) {
 3508         return false;
 3509       }
 3510       break;
 3511     case Op_CountTrailingZerosV:
 3512     case Op_CountLeadingZerosV:
 3513       if (UseAVX < 2) {
 3514         return false;
 3515       }
 3516       break;
 3517   }
  return true;  // Match rules are supported by default.
 3519 }
 3520 
 3521 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a pattern
  // based on the IR opcode. Most of the unary/binary/ternary masked operations share the
  // IR nodes of their non-masked counterparts, with the mask edge being the differentiator.
  // This routine does a strict check on the existence of masked operation patterns
  // by returning a default false value for all the other opcodes apart from the
  // ones whose masked instruction patterns are defined in this file.
 3528   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3529     return false;
 3530   }
 3531 
 3532   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3533   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3534     return false;
 3535   }
  switch (opcode) {
 3537     // Unary masked operations
 3538     case Op_AbsVB:
 3539     case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
 3541         return false;  // Implementation limitation
 3542       }
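      // fallthrough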
 3543     case Op_AbsVI:
 3544     case Op_AbsVL:
 3545       return true;
 3546 
 3547     // Ternary masked operations
 3548     case Op_FmaVF:
 3549     case Op_FmaVD:
 3550       return true;
 3551 
 3552     case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
 3554         return false;
 3555       }
 3556       return true;
 3557 
 3558     // Binary masked operations
 3559     case Op_AddVB:
 3560     case Op_AddVS:
 3561     case Op_SubVB:
 3562     case Op_SubVS:
 3563     case Op_MulVS:
 3564     case Op_LShiftVS:
 3565     case Op_RShiftVS:
 3566     case Op_URShiftVS:
 3567       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3568       if (!VM_Version::supports_avx512bw()) {
 3569         return false;  // Implementation limitation
 3570       }
 3571       return true;
 3572 
 3573     case Op_MulVL:
 3574       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3575       if (!VM_Version::supports_avx512dq()) {
 3576         return false;  // Implementation limitation
 3577       }
 3578       return true;
 3579 
 3580     case Op_AndV:
 3581     case Op_OrV:
 3582     case Op_XorV:
 3583     case Op_RotateRightV:
 3584     case Op_RotateLeftV:
 3585       if (bt != T_INT && bt != T_LONG) {
 3586         return false; // Implementation limitation
 3587       }
 3588       return true;
 3589 
 3590     case Op_VectorLoadMask:
 3591       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3592       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3593         return false;
 3594       }
 3595       return true;
 3596 
 3597     case Op_AddVI:
 3598     case Op_AddVL:
 3599     case Op_AddVF:
 3600     case Op_AddVD:
 3601     case Op_SubVI:
 3602     case Op_SubVL:
 3603     case Op_SubVF:
 3604     case Op_SubVD:
 3605     case Op_MulVI:
 3606     case Op_MulVF:
 3607     case Op_MulVD:
 3608     case Op_DivVF:
 3609     case Op_DivVD:
 3610     case Op_SqrtVF:
 3611     case Op_SqrtVD:
 3612     case Op_LShiftVI:
 3613     case Op_LShiftVL:
 3614     case Op_RShiftVI:
 3615     case Op_RShiftVL:
 3616     case Op_URShiftVI:
 3617     case Op_URShiftVL:
 3618     case Op_LoadVectorMasked:
 3619     case Op_StoreVectorMasked:
 3620     case Op_LoadVectorGatherMasked:
 3621     case Op_StoreVectorScatterMasked:
 3622       return true;
 3623 
 3624     case Op_UMinV:
 3625     case Op_UMaxV:
 3626       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3627         return false;
 3628       } // fallthrough
 3629     case Op_MaxV:
 3630     case Op_MinV:
 3631       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3632         return false; // Implementation limitation
 3633       }
 3634       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3635         return false; // Implementation limitation
 3636       }
 3637       return true;
 3638     case Op_SaturatingAddV:
 3639     case Op_SaturatingSubV:
 3640       if (!is_subword_type(bt)) {
 3641         return false;
 3642       }
 3643       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3644         return false; // Implementation limitation
 3645       }
 3646       return true;
 3647 
 3648     case Op_VectorMaskCmp:
 3649       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3650         return false; // Implementation limitation
 3651       }
 3652       return true;
 3653 
 3654     case Op_VectorRearrange:
 3655       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3656         return false; // Implementation limitation
 3657       }
 3658       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3659         return false; // Implementation limitation
 3660       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3661         return false; // Implementation limitation
 3662       }
 3663       return true;
 3664 
 3665     // Binary Logical operations
 3666     case Op_AndVMask:
 3667     case Op_OrVMask:
 3668     case Op_XorVMask:
 3669       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3670         return false; // Implementation limitation
 3671       }
 3672       return true;
 3673 
 3674     case Op_PopCountVI:
 3675     case Op_PopCountVL:
 3676       if (!is_pop_count_instr_target(bt)) {
 3677         return false;
 3678       }
 3679       return true;
 3680 
 3681     case Op_MaskAll:
 3682       return true;
 3683 
 3684     case Op_CountLeadingZerosV:
 3685       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3686         return true;
 3687       }
 3688     default:
 3689       return false;
 3690   }
 3691 }
 3692 
 3693 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3694   return false;
 3695 }
 3696 
 3697 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3698 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3699   switch (elem_bt) {
 3700     case T_BYTE:  return false;
 3701     case T_SHORT: return !VM_Version::supports_avx512bw();
 3702     case T_INT:   return !VM_Version::supports_avx();
 3703     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3704     default:
 3705       ShouldNotReachHere();
 3706       return false;
 3707   }
 3708 }
 3709 
 3710 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3711   // Prefer predicate if the mask type is "TypeVectMask".
 3712   return vt->isa_vectmask() != nullptr;
 3713 }
 3714 
 3715 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3716   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3717   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3718   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3719       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3720     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3721     return new legVecZOper();
 3722   }
 3723   if (legacy) {
 3724     switch (ideal_reg) {
 3725       case Op_VecS: return new legVecSOper();
 3726       case Op_VecD: return new legVecDOper();
 3727       case Op_VecX: return new legVecXOper();
 3728       case Op_VecY: return new legVecYOper();
 3729       case Op_VecZ: return new legVecZOper();
 3730     }
 3731   } else {
 3732     switch (ideal_reg) {
 3733       case Op_VecS: return new vecSOper();
 3734       case Op_VecD: return new vecDOper();
 3735       case Op_VecX: return new vecXOper();
 3736       case Op_VecY: return new vecYOper();
 3737       case Op_VecZ: return new vecZOper();
 3738     }
 3739   }
 3740   ShouldNotReachHere();
 3741   return nullptr;
 3742 }
 3743 
 3744 bool Matcher::is_reg2reg_move(MachNode* m) {
 3745   switch (m->rule()) {
 3746     case MoveVec2Leg_rule:
 3747     case MoveLeg2Vec_rule:
 3748     case MoveF2VL_rule:
 3749     case MoveF2LEG_rule:
 3750     case MoveVL2F_rule:
 3751     case MoveLEG2F_rule:
 3752     case MoveD2VL_rule:
 3753     case MoveD2LEG_rule:
 3754     case MoveVL2D_rule:
 3755     case MoveLEG2D_rule:
 3756       return true;
 3757     default:
 3758       return false;
 3759   }
 3760 }
 3761 
 3762 bool Matcher::is_generic_vector(MachOper* opnd) {
 3763   switch (opnd->opcode()) {
 3764     case VEC:
 3765     case LEGVEC:
 3766       return true;
 3767     default:
 3768       return false;
 3769   }
 3770 }
 3771 
 3772 //------------------------------------------------------------------------
 3773 
 3774 const RegMask* Matcher::predicate_reg_mask(void) {
 3775   return &_VECTMASK_REG_mask;
 3776 }
 3777 
 3778 // Max vector size in bytes. 0 if not supported.
 3779 int Matcher::vector_width_in_bytes(BasicType bt) {
 3780   assert(is_java_primitive(bt), "only primitive type vectors");
 3781   // SSE2 supports 128bit vectors for all types.
 3782   // AVX2 supports 256bit vectors for all types.
  // AVX512/EVEX supports 512bit vectors for all types.
 3784   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
 3785   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3786   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3787     size = (UseAVX > 2) ? 64 : 32;
 3788   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3789     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
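  // Example (illustrative): with UseAVX == 3 and MaxVectorSize == 64,
  // T_INT vectors are 64 bytes wide, while T_BYTE vectors are 64 bytes
  // with avx512bw support and 32 bytes without it.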
 3790   // Use flag to limit vector size.
 3791   size = MIN2(size,(int)MaxVectorSize);
 3792   // Minimum 2 values in vector (or 4 for bytes).
 3793   switch (bt) {
 3794   case T_DOUBLE:
 3795   case T_LONG:
 3796     if (size < 16) return 0;
 3797     break;
 3798   case T_FLOAT:
 3799   case T_INT:
 3800     if (size < 8) return 0;
 3801     break;
  case T_BOOLEAN:
  case T_CHAR:
  case T_BYTE:
  case T_SHORT:
    if (size < 4) return 0;
    break;
 3814   default:
 3815     ShouldNotReachHere();
 3816   }
 3817   return size;
 3818 }
 3819 
 3820 // Limits on vector size (number of elements) loaded into vector.
 3821 int Matcher::max_vector_size(const BasicType bt) {
 3822   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3823 }
 3824 int Matcher::min_vector_size(const BasicType bt) {
 3825   int max_size = max_vector_size(bt);
 3826   // Min size which can be loaded into vector is 4 bytes.
 3827   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
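  // e.g. at least 4 elements for T_BYTE, 2 for T_SHORT/T_INT/T_LONG/T_FLOAT
  // (T_DOUBLE is further lowered to 1 below).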
 3828   // Support for calling svml double64 vectors
 3829   if (bt == T_DOUBLE) {
 3830     size = 1;
 3831   }
 3832   return MIN2(size,max_size);
 3833 }
 3834 
 3835 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3836   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3837   // by default on Cascade Lake
 3838   if (VM_Version::is_default_intel_cascade_lake()) {
 3839     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3840   }
 3841   return Matcher::max_vector_size(bt);
 3842 }
 3843 
 3844 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3845   return -1;
 3846 }
 3847 
 3848 // Vector ideal reg corresponding to specified size in bytes
 3849 uint Matcher::vector_ideal_reg(int size) {
 3850   assert(MaxVectorSize >= size, "");
 3851   switch(size) {
 3852     case  4: return Op_VecS;
 3853     case  8: return Op_VecD;
 3854     case 16: return Op_VecX;
 3855     case 32: return Op_VecY;
 3856     case 64: return Op_VecZ;
 3857   }
 3858   ShouldNotReachHere();
 3859   return 0;
 3860 }
 3861 
 3862 // Check for shift by small constant as well
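// For example, the (LShiftX idx #3) in (AddP base (LShiftX idx #3)) can be
// subsumed into a scaled addressing mode such as [base + idx*8].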
 3863 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3864   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3865       shift->in(2)->get_int() <= 3 &&
 3866       // Are there other uses besides address expressions?
 3867       !matcher->is_visited(shift)) {
 3868     address_visited.set(shift->_idx); // Flag as address_visited
 3869     mstack.push(shift->in(2), Matcher::Visit);
 3870     Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses the
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
 3874     if (conv->Opcode() == Op_ConvI2L &&
 3875         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3876         // Are there other uses besides address expressions?
 3877         !matcher->is_visited(conv)) {
 3878       address_visited.set(conv->_idx); // Flag as address_visited
 3879       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3880     } else {
 3881       mstack.push(conv, Matcher::Pre_Visit);
 3882     }
 3883     return true;
 3884   }
 3885   return false;
 3886 }
 3887 
// This function identifies sub-graphs in which a 'load' node is
// an input to two different nodes, such that the sub-graph can be
// matched to BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
 3894 //
 3895 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3896 // This is a temporary solution until we make DAGs expressible in ADL.
 3897 template<typename ConType>
 3898 class FusedPatternMatcher {
 3899   Node* _op1_node;
 3900   Node* _mop_node;
 3901   int _con_op;
 3902 
 3903   static int match_next(Node* n, int next_op, int next_op_idx) {
 3904     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3905       return -1;
 3906     }
 3907 
 3908     if (next_op_idx == -1) { // n is commutative, try rotations
 3909       if (n->in(1)->Opcode() == next_op) {
 3910         return 1;
 3911       } else if (n->in(2)->Opcode() == next_op) {
 3912         return 2;
 3913       }
 3914     } else {
 3915       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 3916       if (n->in(next_op_idx)->Opcode() == next_op) {
 3917         return next_op_idx;
 3918       }
 3919     }
 3920     return -1;
 3921   }
 3922 
 3923  public:
 3924   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 3925     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 3926 
 3927   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 3928              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 3929              typename ConType::NativeType con_value) {
 3930     if (_op1_node->Opcode() != op1) {
 3931       return false;
 3932     }
 3933     if (_mop_node->outcnt() > 2) {
 3934       return false;
 3935     }
 3936     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 3937     if (op1_op2_idx == -1) {
 3938       return false;
 3939     }
 3940     // Memory operation must be the other edge
 3941     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 3942 
 3943     // Check that the mop node is really what we want
 3944     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 3945       Node* op2_node = _op1_node->in(op1_op2_idx);
 3946       if (op2_node->outcnt() > 1) {
 3947         return false;
 3948       }
 3949       assert(op2_node->Opcode() == op2, "Should be");
 3950       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 3951       if (op2_con_idx == -1) {
 3952         return false;
 3953       }
 3954       // Memory operation must be the other edge
 3955       int op2_mop_idx = (op2_con_idx & 1) + 1;
 3956       // Check that the memory operation is the same node
 3957       if (op2_node->in(op2_mop_idx) == _mop_node) {
 3958         // Now check the constant
 3959         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 3960         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 3961           return true;
 3962         }
 3963       }
 3964     }
 3965     return false;
 3966   }
 3967 };
 3968 
 3969 static bool is_bmi_pattern(Node* n, Node* m) {
 3970   assert(UseBMI1Instructions, "sanity");
 3971   if (n != nullptr && m != nullptr) {
 3972     if (m->Opcode() == Op_LoadI) {
 3973       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
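      // Patterns matched below (BMI1):
      //   (AndI (SubI 0 load) load)   -> blsi:   x & -x
      //   (AndI (AddI load -1) load)  -> blsr:   x & (x - 1)
      //   (XorI (AddI load -1) load)  -> blsmsk: x ^ (x - 1)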
 3974       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 3975              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 3976              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 3977     } else if (m->Opcode() == Op_LoadL) {
 3978       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 3979       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 3980              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 3981              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 3982     }
 3983   }
 3984   return false;
 3985 }
 3986 
 3987 // Should the matcher clone input 'm' of node 'n'?
 3988 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 3989   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 3990   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
 3991     mstack.push(m, Visit);
 3992     return true;
 3993   }
 3994   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 3995     mstack.push(m, Visit);           // m = ShiftCntV
 3996     return true;
 3997   }
 3998   if (is_encode_and_store_pattern(n, m)) {
 3999     mstack.push(m, Visit);
 4000     return true;
 4001   }
 4002   return false;
 4003 }
 4004 
 4005 // Should the Matcher clone shifts on addressing modes, expecting them
 4006 // to be subsumed into complex addressing expressions or compute them
 4007 // into registers?
 4008 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4009   Node *off = m->in(AddPNode::Offset);
 4010   if (off->is_Con()) {
 4011     address_visited.test_set(m->_idx); // Flag as address_visited
 4012     Node *adr = m->in(AddPNode::Address);
 4013 
 4014     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4015     // AtomicAdd is not an addressing expression.
 4016     // Cheap to find it by looking for screwy base.
 4017     if (adr->is_AddP() &&
 4018         !adr->in(AddPNode::Base)->is_top() &&
 4019         !adr->in(AddPNode::Offset)->is_Con() &&
 4020         off->get_long() == (int) (off->get_long()) && // immL32
 4021         // Are there other uses besides address expressions?
 4022         !is_visited(adr)) {
 4023       address_visited.set(adr->_idx); // Flag as address_visited
 4024       Node *shift = adr->in(AddPNode::Offset);
 4025       if (!clone_shift(shift, this, mstack, address_visited)) {
 4026         mstack.push(shift, Pre_Visit);
 4027       }
 4028       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4029       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4030     } else {
 4031       mstack.push(adr, Pre_Visit);
 4032     }
 4033 
 4034     // Clone X+offset as it also folds into most addressing expressions
 4035     mstack.push(off, Visit);
 4036     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4037     return true;
 4038   } else if (clone_shift(off, this, mstack, address_visited)) {
 4039     address_visited.test_set(m->_idx); // Flag as address_visited
 4040     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4041     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4042     return true;
 4043   }
 4044   return false;
 4045 }
 4046 
 4047 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4048   switch (bt) {
 4049     case BoolTest::eq:
 4050       return Assembler::eq;
 4051     case BoolTest::ne:
 4052       return Assembler::neq;
 4053     case BoolTest::le:
 4054     case BoolTest::ule:
 4055       return Assembler::le;
 4056     case BoolTest::ge:
 4057     case BoolTest::uge:
 4058       return Assembler::nlt;
 4059     case BoolTest::lt:
 4060     case BoolTest::ult:
 4061       return Assembler::lt;
 4062     case BoolTest::gt:
 4063     case BoolTest::ugt:
 4064       return Assembler::nle;
 4065     default : ShouldNotReachHere(); return Assembler::_false;
 4066   }
 4067 }
 4068 
 4069 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4070   switch (bt) {
 4071   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4072   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4073   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4074   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4075   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4076   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4077   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4078   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4079   }
 4080 }
 4081 
 4082 // Helper methods for MachSpillCopyNode::implementation().
 4083 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4084                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4085   assert(ireg == Op_VecS || // 32bit vector
 4086          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4087           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4088          "no non-adjacent vector moves" );
 4089   if (masm) {
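    // On AVX-512 targets without the VL extension (e.g. KNL), 128/256-bit
    // moves of XMM/YMM registers cannot be EVEX-encoded, so the copies
    // below fall back to the 512-bit vextractf32x4/vextractf64x4 forms.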
 4090     switch (ireg) {
 4091     case Op_VecS: // copy whole register
 4092     case Op_VecD:
 4093     case Op_VecX:
 4094       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4095         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4096       } else {
 4097         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4098      }
 4099       break;
 4100     case Op_VecY:
 4101       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4102         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4103       } else {
 4104         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4105      }
 4106       break;
 4107     case Op_VecZ:
 4108       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4109       break;
 4110     default:
 4111       ShouldNotReachHere();
 4112     }
 4113 #ifndef PRODUCT
 4114   } else {
 4115     switch (ireg) {
 4116     case Op_VecS:
 4117     case Op_VecD:
 4118     case Op_VecX:
 4119       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4120       break;
 4121     case Op_VecY:
 4122     case Op_VecZ:
 4123       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4124       break;
 4125     default:
 4126       ShouldNotReachHere();
 4127     }
 4128 #endif
 4129   }
 4130 }
 4131 
 4132 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4133                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4134   if (masm) {
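    // As in vec_mov_helper above: without AVX512VL, 128/256-bit spills go
    // through 512-bit vinsert/vextract forms rather than EVEX-encoded moves.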
 4135     if (is_load) {
 4136       switch (ireg) {
 4137       case Op_VecS:
 4138         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4139         break;
 4140       case Op_VecD:
 4141         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4142         break;
 4143       case Op_VecX:
 4144         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4145           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4146         } else {
 4147           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4149         }
 4150         break;
 4151       case Op_VecY:
 4152         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4153           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4154         } else {
 4155           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4157         }
 4158         break;
 4159       case Op_VecZ:
 4160         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4161         break;
 4162       default:
 4163         ShouldNotReachHere();
 4164       }
 4165     } else { // store
 4166       switch (ireg) {
 4167       case Op_VecS:
 4168         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4169         break;
 4170       case Op_VecD:
 4171         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4172         break;
 4173       case Op_VecX:
 4174         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4175           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4176         }
 4177         else {
 4178           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4179         }
 4180         break;
 4181       case Op_VecY:
 4182         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4183           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4184         }
 4185         else {
 4186           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4187         }
 4188         break;
 4189       case Op_VecZ:
 4190         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4191         break;
 4192       default:
 4193         ShouldNotReachHere();
 4194       }
 4195     }
 4196 #ifndef PRODUCT
 4197   } else {
 4198     if (is_load) {
 4199       switch (ireg) {
 4200       case Op_VecS:
 4201         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4202         break;
 4203       case Op_VecD:
 4204         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4205         break;
      case Op_VecX:
 4207         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4208         break;
 4209       case Op_VecY:
 4210       case Op_VecZ:
 4211         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4212         break;
 4213       default:
 4214         ShouldNotReachHere();
 4215       }
 4216     } else { // store
 4217       switch (ireg) {
 4218       case Op_VecS:
 4219         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4220         break;
 4221       case Op_VecD:
 4222         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4223         break;
      case Op_VecX:
 4225         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4226         break;
 4227       case Op_VecY:
 4228       case Op_VecZ:
 4229         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4230         break;
 4231       default:
 4232         ShouldNotReachHere();
 4233       }
 4234     }
 4235 #endif
 4236   }
 4237 }
 4238 
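// Replicate the compile-time constant 'con' of element type 'bt' 'len' times
// into a byte array, using the target's little-endian memory layout.  For
// example, vreplicate_imm(T_INT, 1, 4) yields the 16 bytes
// 01 00 00 00 01 00 00 00 01 00 00 00 01 00 00 00.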
 4239 template <class T>
 4240 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4241   int size = type2aelembytes(bt) * len;
 4242   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4243   for (int i = 0; i < len; i++) {
 4244     int offset = i * type2aelembytes(bt);
 4245     switch (bt) {
 4246       case T_BYTE: val->at(i) = con; break;
 4247       case T_SHORT: {
 4248         jshort c = con;
 4249         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4250         break;
 4251       }
 4252       case T_INT: {
 4253         jint c = con;
 4254         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4255         break;
 4256       }
 4257       case T_LONG: {
 4258         jlong c = con;
 4259         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4260         break;
 4261       }
 4262       case T_FLOAT: {
 4263         jfloat c = con;
 4264         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4265         break;
 4266       }
 4267       case T_DOUBLE: {
 4268         jdouble c = con;
 4269         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4270         break;
 4271       }
 4272       default: assert(false, "%s", type2name(bt));
 4273     }
 4274   }
 4275   return val;
 4276 }
 4277 
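// A 64-bit pattern with only the sign bit set in every 'bt'-sized lane.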
 4278 static inline jlong high_bit_set(BasicType bt) {
 4279   switch (bt) {
 4280     case T_BYTE:  return 0x8080808080808080;
 4281     case T_SHORT: return 0x8000800080008000;
 4282     case T_INT:   return 0x8000000080000000;
 4283     case T_LONG:  return 0x8000000000000000;
 4284     default:
 4285       ShouldNotReachHere();
 4286       return 0;
 4287   }
 4288 }
 4289 
 4290 #ifndef PRODUCT
 4291   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4292     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4293   }
 4294 #endif
 4295 
 4296   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4297     __ nop(_count);
 4298   }
 4299 
 4300   uint MachNopNode::size(PhaseRegAlloc*) const {
 4301     return _count;
 4302   }
 4303 
 4304 #ifndef PRODUCT
 4305   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4306     st->print("# breakpoint");
 4307   }
 4308 #endif
 4309 
 4310   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4311     __ int3();
 4312   }
 4313 
 4314   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4315     return MachNode::size(ra_);
 4316   }
 4317 
 4318 %}
 4319 
 4320 //----------ENCODING BLOCK-----------------------------------------------------
 4321 // This block specifies the encoding classes used by the compiler to
 4322 // output byte streams.  Encoding classes are parameterized macros
 4323 // used by Machine Instruction Nodes in order to generate the bit
 4324 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
 4328 // which returns its register number when queried.  CONST_INTER causes
 4329 // an operand to generate a function which returns the value of the
 4330 // constant when queried.  MEMORY_INTER causes an operand to generate
 4331 // four functions which return the Base Register, the Index Register,
// the Scale Value, and the Offset Value of the operand when queried; in
// addition, a function is available to check whether a constant
// displacement is an oop.
 4333 // COND_INTER causes an operand to generate six functions which return
 4334 // the encoding code (ie - encoding bits for the instruction)
 4335 // associated with each basic boolean condition for a conditional
 4336 // instruction.
 4337 //
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding
 4341 // classes (which must be a sequence of enc_class names, and their
 4342 // parameters, specified in the encoding block), and they use the
 4343 // opcode keyword to specify, in order, their primary, secondary, and
 4344 // tertiary opcode.  Only the opcode sections which a particular
 4345 // instruction needs for encoding need to be specified.
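// For example, an instruction rule elsewhere in this file might use
//   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
// where each name refers to an enc_class defined in this block.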
 4346 encode %{
 4347   enc_class cdql_enc(no_rax_rdx_RegI div)
 4348   %{
 4349     // Full implementation of Java idiv and irem; checks for
 4350     // special case as described in JVM spec., p.243 & p.271.
 4351     //
 4352     //         normal case                           special case
 4353     //
 4354     // input : rax: dividend                         min_int
 4355     //         reg: divisor                          -1
 4356     //
 4357     // output: rax: quotient  (= rax idiv reg)       min_int
 4358     //         rdx: remainder (= rax irem reg)       0
 4359     //
    //  Code sequence:
 4361     //
 4362     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4363     //    5:   75 07/08                jne    e <normal>
 4364     //    7:   33 d2                   xor    %edx,%edx
 4365     //  [div >= 8 -> offset + 1]
 4366     //  [REX_B]
 4367     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4368     //    c:   74 03/04                je     11 <done>
 4369     // 000000000000000e <normal>:
 4370     //    e:   99                      cltd
 4371     //  [div >= 8 -> offset + 1]
 4372     //  [REX_B]
 4373     //    f:   f7 f9                   idiv   $div
 4374     // 0000000000000011 <done>:
 4375     Label normal;
 4376     Label done;
 4377 
 4378     // cmp    $0x80000000,%eax
 4379     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4380 
 4381     // jne    e <normal>
 4382     __ jccb(Assembler::notEqual, normal);
 4383 
 4384     // xor    %edx,%edx
 4385     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4386 
    // cmp    $0xffffffffffffffff,$div
 4388     __ cmpl($div$$Register, -1);
 4389 
 4390     // je     11 <done>
 4391     __ jccb(Assembler::equal, done);
 4392 
 4393     // <normal>
 4394     // cltd
 4395     __ bind(normal);
 4396     __ cdql();
 4397 
 4398     // idivl
 4399     // <done>
 4400     __ idivl($div$$Register);
 4401     __ bind(done);
 4402   %}
 4403 
 4404   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4405   %{
 4406     // Full implementation of Java ldiv and lrem; checks for
 4407     // special case as described in JVM spec., p.243 & p.271.
 4408     //
 4409     //         normal case                           special case
 4410     //
 4411     // input : rax: dividend                         min_long
 4412     //         reg: divisor                          -1
 4413     //
 4414     // output: rax: quotient  (= rax idiv reg)       min_long
 4415     //         rdx: remainder (= rax irem reg)       0
 4416     //
    //  Code sequence:
 4418     //
 4419     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4420     //    7:   00 00 80
 4421     //    a:   48 39 d0                cmp    %rdx,%rax
 4422     //    d:   75 08                   jne    17 <normal>
 4423     //    f:   33 d2                   xor    %edx,%edx
 4424     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4425     //   15:   74 05                   je     1c <done>
 4426     // 0000000000000017 <normal>:
 4427     //   17:   48 99                   cqto
 4428     //   19:   48 f7 f9                idiv   $div
 4429     // 000000000000001c <done>:
 4430     Label normal;
 4431     Label done;
 4432 
 4433     // mov    $0x8000000000000000,%rdx
 4434     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4435 
 4436     // cmp    %rdx,%rax
 4437     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4438 
 4439     // jne    17 <normal>
 4440     __ jccb(Assembler::notEqual, normal);
 4441 
 4442     // xor    %edx,%edx
 4443     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4444 
 4445     // cmp    $0xffffffffffffffff,$div
 4446     __ cmpq($div$$Register, -1);
 4447 
    // je     1c <done>
 4449     __ jccb(Assembler::equal, done);
 4450 
 4451     // <normal>
 4452     // cqto
 4453     __ bind(normal);
 4454     __ cdqq();
 4455 
    // idivq
 4457     // <done>
 4458     __ idivq($div$$Register);
 4459     __ bind(done);
 4460   %}
 4461 
 4462   enc_class clear_avx %{
 4463     DEBUG_ONLY(int off0 = __ offset());
 4464     if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors, to avoid the AVX <-> SSE transition penalty during calls.
 4468       __ vzeroupper();
 4469     }
 4470     DEBUG_ONLY(int off1 = __ offset());
 4471     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4472   %}
 4473 
 4474   enc_class Java_To_Runtime(method meth) %{
 4475     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4476     __ call(r10);
 4477     __ post_call_nop();
 4478   %}
 4479 
 4480   enc_class Java_Static_Call(method meth)
 4481   %{
 4482     // JAVA STATIC CALL
 4483     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4484     // determine who we intended to call.
 4485     if (!_method) {
 4486       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4487     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4488       // The NOP here is purely to ensure that eliding a call to
 4489       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4490       __ addr_nop_5();
 4491       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4492     } else {
 4493       int method_index = resolved_method_index(masm);
 4494       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4495                                                   : static_call_Relocation::spec(method_index);
 4496       address mark = __ pc();
 4497       int call_offset = __ offset();
 4498       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4499       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4500         // Calls of the same statically bound method can share
 4501         // a stub to the interpreter.
 4502         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4503       } else {
 4504         // Emit stubs for static call.
 4505         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4506         __ clear_inst_mark();
 4507         if (stub == nullptr) {
 4508           ciEnv::current()->record_failure("CodeCache is full");
 4509           return;
 4510         }
 4511       }
 4512     }
 4513     __ post_call_nop();
 4514   %}
 4515 
 4516   enc_class Java_Dynamic_Call(method meth) %{
 4517     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4518     __ post_call_nop();
 4519   %}
 4520 
 4521   enc_class call_epilog %{
 4522     if (VerifyStackAtCalls) {
 4523       // Check that stack depth is unchanged: find majik cookie on stack
 4524       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4525       Label L;
 4526       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4527       __ jccb(Assembler::equal, L);
 4528       // Die if stack mismatch
 4529       __ int3();
 4530       __ bind(L);
 4531     }
 4532   %}
 4533 
 4534 %}
 4535 
 4536 //----------FRAME--------------------------------------------------------------
 4537 // Definition of frame structure and management information.
 4538 //
 4539 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4540 //                             |   (to get allocators register number
 4541 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4542 //  r   CALLER     |        |
 4543 //  o     |        +--------+      pad to even-align allocators stack-slot
 4544 //  w     V        |  pad0  |        numbers; owned by CALLER
 4545 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4546 //  h     ^        |   in   |  5
 4547 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4548 //  |     |        |        |  3
 4549 //  |     |        +--------+
 4550 //  V     |        | old out|      Empty on Intel, window on Sparc
 4551 //        |    old |preserve|      Must be even aligned.
 4552 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4553 //        |        |   in   |  3   area for Intel ret address
 4554 //     Owned by    |preserve|      Empty on Sparc.
 4555 //       SELF      +--------+
 4556 //        |        |  pad2  |  2   pad to align old SP
 4557 //        |        +--------+  1
 4558 //        |        | locks  |  0
 4559 //        |        +--------+----> OptoReg::stack0(), even aligned
 4560 //        |        |  pad1  | 11   pad to align new SP
 4561 //        |        +--------+
 4562 //        |        |        | 10
 4563 //        |        | spills |  9   spills
 4564 //        V        |        |  8   (pad0 slot for callee)
 4565 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4566 //        ^        |  out   |  7
 4567 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4568 //     Owned by    +--------+
 4569 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4570 //        |    new |preserve|      Must be even-aligned.
 4571 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4572 //        |        |        |
 4573 //
 4574 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4575 //         known from SELF's arguments and the Java calling convention.
 4576 //         Region 6-7 is determined per call site.
 4577 // Note 2: If the calling convention leaves holes in the incoming argument
 4578 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4579 //         are owned by the CALLEE.  Holes should not be necessary in the
 4580 //         incoming area, as the Java calling convention is completely under
 4581 //         the control of the AD file.  Doubles can be sorted and packed to
 4582 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4583 //         varargs C calling conventions.
 4584 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4585 //         even aligned with pad0 as needed.
 4586 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4587 //         region 6-11 is even aligned; it may be padded out more so that
 4588 //         the region from SP to FP meets the minimum stack alignment.
 4589 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4590 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4591 //         SP meets the minimum alignment.
 4592 
 4593 frame
 4594 %{
 4595   // These three registers define part of the calling convention
 4596   // between compiled code and the interpreter.
 4597   inline_cache_reg(RAX);                // Inline Cache Register
 4598 
 4599   // Optional: name the operand used by cisc-spilling to access
 4600   // [stack_pointer + offset]
 4601   cisc_spilling_operand_name(indOffset32);
 4602 
 4603   // Number of stack slots consumed by locking an object
 4604   sync_stack_slots(2);
 4605 
 4606   // Compiled code's Frame Pointer
 4607   frame_pointer(RSP);
 4608 
 4609   // Interpreter stores its frame pointer in a register which is
 4610   // stored to the stack by I2CAdaptors.
 4611   // I2CAdaptors convert from interpreted java to compiled java.
 4612   interpreter_frame_pointer(RBP);
 4613 
 4614   // Stack alignment requirement
 4615   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4616 
 4617   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4618   // for calls to C.  Supports the var-args backing area for register parms.
 4619   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4620 
 4621   // The after-PROLOG location of the return address.  Location of
 4622   // return address specifies a type (REG or STACK) and a number
 4623   // representing the register number (i.e. - use a register name) or
 4624   // stack slot.
 4625   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4626   // Otherwise, it is above the locks and verification slot and alignment word
 4627   return_addr(STACK - 2 +
 4628               align_up((Compile::current()->in_preserve_stack_slots() +
 4629                         Compile::current()->fixed_slots()),
 4630                        stack_alignment_in_slots()));
 4631 
 4632   // Location of compiled Java return values.  Same as C for now.
 4633   return_value
 4634   %{
 4635     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4636            "only return normal values");
 4637 
 4638     static const int lo[Op_RegL + 1] = {
 4639       0,
 4640       0,
 4641       RAX_num,  // Op_RegN
 4642       RAX_num,  // Op_RegI
 4643       RAX_num,  // Op_RegP
 4644       XMM0_num, // Op_RegF
 4645       XMM0_num, // Op_RegD
 4646       RAX_num   // Op_RegL
 4647     };
 4648     static const int hi[Op_RegL + 1] = {
 4649       0,
 4650       0,
 4651       OptoReg::Bad, // Op_RegN
 4652       OptoReg::Bad, // Op_RegI
 4653       RAX_H_num,    // Op_RegP
 4654       OptoReg::Bad, // Op_RegF
 4655       XMM0b_num,    // Op_RegD
 4656       RAX_H_num     // Op_RegL
 4657     };
 4658     // Excluded flags and vector registers.
 4659     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4660     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4661   %}
 4662 %}
 4663 
 4664 //----------ATTRIBUTES---------------------------------------------------------
 4665 //----------Operand Attributes-------------------------------------------------
 4666 op_attrib op_cost(0);        // Required cost attribute
 4667 
 4668 //----------Instruction Attributes---------------------------------------------
 4669 ins_attrib ins_cost(100);       // Required cost attribute
 4670 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4671 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4672                                 // a non-matching short branch variant
 4673                                 // of some long branch?
 4674 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4675                                 // be a power of 2) specifies the
 4676                                 // alignment that some part of the
 4677                                 // instruction (not necessarily the
 4678                                 // start) requires.  If > 1, a
 4679                                 // compute_padding() function must be
 4680                                 // provided for the instruction
 4681 
 4682 // Whether this node is expanded during code emission into a sequence of
 4683 // instructions and the first instruction can perform an implicit null check.
 4684 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4685 
 4686 //----------OPERANDS-----------------------------------------------------------
 4687 // Operand definitions must precede instruction definitions for correct parsing
 4688 // in the ADLC because operands constitute user defined types which are used in
 4689 // instruction definitions.
 4690 
 4691 //----------Simple Operands----------------------------------------------------
 4692 // Immediate Operands
 4693 // Integer Immediate
 4694 operand immI()
 4695 %{
 4696   match(ConI);
 4697 
 4698   op_cost(10);
 4699   format %{ %}
 4700   interface(CONST_INTER);
 4701 %}
 4702 
 4703 // Constant for test vs zero
 4704 operand immI_0()
 4705 %{
 4706   predicate(n->get_int() == 0);
 4707   match(ConI);
 4708 
 4709   op_cost(0);
 4710   format %{ %}
 4711   interface(CONST_INTER);
 4712 %}
 4713 
 4714 // Constant for increment
 4715 operand immI_1()
 4716 %{
 4717   predicate(n->get_int() == 1);
 4718   match(ConI);
 4719 
 4720   op_cost(0);
 4721   format %{ %}
 4722   interface(CONST_INTER);
 4723 %}
 4724 
 4725 // Constant for decrement
 4726 operand immI_M1()
 4727 %{
 4728   predicate(n->get_int() == -1);
 4729   match(ConI);
 4730 
 4731   op_cost(0);
 4732   format %{ %}
 4733   interface(CONST_INTER);
 4734 %}
 4735 
 4736 operand immI_2()
 4737 %{
 4738   predicate(n->get_int() == 2);
 4739   match(ConI);
 4740 
 4741   op_cost(0);
 4742   format %{ %}
 4743   interface(CONST_INTER);
 4744 %}
 4745 
 4746 operand immI_4()
 4747 %{
 4748   predicate(n->get_int() == 4);
 4749   match(ConI);
 4750 
 4751   op_cost(0);
 4752   format %{ %}
 4753   interface(CONST_INTER);
 4754 %}
 4755 
 4756 operand immI_8()
 4757 %{
 4758   predicate(n->get_int() == 8);
 4759   match(ConI);
 4760 
 4761   op_cost(0);
 4762   format %{ %}
 4763   interface(CONST_INTER);
 4764 %}
 4765 
 4766 // Valid scale values for addressing modes
 4767 operand immI2()
 4768 %{
 4769   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4770   match(ConI);
 4771 
 4772   format %{ %}
 4773   interface(CONST_INTER);
 4774 %}
 4775 
 4776 operand immU7()
 4777 %{
 4778   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4779   match(ConI);
 4780 
 4781   op_cost(5);
 4782   format %{ %}
 4783   interface(CONST_INTER);
 4784 %}
 4785 
 4786 operand immI8()
 4787 %{
 4788   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4789   match(ConI);
 4790 
 4791   op_cost(5);
 4792   format %{ %}
 4793   interface(CONST_INTER);
 4794 %}
 4795 
 4796 operand immU8()
 4797 %{
 4798   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4799   match(ConI);
 4800 
 4801   op_cost(5);
 4802   format %{ %}
 4803   interface(CONST_INTER);
 4804 %}
 4805 
 4806 operand immI16()
 4807 %{
 4808   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4809   match(ConI);
 4810 
 4811   op_cost(10);
 4812   format %{ %}
 4813   interface(CONST_INTER);
 4814 %}
 4815 
 4816 // Int Immediate non-negative
 4817 operand immU31()
 4818 %{
 4819   predicate(n->get_int() >= 0);
 4820   match(ConI);
 4821 
 4822   op_cost(0);
 4823   format %{ %}
 4824   interface(CONST_INTER);
 4825 %}
 4826 
 4827 // Pointer Immediate
 4828 operand immP()
 4829 %{
 4830   match(ConP);
 4831 
 4832   op_cost(10);
 4833   format %{ %}
 4834   interface(CONST_INTER);
 4835 %}
 4836 
 4837 // Null Pointer Immediate
 4838 operand immP0()
 4839 %{
 4840   predicate(n->get_ptr() == 0);
 4841   match(ConP);
 4842 
 4843   op_cost(5);
 4844   format %{ %}
 4845   interface(CONST_INTER);
 4846 %}
 4847 
 4848 // Pointer Immediate
 4849 operand immN() %{
 4850   match(ConN);
 4851 
 4852   op_cost(10);
 4853   format %{ %}
 4854   interface(CONST_INTER);
 4855 %}
 4856 
 4857 operand immNKlass() %{
 4858   match(ConNKlass);
 4859 
 4860   op_cost(10);
 4861   format %{ %}
 4862   interface(CONST_INTER);
 4863 %}
 4864 
 4865 // Null Pointer Immediate
 4866 operand immN0() %{
 4867   predicate(n->get_narrowcon() == 0);
 4868   match(ConN);
 4869 
 4870   op_cost(5);
 4871   format %{ %}
 4872   interface(CONST_INTER);
 4873 %}
 4874 
 4875 operand immP31()
 4876 %{
 4877   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 4878             && (n->get_ptr() >> 31) == 0);
 4879   match(ConP);
 4880 
 4881   op_cost(5);
 4882   format %{ %}
 4883   interface(CONST_INTER);
 4884 %}
 4885 
 4886 
 4887 // Long Immediate
 4888 operand immL()
 4889 %{
 4890   match(ConL);
 4891 
 4892   op_cost(20);
 4893   format %{ %}
 4894   interface(CONST_INTER);
 4895 %}
 4896 
 4897 // Long Immediate 8-bit
 4898 operand immL8()
 4899 %{
 4900   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 4901   match(ConL);
 4902 
 4903   op_cost(5);
 4904   format %{ %}
 4905   interface(CONST_INTER);
 4906 %}
 4907 
 4908 // Long Immediate 32-bit unsigned
 4909 operand immUL32()
 4910 %{
 4911   predicate(n->get_long() == (unsigned int) (n->get_long()));
 4912   match(ConL);
 4913 
 4914   op_cost(10);
 4915   format %{ %}
 4916   interface(CONST_INTER);
 4917 %}
 4918 
 4919 // Long Immediate 32-bit signed
 4920 operand immL32()
 4921 %{
 4922   predicate(n->get_long() == (int) (n->get_long()));
 4923   match(ConL);
 4924 
 4925   op_cost(15);
 4926   format %{ %}
 4927   interface(CONST_INTER);
 4928 %}
 4929 
 4930 operand immL_Pow2()
 4931 %{
 4932   predicate(is_power_of_2((julong)n->get_long()));
 4933   match(ConL);
 4934 
 4935   op_cost(15);
 4936   format %{ %}
 4937   interface(CONST_INTER);
 4938 %}
 4939 
 4940 operand immL_NotPow2()
 4941 %{
 4942   predicate(is_power_of_2((julong)~n->get_long()));
 4943   match(ConL);
 4944 
 4945   op_cost(15);
 4946   format %{ %}
 4947   interface(CONST_INTER);
 4948 %}
 4949 
 4950 // Long Immediate zero
 4951 operand immL0()
 4952 %{
 4953   predicate(n->get_long() == 0L);
 4954   match(ConL);
 4955 
 4956   op_cost(10);
 4957   format %{ %}
 4958   interface(CONST_INTER);
 4959 %}
 4960 
 4961 // Constant for increment
 4962 operand immL1()
 4963 %{
 4964   predicate(n->get_long() == 1);
 4965   match(ConL);
 4966 
 4967   format %{ %}
 4968   interface(CONST_INTER);
 4969 %}
 4970 
 4971 // Constant for decrement
 4972 operand immL_M1()
 4973 %{
 4974   predicate(n->get_long() == -1);
 4975   match(ConL);
 4976 
 4977   format %{ %}
 4978   interface(CONST_INTER);
 4979 %}
 4980 
 4981 // Long Immediate: low 32-bit mask
 4982 operand immL_32bits()
 4983 %{
 4984   predicate(n->get_long() == 0xFFFFFFFFL);
 4985   match(ConL);
 4986   op_cost(20);
 4987 
 4988   format %{ %}
 4989   interface(CONST_INTER);
 4990 %}
 4991 
 4992 // Int Immediate: 2^n-1, positive
 4993 operand immI_Pow2M1()
 4994 %{
 4995   predicate((n->get_int() > 0)
 4996             && is_power_of_2((juint)n->get_int() + 1));
 4997   match(ConI);
 4998 
 4999   op_cost(20);
 5000   format %{ %}
 5001   interface(CONST_INTER);
 5002 %}
 5003 
 5004 // Float Immediate zero
 5005 operand immF0()
 5006 %{
 5007   predicate(jint_cast(n->getf()) == 0);
 5008   match(ConF);
 5009 
 5010   op_cost(5);
 5011   format %{ %}
 5012   interface(CONST_INTER);
 5013 %}
 5014 
 5015 // Float Immediate
 5016 operand immF()
 5017 %{
 5018   match(ConF);
 5019 
 5020   op_cost(15);
 5021   format %{ %}
 5022   interface(CONST_INTER);
 5023 %}
 5024 
 5025 // Half Float Immediate
 5026 operand immH()
 5027 %{
 5028   match(ConH);
 5029 
 5030   op_cost(15);
 5031   format %{ %}
 5032   interface(CONST_INTER);
 5033 %}
 5034 
 5035 // Double Immediate zero
 5036 operand immD0()
 5037 %{
 5038   predicate(jlong_cast(n->getd()) == 0);
 5039   match(ConD);
 5040 
 5041   op_cost(5);
 5042   format %{ %}
 5043   interface(CONST_INTER);
 5044 %}
 5045 
 5046 // Double Immediate
 5047 operand immD()
 5048 %{
 5049   match(ConD);
 5050 
 5051   op_cost(15);
 5052   format %{ %}
 5053   interface(CONST_INTER);
 5054 %}
 5055 
 5056 // Immediates for special shifts (sign extend)
 5057 
// Constants for sign-extending shifts
 5059 operand immI_16()
 5060 %{
 5061   predicate(n->get_int() == 16);
 5062   match(ConI);
 5063 
 5064   format %{ %}
 5065   interface(CONST_INTER);
 5066 %}
 5067 
 5068 operand immI_24()
 5069 %{
 5070   predicate(n->get_int() == 24);
 5071   match(ConI);
 5072 
 5073   format %{ %}
 5074   interface(CONST_INTER);
 5075 %}
 5076 
 5077 // Constant for byte-wide masking
 5078 operand immI_255()
 5079 %{
 5080   predicate(n->get_int() == 255);
 5081   match(ConI);
 5082 
 5083   format %{ %}
 5084   interface(CONST_INTER);
 5085 %}
 5086 
 5087 // Constant for short-wide masking
 5088 operand immI_65535()
 5089 %{
 5090   predicate(n->get_int() == 65535);
 5091   match(ConI);
 5092 
 5093   format %{ %}
 5094   interface(CONST_INTER);
 5095 %}
 5096 
 5097 // Constant for byte-wide masking
 5098 operand immL_255()
 5099 %{
 5100   predicate(n->get_long() == 255);
 5101   match(ConL);
 5102 
 5103   format %{ %}
 5104   interface(CONST_INTER);
 5105 %}
 5106 
 5107 // Constant for short-wide masking
 5108 operand immL_65535()
 5109 %{
 5110   predicate(n->get_long() == 65535);
 5111   match(ConL);
 5112 
 5113   format %{ %}
 5114   interface(CONST_INTER);
 5115 %}
 5116 
 5117 operand kReg()
 5118 %{
 5119   constraint(ALLOC_IN_RC(vectmask_reg));
 5120   match(RegVectMask);
  format %{ %}
 5122   interface(REG_INTER);
 5123 %}
 5124 
 5125 // Register Operands
 5126 // Integer Register
 5127 operand rRegI()
 5128 %{
 5129   constraint(ALLOC_IN_RC(int_reg));
 5130   match(RegI);
 5131 
 5132   match(rax_RegI);
 5133   match(rbx_RegI);
 5134   match(rcx_RegI);
 5135   match(rdx_RegI);
 5136   match(rdi_RegI);
 5137 
 5138   format %{ %}
 5139   interface(REG_INTER);
 5140 %}
 5141 
 5142 // Special Registers
 5143 operand rax_RegI()
 5144 %{
 5145   constraint(ALLOC_IN_RC(int_rax_reg));
 5146   match(RegI);
 5147   match(rRegI);
 5148 
 5149   format %{ "RAX" %}
 5150   interface(REG_INTER);
 5151 %}
 5152 
 5153 // Special Registers
 5154 operand rbx_RegI()
 5155 %{
 5156   constraint(ALLOC_IN_RC(int_rbx_reg));
 5157   match(RegI);
 5158   match(rRegI);
 5159 
 5160   format %{ "RBX" %}
 5161   interface(REG_INTER);
 5162 %}
 5163 
 5164 operand rcx_RegI()
 5165 %{
 5166   constraint(ALLOC_IN_RC(int_rcx_reg));
 5167   match(RegI);
 5168   match(rRegI);
 5169 
 5170   format %{ "RCX" %}
 5171   interface(REG_INTER);
 5172 %}
 5173 
 5174 operand rdx_RegI()
 5175 %{
 5176   constraint(ALLOC_IN_RC(int_rdx_reg));
 5177   match(RegI);
 5178   match(rRegI);
 5179 
 5180   format %{ "RDX" %}
 5181   interface(REG_INTER);
 5182 %}
 5183 
 5184 operand rdi_RegI()
 5185 %{
 5186   constraint(ALLOC_IN_RC(int_rdi_reg));
 5187   match(RegI);
 5188   match(rRegI);
 5189 
 5190   format %{ "RDI" %}
 5191   interface(REG_INTER);
 5192 %}
 5193 
 5194 operand no_rax_rdx_RegI()
 5195 %{
 5196   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5197   match(RegI);
 5198   match(rbx_RegI);
 5199   match(rcx_RegI);
 5200   match(rdi_RegI);
 5201 
 5202   format %{ %}
 5203   interface(REG_INTER);
 5204 %}
 5205 
 5206 operand no_rbp_r13_RegI()
 5207 %{
 5208   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5209   match(RegI);
 5210   match(rRegI);
 5211   match(rax_RegI);
 5212   match(rbx_RegI);
 5213   match(rcx_RegI);
 5214   match(rdx_RegI);
 5215   match(rdi_RegI);
 5216 
 5217   format %{ %}
 5218   interface(REG_INTER);
 5219 %}
 5220 
 5221 // Pointer Register
 5222 operand any_RegP()
 5223 %{
 5224   constraint(ALLOC_IN_RC(any_reg));
 5225   match(RegP);
 5226   match(rax_RegP);
 5227   match(rbx_RegP);
 5228   match(rdi_RegP);
 5229   match(rsi_RegP);
 5230   match(rbp_RegP);
 5231   match(r15_RegP);
 5232   match(rRegP);
 5233 
 5234   format %{ %}
 5235   interface(REG_INTER);
 5236 %}
 5237 
 5238 operand rRegP()
 5239 %{
 5240   constraint(ALLOC_IN_RC(ptr_reg));
 5241   match(RegP);
 5242   match(rax_RegP);
 5243   match(rbx_RegP);
 5244   match(rdi_RegP);
 5245   match(rsi_RegP);
 5246   match(rbp_RegP);  // See Q&A below about
 5247   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5248 
 5249   format %{ %}
 5250   interface(REG_INTER);
 5251 %}
 5252 
 5253 operand rRegN() %{
 5254   constraint(ALLOC_IN_RC(int_reg));
 5255   match(RegN);
 5256 
 5257   format %{ %}
 5258   interface(REG_INTER);
 5259 %}
 5260 
 5261 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 5262 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 5263 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
 5264 // The output of an instruction is controlled by the allocator, which respects
 5265 // register class masks, not match rules.  Unless an instruction mentions
 5266 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
 5267 // by the allocator as an input.
 5268 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
 5269 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 5270 // result, RBP is not included in the output of the instruction either.
 5271 
 5272 // This operand is not allowed to use RBP even if
 5273 // RBP is not used to hold the frame pointer.
 5274 operand no_rbp_RegP()
 5275 %{
 5276   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5277   match(RegP);
 5278   match(rbx_RegP);
 5279   match(rsi_RegP);
 5280   match(rdi_RegP);
 5281 
 5282   format %{ %}
 5283   interface(REG_INTER);
 5284 %}
 5285 
 5286 // Special Registers
 5287 // Return a pointer value
 5288 operand rax_RegP()
 5289 %{
 5290   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5291   match(RegP);
 5292   match(rRegP);
 5293 
 5294   format %{ %}
 5295   interface(REG_INTER);
 5296 %}
 5297 
 5298 // Special Registers
 5299 // Return a compressed pointer value
 5300 operand rax_RegN()
 5301 %{
 5302   constraint(ALLOC_IN_RC(int_rax_reg));
 5303   match(RegN);
 5304   match(rRegN);
 5305 
 5306   format %{ %}
 5307   interface(REG_INTER);
 5308 %}
 5309 
 5310 // Used in AtomicAdd
 5311 operand rbx_RegP()
 5312 %{
 5313   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5314   match(RegP);
 5315   match(rRegP);
 5316 
 5317   format %{ %}
 5318   interface(REG_INTER);
 5319 %}
 5320 
 5321 operand rsi_RegP()
 5322 %{
 5323   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5324   match(RegP);
 5325   match(rRegP);
 5326 
 5327   format %{ %}
 5328   interface(REG_INTER);
 5329 %}
 5330 
 5331 operand rbp_RegP()
 5332 %{
 5333   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5334   match(RegP);
 5335   match(rRegP);
 5336 
 5337   format %{ %}
 5338   interface(REG_INTER);
 5339 %}
 5340 
 5341 // Used in rep stosq
 5342 operand rdi_RegP()
 5343 %{
 5344   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5345   match(RegP);
 5346   match(rRegP);
 5347 
 5348   format %{ %}
 5349   interface(REG_INTER);
 5350 %}
 5351 
 5352 operand r15_RegP()
 5353 %{
 5354   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5355   match(RegP);
 5356   match(rRegP);
 5357 
 5358   format %{ %}
 5359   interface(REG_INTER);
 5360 %}
 5361 
 5362 operand rRegL()
 5363 %{
 5364   constraint(ALLOC_IN_RC(long_reg));
 5365   match(RegL);
 5366   match(rax_RegL);
 5367   match(rdx_RegL);
 5368 
 5369   format %{ %}
 5370   interface(REG_INTER);
 5371 %}
 5372 
 5373 // Special Registers
 5374 operand no_rax_rdx_RegL()
 5375 %{
 5376   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5377   match(RegL);
 5378   match(rRegL);
 5379 
 5380   format %{ %}
 5381   interface(REG_INTER);
 5382 %}
 5383 
 5384 operand rax_RegL()
 5385 %{
 5386   constraint(ALLOC_IN_RC(long_rax_reg));
 5387   match(RegL);
 5388   match(rRegL);
 5389 
 5390   format %{ "RAX" %}
 5391   interface(REG_INTER);
 5392 %}
 5393 
 5394 operand rcx_RegL()
 5395 %{
 5396   constraint(ALLOC_IN_RC(long_rcx_reg));
 5397   match(RegL);
 5398   match(rRegL);
 5399 
 5400   format %{ %}
 5401   interface(REG_INTER);
 5402 %}
 5403 
 5404 operand rdx_RegL()
 5405 %{
 5406   constraint(ALLOC_IN_RC(long_rdx_reg));
 5407   match(RegL);
 5408   match(rRegL);
 5409 
 5410   format %{ %}
 5411   interface(REG_INTER);
 5412 %}
 5413 
 5414 operand r11_RegL()
 5415 %{
 5416   constraint(ALLOC_IN_RC(long_r11_reg));
 5417   match(RegL);
 5418   match(rRegL);
 5419 
 5420   format %{ %}
 5421   interface(REG_INTER);
 5422 %}
 5423 
 5424 operand no_rbp_r13_RegL()
 5425 %{
 5426   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5427   match(RegL);
 5428   match(rRegL);
 5429   match(rax_RegL);
 5430   match(rcx_RegL);
 5431   match(rdx_RegL);
 5432 
 5433   format %{ %}
 5434   interface(REG_INTER);
 5435 %}
 5436 
 5437 // Flags register, used as output of compare instructions
 5438 operand rFlagsReg()
 5439 %{
 5440   constraint(ALLOC_IN_RC(int_flags));
 5441   match(RegFlags);
 5442 
 5443   format %{ "RFLAGS" %}
 5444   interface(REG_INTER);
 5445 %}
 5446 
 5447 // Flags register, used as output of FLOATING POINT compare instructions
 5448 operand rFlagsRegU()
 5449 %{
 5450   constraint(ALLOC_IN_RC(int_flags));
 5451   match(RegFlags);
 5452 
 5453   format %{ "RFLAGS_U" %}
 5454   interface(REG_INTER);
 5455 %}
 5456 
 5457 operand rFlagsRegUCF() %{
 5458   constraint(ALLOC_IN_RC(int_flags));
 5459   match(RegFlags);
 5460   predicate(false);
 5461 
 5462   format %{ "RFLAGS_U_CF" %}
 5463   interface(REG_INTER);
 5464 %}
 5465 
 5466 // Float register operands
 5467 operand regF() %{
 5468    constraint(ALLOC_IN_RC(float_reg));
 5469    match(RegF);
 5470 
 5471    format %{ %}
 5472    interface(REG_INTER);
 5473 %}
 5474 
// Float register operands (legacy register class)
 5476 operand legRegF() %{
 5477    constraint(ALLOC_IN_RC(float_reg_legacy));
 5478    match(RegF);
 5479 
 5480    format %{ %}
 5481    interface(REG_INTER);
 5482 %}
 5483 
// Float register operands (vl register class)
 5485 operand vlRegF() %{
 5486    constraint(ALLOC_IN_RC(float_reg_vl));
 5487    match(RegF);
 5488 
 5489    format %{ %}
 5490    interface(REG_INTER);
 5491 %}
 5492 
 5493 // Double register operands
 5494 operand regD() %{
 5495    constraint(ALLOC_IN_RC(double_reg));
 5496    match(RegD);
 5497 
 5498    format %{ %}
 5499    interface(REG_INTER);
 5500 %}
 5501 
// Double register operands (legacy register class)
 5503 operand legRegD() %{
 5504    constraint(ALLOC_IN_RC(double_reg_legacy));
 5505    match(RegD);
 5506 
 5507    format %{ %}
 5508    interface(REG_INTER);
 5509 %}
 5510 
// Double register operands (vl register class)
 5512 operand vlRegD() %{
 5513    constraint(ALLOC_IN_RC(double_reg_vl));
 5514    match(RegD);
 5515 
 5516    format %{ %}
 5517    interface(REG_INTER);
 5518 %}
 5519 
 5520 //----------Memory Operands----------------------------------------------------
 5521 // Direct Memory Operand
 5522 // operand direct(immP addr)
 5523 // %{
 5524 //   match(addr);
 5525 
 5526 //   format %{ "[$addr]" %}
 5527 //   interface(MEMORY_INTER) %{
 5528 //     base(0xFFFFFFFF);
 5529 //     index(0x4);
 5530 //     scale(0x0);
 5531 //     disp($addr);
 5532 //   %}
 5533 // %}
 5534 
 5535 // Indirect Memory Operand
 5536 operand indirect(any_RegP reg)
 5537 %{
 5538   constraint(ALLOC_IN_RC(ptr_reg));
 5539   match(reg);
 5540 
 5541   format %{ "[$reg]" %}
 5542   interface(MEMORY_INTER) %{
 5543     base($reg);
 5544     index(0x4);
 5545     scale(0x0);
 5546     disp(0x0);
 5547   %}
 5548 %}
 5549 
 5550 // Indirect Memory Plus Short Offset Operand
 5551 operand indOffset8(any_RegP reg, immL8 off)
 5552 %{
 5553   constraint(ALLOC_IN_RC(ptr_reg));
 5554   match(AddP reg off);
 5555 
 5556   format %{ "[$reg + $off (8-bit)]" %}
 5557   interface(MEMORY_INTER) %{
 5558     base($reg);
 5559     index(0x4);
 5560     scale(0x0);
 5561     disp($off);
 5562   %}
 5563 %}
 5564 
 5565 // Indirect Memory Plus Long Offset Operand
 5566 operand indOffset32(any_RegP reg, immL32 off)
 5567 %{
 5568   constraint(ALLOC_IN_RC(ptr_reg));
 5569   match(AddP reg off);
 5570 
 5571   format %{ "[$reg + $off (32-bit)]" %}
 5572   interface(MEMORY_INTER) %{
 5573     base($reg);
 5574     index(0x4);
 5575     scale(0x0);
 5576     disp($off);
 5577   %}
 5578 %}
 5579 
 5580 // Indirect Memory Plus Index Register Plus Offset Operand
 5581 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5582 %{
 5583   constraint(ALLOC_IN_RC(ptr_reg));
 5584   match(AddP (AddP reg lreg) off);
 5585 
 5586   op_cost(10);
 5587   format %{"[$reg + $off + $lreg]" %}
 5588   interface(MEMORY_INTER) %{
 5589     base($reg);
 5590     index($lreg);
 5591     scale(0x0);
 5592     disp($off);
 5593   %}
 5594 %}
 5595 
// Indirect Memory Plus Index Register Operand
 5597 operand indIndex(any_RegP reg, rRegL lreg)
 5598 %{
 5599   constraint(ALLOC_IN_RC(ptr_reg));
 5600   match(AddP reg lreg);
 5601 
 5602   op_cost(10);
 5603   format %{"[$reg + $lreg]" %}
 5604   interface(MEMORY_INTER) %{
 5605     base($reg);
 5606     index($lreg);
 5607     scale(0x0);
 5608     disp(0x0);
 5609   %}
 5610 %}
 5611 
 5612 // Indirect Memory Times Scale Plus Index Register
 5613 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5614 %{
 5615   constraint(ALLOC_IN_RC(ptr_reg));
 5616   match(AddP reg (LShiftL lreg scale));
 5617 
 5618   op_cost(10);
 5619   format %{"[$reg + $lreg << $scale]" %}
 5620   interface(MEMORY_INTER) %{
 5621     base($reg);
 5622     index($lreg);
 5623     scale($scale);
 5624     disp(0x0);
 5625   %}
 5626 %}
 5627 
 5628 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5629 %{
 5630   constraint(ALLOC_IN_RC(ptr_reg));
 5631   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5632   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5633 
 5634   op_cost(10);
 5635   format %{"[$reg + pos $idx << $scale]" %}
 5636   interface(MEMORY_INTER) %{
 5637     base($reg);
 5638     index($idx);
 5639     scale($scale);
 5640     disp(0x0);
 5641   %}
 5642 %}
 5643 
 5644 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5645 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5646 %{
 5647   constraint(ALLOC_IN_RC(ptr_reg));
 5648   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5649 
 5650   op_cost(10);
 5651   format %{"[$reg + $off + $lreg << $scale]" %}
 5652   interface(MEMORY_INTER) %{
 5653     base($reg);
 5654     index($lreg);
 5655     scale($scale);
 5656     disp($off);
 5657   %}
 5658 %}
 5659 
 5660 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5661 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5662 %{
 5663   constraint(ALLOC_IN_RC(ptr_reg));
 5664   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5665   match(AddP (AddP reg (ConvI2L idx)) off);
 5666 
 5667   op_cost(10);
 5668   format %{"[$reg + $off + $idx]" %}
 5669   interface(MEMORY_INTER) %{
 5670     base($reg);
 5671     index($idx);
 5672     scale(0x0);
 5673     disp($off);
 5674   %}
 5675 %}
 5676 
 5677 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5678 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5679 %{
 5680   constraint(ALLOC_IN_RC(ptr_reg));
 5681   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5682   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5683 
 5684   op_cost(10);
 5685   format %{"[$reg + $off + $idx << $scale]" %}
 5686   interface(MEMORY_INTER) %{
 5687     base($reg);
 5688     index($idx);
 5689     scale($scale);
 5690     disp($off);
 5691   %}
 5692 %}
 5693 
 5694 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without a
// base register, so we can't free r12 even with CompressedOops::base() == nullptr.
 5697 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5698   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5699   constraint(ALLOC_IN_RC(ptr_reg));
 5700   match(AddP (DecodeN reg) off);
 5701 
 5702   op_cost(10);
 5703   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5704   interface(MEMORY_INTER) %{
 5705     base(0xc); // R12
 5706     index($reg);
 5707     scale(0x3);
 5708     disp($off);
 5709   %}
 5710 %}
 5711 
 5712 // Indirect Memory Operand
 5713 operand indirectNarrow(rRegN reg)
 5714 %{
 5715   predicate(CompressedOops::shift() == 0);
 5716   constraint(ALLOC_IN_RC(ptr_reg));
 5717   match(DecodeN reg);
 5718 
 5719   format %{ "[$reg]" %}
 5720   interface(MEMORY_INTER) %{
 5721     base($reg);
 5722     index(0x4);
 5723     scale(0x0);
 5724     disp(0x0);
 5725   %}
 5726 %}
 5727 
 5728 // Indirect Memory Plus Short Offset Operand
 5729 operand indOffset8Narrow(rRegN reg, immL8 off)
 5730 %{
 5731   predicate(CompressedOops::shift() == 0);
 5732   constraint(ALLOC_IN_RC(ptr_reg));
 5733   match(AddP (DecodeN reg) off);
 5734 
 5735   format %{ "[$reg + $off (8-bit)]" %}
 5736   interface(MEMORY_INTER) %{
 5737     base($reg);
 5738     index(0x4);
 5739     scale(0x0);
 5740     disp($off);
 5741   %}
 5742 %}
 5743 
 5744 // Indirect Memory Plus Long Offset Operand
 5745 operand indOffset32Narrow(rRegN reg, immL32 off)
 5746 %{
 5747   predicate(CompressedOops::shift() == 0);
 5748   constraint(ALLOC_IN_RC(ptr_reg));
 5749   match(AddP (DecodeN reg) off);
 5750 
 5751   format %{ "[$reg + $off (32-bit)]" %}
 5752   interface(MEMORY_INTER) %{
 5753     base($reg);
 5754     index(0x4);
 5755     scale(0x0);
 5756     disp($off);
 5757   %}
 5758 %}
 5759 
 5760 // Indirect Memory Plus Index Register Plus Offset Operand
 5761 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5762 %{
 5763   predicate(CompressedOops::shift() == 0);
 5764   constraint(ALLOC_IN_RC(ptr_reg));
 5765   match(AddP (AddP (DecodeN reg) lreg) off);
 5766 
 5767   op_cost(10);
 5768   format %{"[$reg + $off + $lreg]" %}
 5769   interface(MEMORY_INTER) %{
 5770     base($reg);
 5771     index($lreg);
 5772     scale(0x0);
 5773     disp($off);
 5774   %}
 5775 %}
 5776 
// Indirect Memory Plus Index Register Operand
 5778 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5779 %{
 5780   predicate(CompressedOops::shift() == 0);
 5781   constraint(ALLOC_IN_RC(ptr_reg));
 5782   match(AddP (DecodeN reg) lreg);
 5783 
 5784   op_cost(10);
 5785   format %{"[$reg + $lreg]" %}
 5786   interface(MEMORY_INTER) %{
 5787     base($reg);
 5788     index($lreg);
 5789     scale(0x0);
 5790     disp(0x0);
 5791   %}
 5792 %}
 5793 
 5794 // Indirect Memory Times Scale Plus Index Register
 5795 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5796 %{
 5797   predicate(CompressedOops::shift() == 0);
 5798   constraint(ALLOC_IN_RC(ptr_reg));
 5799   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5800 
 5801   op_cost(10);
 5802   format %{"[$reg + $lreg << $scale]" %}
 5803   interface(MEMORY_INTER) %{
 5804     base($reg);
 5805     index($lreg);
 5806     scale($scale);
 5807     disp(0x0);
 5808   %}
 5809 %}
 5810 
 5811 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5812 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5813 %{
 5814   predicate(CompressedOops::shift() == 0);
 5815   constraint(ALLOC_IN_RC(ptr_reg));
 5816   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5817 
 5818   op_cost(10);
 5819   format %{"[$reg + $off + $lreg << $scale]" %}
 5820   interface(MEMORY_INTER) %{
 5821     base($reg);
 5822     index($lreg);
 5823     scale($scale);
 5824     disp($off);
 5825   %}
 5826 %}
 5827 
// Indirect Memory Plus Positive Index Register Plus Offset Operand
 5829 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5830 %{
 5831   constraint(ALLOC_IN_RC(ptr_reg));
 5832   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5833   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5834 
 5835   op_cost(10);
 5836   format %{"[$reg + $off + $idx]" %}
 5837   interface(MEMORY_INTER) %{
 5838     base($reg);
 5839     index($idx);
 5840     scale(0x0);
 5841     disp($off);
 5842   %}
 5843 %}
 5844 
 5845 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5846 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5847 %{
 5848   constraint(ALLOC_IN_RC(ptr_reg));
 5849   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5850   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 5851 
 5852   op_cost(10);
 5853   format %{"[$reg + $off + $idx << $scale]" %}
 5854   interface(MEMORY_INTER) %{
 5855     base($reg);
 5856     index($idx);
 5857     scale($scale);
 5858     disp($off);
 5859   %}
 5860 %}
 5861 
 5862 //----------Special Memory Operands--------------------------------------------
 5863 // Stack Slot Operand - This operand is used for loading and storing temporary
 5864 //                      values on the stack where a match requires a value to
 5865 //                      flow through memory.
 5866 operand stackSlotP(sRegP reg)
 5867 %{
 5868   constraint(ALLOC_IN_RC(stack_slots));
 5869   // No match rule because this operand is only generated in matching
 5870 
 5871   format %{ "[$reg]" %}
 5872   interface(MEMORY_INTER) %{
 5873     base(0x4);   // RSP
 5874     index(0x4);  // No Index
 5875     scale(0x0);  // No Scale
 5876     disp($reg);  // Stack Offset
 5877   %}
 5878 %}
 5879 
 5880 operand stackSlotI(sRegI reg)
 5881 %{
 5882   constraint(ALLOC_IN_RC(stack_slots));
 5883   // No match rule because this operand is only generated in matching
 5884 
 5885   format %{ "[$reg]" %}
 5886   interface(MEMORY_INTER) %{
 5887     base(0x4);   // RSP
 5888     index(0x4);  // No Index
 5889     scale(0x0);  // No Scale
 5890     disp($reg);  // Stack Offset
 5891   %}
 5892 %}
 5893 
 5894 operand stackSlotF(sRegF reg)
 5895 %{
 5896   constraint(ALLOC_IN_RC(stack_slots));
 5897   // No match rule because this operand is only generated in matching
 5898 
 5899   format %{ "[$reg]" %}
 5900   interface(MEMORY_INTER) %{
 5901     base(0x4);   // RSP
 5902     index(0x4);  // No Index
 5903     scale(0x0);  // No Scale
 5904     disp($reg);  // Stack Offset
 5905   %}
 5906 %}
 5907 
 5908 operand stackSlotD(sRegD reg)
 5909 %{
 5910   constraint(ALLOC_IN_RC(stack_slots));
 5911   // No match rule because this operand is only generated in matching
 5912 
 5913   format %{ "[$reg]" %}
 5914   interface(MEMORY_INTER) %{
 5915     base(0x4);   // RSP
 5916     index(0x4);  // No Index
 5917     scale(0x0);  // No Scale
 5918     disp($reg);  // Stack Offset
 5919   %}
 5920 %}
 5921 operand stackSlotL(sRegL reg)
 5922 %{
 5923   constraint(ALLOC_IN_RC(stack_slots));
 5924   // No match rule because this operand is only generated in matching
 5925 
 5926   format %{ "[$reg]" %}
 5927   interface(MEMORY_INTER) %{
 5928     base(0x4);   // RSP
 5929     index(0x4);  // No Index
 5930     scale(0x0);  // No Scale
 5931     disp($reg);  // Stack Offset
 5932   %}
 5933 %}
 5934 
 5935 //----------Conditional Branch Operands----------------------------------------
 5936 // Comparison Op  - This is the operation of the comparison, and is limited to
 5937 //                  the following set of codes:
 5938 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 5939 //
 5940 // Other attributes of the comparison, such as unsignedness, are specified
 5941 // by the comparison instruction that sets a condition code flags register.
 5942 // That result is represented by a flags operand whose subtype is appropriate
 5943 // to the unsignedness (etc.) of the comparison.
 5944 //
 5945 // Later, the instruction which matches both the Comparison Op (a Bool) and
 5946 // the flags (produced by the Cmp) specifies the coding of the comparison op
 5947 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 5948 
 5949 // Comparison Code
 5950 operand cmpOp()
 5951 %{
 5952   match(Bool);
 5953 
 5954   format %{ "" %}
 5955   interface(COND_INTER) %{
 5956     equal(0x4, "e");
 5957     not_equal(0x5, "ne");
 5958     less(0xC, "l");
 5959     greater_equal(0xD, "ge");
 5960     less_equal(0xE, "le");
 5961     greater(0xF, "g");
 5962     overflow(0x0, "o");
 5963     no_overflow(0x1, "no");
 5964   %}
 5965 %}
 5966 
 5967 // Comparison Code, unsigned compare.  Used by FP also, with
 5968 // C2 (unordered) turned into GT or LT already.  The other bits
 5969 // C0 and C3 are turned into Carry & Zero flags.
 5970 operand cmpOpU()
 5971 %{
 5972   match(Bool);
 5973 
 5974   format %{ "" %}
 5975   interface(COND_INTER) %{
 5976     equal(0x4, "e");
 5977     not_equal(0x5, "ne");
 5978     less(0x2, "b");
 5979     greater_equal(0x3, "ae");
 5980     less_equal(0x6, "be");
 5981     greater(0x7, "a");
 5982     overflow(0x0, "o");
 5983     no_overflow(0x1, "no");
 5984   %}
 5985 %}
 5986 
 5987 
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
 5991 operand cmpOpUCF() %{
 5992   match(Bool);
 5993   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 5994             n->as_Bool()->_test._test == BoolTest::ge ||
 5995             n->as_Bool()->_test._test == BoolTest::le ||
 5996             n->as_Bool()->_test._test == BoolTest::gt ||
 5997             n->in(1)->in(1) == n->in(1)->in(2));
 5998   format %{ "" %}
 5999   interface(COND_INTER) %{
 6000     equal(0xb, "np");
 6001     not_equal(0xa, "p");
 6002     less(0x2, "b");
 6003     greater_equal(0x3, "ae");
 6004     less_equal(0x6, "be");
 6005     greater(0x7, "a");
 6006     overflow(0x0, "o");
 6007     no_overflow(0x1, "no");
 6008   %}
 6009 %}
 6010 
 6011 
 6012 // Floating comparisons that can be fixed up with extra conditional jumps
 6013 operand cmpOpUCF2() %{
 6014   match(Bool);
 6015   predicate((n->as_Bool()->_test._test == BoolTest::ne ||
 6016              n->as_Bool()->_test._test == BoolTest::eq) &&
 6017             n->in(1)->in(1) != n->in(1)->in(2));
 6018   format %{ "" %}
 6019   interface(COND_INTER) %{
 6020     equal(0x4, "e");
 6021     not_equal(0x5, "ne");
 6022     less(0x2, "b");
 6023     greater_equal(0x3, "ae");
 6024     less_equal(0x6, "be");
 6025     greater(0x7, "a");
 6026     overflow(0x0, "o");
 6027     no_overflow(0x1, "no");
 6028   %}
 6029 %}
 6030 
// Operands for bound floating-point register arguments
 6032 operand rxmm0() %{
 6033   constraint(ALLOC_IN_RC(xmm0_reg));
 6034   match(VecX);
 6035   format%{%}
 6036   interface(REG_INTER);
 6037 %}
 6038 
 6039 // Vectors
 6040 
 6041 // Dummy generic vector class. Should be used for all vector operands.
 6042 // Replaced with vec[SDXYZ] during post-selection pass.
 6043 operand vec() %{
 6044   constraint(ALLOC_IN_RC(dynamic));
 6045   match(VecX);
 6046   match(VecY);
 6047   match(VecZ);
 6048   match(VecS);
 6049   match(VecD);
 6050 
 6051   format %{ %}
 6052   interface(REG_INTER);
 6053 %}
 6054 
 6055 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6056 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6057 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6058 // runtime code generation via reg_class_dynamic.
 6059 operand legVec() %{
 6060   constraint(ALLOC_IN_RC(dynamic));
 6061   match(VecX);
 6062   match(VecY);
 6063   match(VecZ);
 6064   match(VecS);
 6065   match(VecD);
 6066 
 6067   format %{ %}
 6068   interface(REG_INTER);
 6069 %}
 6070 
 6071 // Replaces vec during post-selection cleanup. See above.
 6072 operand vecS() %{
 6073   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6074   match(VecS);
 6075 
 6076   format %{ %}
 6077   interface(REG_INTER);
 6078 %}
 6079 
 6080 // Replaces legVec during post-selection cleanup. See above.
 6081 operand legVecS() %{
 6082   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6083   match(VecS);
 6084 
 6085   format %{ %}
 6086   interface(REG_INTER);
 6087 %}
 6088 
 6089 // Replaces vec during post-selection cleanup. See above.
 6090 operand vecD() %{
 6091   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6092   match(VecD);
 6093 
 6094   format %{ %}
 6095   interface(REG_INTER);
 6096 %}
 6097 
 6098 // Replaces legVec during post-selection cleanup. See above.
 6099 operand legVecD() %{
 6100   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6101   match(VecD);
 6102 
 6103   format %{ %}
 6104   interface(REG_INTER);
 6105 %}
 6106 
 6107 // Replaces vec during post-selection cleanup. See above.
 6108 operand vecX() %{
 6109   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6110   match(VecX);
 6111 
 6112   format %{ %}
 6113   interface(REG_INTER);
 6114 %}
 6115 
 6116 // Replaces legVec during post-selection cleanup. See above.
 6117 operand legVecX() %{
 6118   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6119   match(VecX);
 6120 
 6121   format %{ %}
 6122   interface(REG_INTER);
 6123 %}
 6124 
 6125 // Replaces vec during post-selection cleanup. See above.
 6126 operand vecY() %{
 6127   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6128   match(VecY);
 6129 
 6130   format %{ %}
 6131   interface(REG_INTER);
 6132 %}
 6133 
 6134 // Replaces legVec during post-selection cleanup. See above.
 6135 operand legVecY() %{
 6136   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6137   match(VecY);
 6138 
 6139   format %{ %}
 6140   interface(REG_INTER);
 6141 %}
 6142 
 6143 // Replaces vec during post-selection cleanup. See above.
 6144 operand vecZ() %{
 6145   constraint(ALLOC_IN_RC(vectorz_reg));
 6146   match(VecZ);
 6147 
 6148   format %{ %}
 6149   interface(REG_INTER);
 6150 %}
 6151 
 6152 // Replaces legVec during post-selection cleanup. See above.
 6153 operand legVecZ() %{
 6154   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6155   match(VecZ);
 6156 
 6157   format %{ %}
 6158   interface(REG_INTER);
 6159 %}
 6160 
 6161 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.
 6167 
 6168 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6169                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6170                indCompressedOopOffset,
 6171                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6172                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6173                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
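
// An instruction declared with a "memory" operand therefore matches any of the
// address shapes above: e.g. loadI further below matches (Set dst (LoadI mem))
// whether mem is a plain [reg], a [reg + off], or a full [reg + off + idx << scale].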
 6174 
 6175 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 6177 pipeline %{
 6178 
 6179 //----------ATTRIBUTES---------------------------------------------------------
 6180 attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 6184   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6185   instruction_fetch_units = 1;       // of 16 bytes
 6186 %}
 6187 
 6188 //----------RESOURCES----------------------------------------------------------
 6189 // Resources are the functional units available to the machine
 6190 
 6191 // Generic P2/P3 pipeline
 6192 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6193 // 3 instructions decoded per cycle.
 6194 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops, only ALU0 handles mul instructions.
 6196 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6197            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6198            BR, FPU,
 6199            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6200 
 6201 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6202 // Pipeline Description specifies the stages in the machine's pipeline
 6203 
 6204 // Generic P2/P3 pipeline
 6205 pipe_desc(S0, S1, S2, S3, S4, S5);
 6206 
 6207 //----------PIPELINE CLASSES---------------------------------------------------
 6208 // Pipeline Classes describe the stages in which input and output are
 6209 // referenced by the hardware pipeline.
 6210 
 6211 // Naming convention: ialu or fpu
 6212 // Then: _reg
 6213 // Then: _reg if there is a 2nd register
 6214 // Then: _long if it's a pair of instructions implementing a long
 6215 // Then: _fat if it requires the big decoder
 6216 //   Or: _mem if it requires the big decoder and a memory unit.
 6217 
 6218 // Integer ALU reg operation
 6219 pipe_class ialu_reg(rRegI dst)
 6220 %{
 6221     single_instruction;
 6222     dst    : S4(write);
 6223     dst    : S3(read);
 6224     DECODE : S0;        // any decoder
 6225     ALU    : S3;        // any alu
 6226 %}
 6227 
 6228 // Long ALU reg operation
 6229 pipe_class ialu_reg_long(rRegL dst)
 6230 %{
 6231     instruction_count(2);
 6232     dst    : S4(write);
 6233     dst    : S3(read);
 6234     DECODE : S0(2);     // any 2 decoders
 6235     ALU    : S3(2);     // both alus
 6236 %}
 6237 
 6238 // Integer ALU reg operation using big decoder
 6239 pipe_class ialu_reg_fat(rRegI dst)
 6240 %{
 6241     single_instruction;
 6242     dst    : S4(write);
 6243     dst    : S3(read);
 6244     D0     : S0;        // big decoder only
 6245     ALU    : S3;        // any alu
 6246 %}
 6247 
 6248 // Integer ALU reg-reg operation
 6249 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6250 %{
 6251     single_instruction;
 6252     dst    : S4(write);
 6253     src    : S3(read);
 6254     DECODE : S0;        // any decoder
 6255     ALU    : S3;        // any alu
 6256 %}
 6257 
 6258 // Integer ALU reg-reg operation
 6259 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6260 %{
 6261     single_instruction;
 6262     dst    : S4(write);
 6263     src    : S3(read);
 6264     D0     : S0;        // big decoder only
 6265     ALU    : S3;        // any alu
 6266 %}
 6267 
 6268 // Integer ALU reg-mem operation
 6269 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6270 %{
 6271     single_instruction;
 6272     dst    : S5(write);
 6273     mem    : S3(read);
 6274     D0     : S0;        // big decoder only
 6275     ALU    : S4;        // any alu
 6276     MEM    : S3;        // any mem
 6277 %}
 6278 
 6279 // Integer mem operation (prefetch)
 6280 pipe_class ialu_mem(memory mem)
 6281 %{
 6282     single_instruction;
 6283     mem    : S3(read);
 6284     D0     : S0;        // big decoder only
 6285     MEM    : S3;        // any mem
 6286 %}
 6287 
 6288 // Integer Store to Memory
 6289 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6290 %{
 6291     single_instruction;
 6292     mem    : S3(read);
 6293     src    : S5(read);
 6294     D0     : S0;        // big decoder only
 6295     ALU    : S4;        // any alu
 6296     MEM    : S3;
 6297 %}
 6298 
 6299 // // Long Store to Memory
 6300 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6301 // %{
 6302 //     instruction_count(2);
 6303 //     mem    : S3(read);
 6304 //     src    : S5(read);
 6305 //     D0     : S0(2);          // big decoder only; twice
 6306 //     ALU    : S4(2);     // any 2 alus
 6307 //     MEM    : S3(2);  // Both mems
 6308 // %}
 6309 
 6310 // Integer Store to Memory
 6311 pipe_class ialu_mem_imm(memory mem)
 6312 %{
 6313     single_instruction;
 6314     mem    : S3(read);
 6315     D0     : S0;        // big decoder only
 6316     ALU    : S4;        // any alu
 6317     MEM    : S3;
 6318 %}
 6319 
 6320 // Integer ALU0 reg-reg operation
 6321 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6322 %{
 6323     single_instruction;
 6324     dst    : S4(write);
 6325     src    : S3(read);
 6326     D0     : S0;        // Big decoder only
 6327     ALU0   : S3;        // only alu0
 6328 %}
 6329 
 6330 // Integer ALU0 reg-mem operation
 6331 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6332 %{
 6333     single_instruction;
 6334     dst    : S5(write);
 6335     mem    : S3(read);
 6336     D0     : S0;        // big decoder only
 6337     ALU0   : S4;        // ALU0 only
 6338     MEM    : S3;        // any mem
 6339 %}
 6340 
 6341 // Integer ALU reg-reg operation
 6342 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6343 %{
 6344     single_instruction;
 6345     cr     : S4(write);
 6346     src1   : S3(read);
 6347     src2   : S3(read);
 6348     DECODE : S0;        // any decoder
 6349     ALU    : S3;        // any alu
 6350 %}
 6351 
 6352 // Integer ALU reg-imm operation
 6353 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6354 %{
 6355     single_instruction;
 6356     cr     : S4(write);
 6357     src1   : S3(read);
 6358     DECODE : S0;        // any decoder
 6359     ALU    : S3;        // any alu
 6360 %}
 6361 
 6362 // Integer ALU reg-mem operation
 6363 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6364 %{
 6365     single_instruction;
 6366     cr     : S4(write);
 6367     src1   : S3(read);
 6368     src2   : S3(read);
 6369     D0     : S0;        // big decoder only
 6370     ALU    : S4;        // any alu
 6371     MEM    : S3;
 6372 %}
 6373 
 6374 // Conditional move reg-reg
 6375 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6376 %{
 6377     instruction_count(4);
 6378     y      : S4(read);
 6379     q      : S3(read);
 6380     p      : S3(read);
 6381     DECODE : S0(4);     // any decoder
 6382 %}
 6383 
 6384 // Conditional move reg-reg
 6385 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6386 %{
 6387     single_instruction;
 6388     dst    : S4(write);
 6389     src    : S3(read);
 6390     cr     : S3(read);
 6391     DECODE : S0;        // any decoder
 6392 %}
 6393 
 6394 // Conditional move reg-mem
 6395 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6396 %{
 6397     single_instruction;
 6398     dst    : S4(write);
 6399     src    : S3(read);
 6400     cr     : S3(read);
 6401     DECODE : S0;        // any decoder
 6402     MEM    : S3;
 6403 %}
 6404 
 6405 // Conditional move reg-reg long
 6406 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6407 %{
 6408     single_instruction;
 6409     dst    : S4(write);
 6410     src    : S3(read);
 6411     cr     : S3(read);
 6412     DECODE : S0(2);     // any 2 decoders
 6413 %}
 6414 
 6415 // Float reg-reg operation
 6416 pipe_class fpu_reg(regD dst)
 6417 %{
 6418     instruction_count(2);
 6419     dst    : S3(read);
 6420     DECODE : S0(2);     // any 2 decoders
 6421     FPU    : S3;
 6422 %}
 6423 
 6424 // Float reg-reg operation
 6425 pipe_class fpu_reg_reg(regD dst, regD src)
 6426 %{
 6427     instruction_count(2);
 6428     dst    : S4(write);
 6429     src    : S3(read);
 6430     DECODE : S0(2);     // any 2 decoders
 6431     FPU    : S3;
 6432 %}
 6433 
 6434 // Float reg-reg operation
 6435 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6436 %{
 6437     instruction_count(3);
 6438     dst    : S4(write);
 6439     src1   : S3(read);
 6440     src2   : S3(read);
 6441     DECODE : S0(3);     // any 3 decoders
 6442     FPU    : S3(2);
 6443 %}
 6444 
 6445 // Float reg-reg operation
 6446 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6447 %{
 6448     instruction_count(4);
 6449     dst    : S4(write);
 6450     src1   : S3(read);
 6451     src2   : S3(read);
 6452     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
 6454     FPU    : S3(2);
 6455 %}
 6456 
 6457 // Float reg-reg operation
 6458 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6459 %{
 6460     instruction_count(4);
 6461     dst    : S4(write);
 6462     src1   : S3(read);
 6463     src2   : S3(read);
 6464     src3   : S3(read);
 6465     DECODE : S1(3);     // any 3 decoders
 6466     D0     : S0;        // Big decoder only
 6467     FPU    : S3(2);
 6468     MEM    : S3;
 6469 %}
 6470 
 6471 // Float reg-mem operation
 6472 pipe_class fpu_reg_mem(regD dst, memory mem)
 6473 %{
 6474     instruction_count(2);
 6475     dst    : S5(write);
 6476     mem    : S3(read);
 6477     D0     : S0;        // big decoder only
 6478     DECODE : S1;        // any decoder for FPU POP
 6479     FPU    : S4;
 6480     MEM    : S3;        // any mem
 6481 %}
 6482 
 6483 // Float reg-mem operation
 6484 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6485 %{
 6486     instruction_count(3);
 6487     dst    : S5(write);
 6488     src1   : S3(read);
 6489     mem    : S3(read);
 6490     D0     : S0;        // big decoder only
 6491     DECODE : S1(2);     // any decoder for FPU POP
 6492     FPU    : S4;
 6493     MEM    : S3;        // any mem
 6494 %}
 6495 
 6496 // Float mem-reg operation
 6497 pipe_class fpu_mem_reg(memory mem, regD src)
 6498 %{
 6499     instruction_count(2);
 6500     src    : S5(read);
 6501     mem    : S3(read);
 6502     DECODE : S0;        // any decoder for FPU PUSH
 6503     D0     : S1;        // big decoder only
 6504     FPU    : S4;
 6505     MEM    : S3;        // any mem
 6506 %}
 6507 
 6508 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6509 %{
 6510     instruction_count(3);
 6511     src1   : S3(read);
 6512     src2   : S3(read);
 6513     mem    : S3(read);
 6514     DECODE : S0(2);     // any decoder for FPU PUSH
 6515     D0     : S1;        // big decoder only
 6516     FPU    : S4;
 6517     MEM    : S3;        // any mem
 6518 %}
 6519 
 6520 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6521 %{
 6522     instruction_count(3);
 6523     src1   : S3(read);
 6524     src2   : S3(read);
 6525     mem    : S4(read);
 6526     DECODE : S0;        // any decoder for FPU PUSH
 6527     D0     : S0(2);     // big decoder only
 6528     FPU    : S4;
 6529     MEM    : S3(2);     // any mem
 6530 %}
 6531 
 6532 pipe_class fpu_mem_mem(memory dst, memory src1)
 6533 %{
 6534     instruction_count(2);
 6535     src1   : S3(read);
 6536     dst    : S4(read);
 6537     D0     : S0(2);     // big decoder only
 6538     MEM    : S3(2);     // any mem
 6539 %}
 6540 
 6541 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6542 %{
 6543     instruction_count(3);
 6544     src1   : S3(read);
 6545     src2   : S3(read);
 6546     dst    : S4(read);
 6547     D0     : S0(3);     // big decoder only
 6548     FPU    : S4;
 6549     MEM    : S3(3);     // any mem
 6550 %}
 6551 
 6552 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6553 %{
 6554     instruction_count(3);
 6555     src1   : S4(read);
 6556     mem    : S4(read);
 6557     DECODE : S0;        // any decoder for FPU PUSH
 6558     D0     : S0(2);     // big decoder only
 6559     FPU    : S4;
 6560     MEM    : S3(2);     // any mem
 6561 %}
 6562 
 6563 // Float load constant
 6564 pipe_class fpu_reg_con(regD dst)
 6565 %{
 6566     instruction_count(2);
 6567     dst    : S5(write);
 6568     D0     : S0;        // big decoder only for the load
 6569     DECODE : S1;        // any decoder for FPU POP
 6570     FPU    : S4;
 6571     MEM    : S3;        // any mem
 6572 %}
 6573 
 6574 // Float load constant
 6575 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6576 %{
 6577     instruction_count(3);
 6578     dst    : S5(write);
 6579     src    : S3(read);
 6580     D0     : S0;        // big decoder only for the load
 6581     DECODE : S1(2);     // any decoder for FPU POP
 6582     FPU    : S4;
 6583     MEM    : S3;        // any mem
 6584 %}
 6585 
 6586 // UnConditional branch
 6587 pipe_class pipe_jmp(label labl)
 6588 %{
 6589     single_instruction;
 6590     BR   : S3;
 6591 %}
 6592 
 6593 // Conditional branch
 6594 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6595 %{
 6596     single_instruction;
 6597     cr    : S1(read);
 6598     BR    : S3;
 6599 %}
 6600 
 6601 // Allocation idiom
 6602 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6603 %{
 6604     instruction_count(1); force_serialization;
 6605     fixed_latency(6);
 6606     heap_ptr : S3(read);
 6607     DECODE   : S0(3);
 6608     D0       : S2;
 6609     MEM      : S3;
 6610     ALU      : S3(2);
 6611     dst      : S5(write);
 6612     BR       : S5;
 6613 %}
 6614 
 6615 // Generic big/slow expanded idiom
 6616 pipe_class pipe_slow()
 6617 %{
 6618     instruction_count(10); multiple_bundles; force_serialization;
 6619     fixed_latency(100);
 6620     D0  : S0(2);
 6621     MEM : S3(2);
 6622 %}
 6623 
 6624 // The real do-nothing guy
 6625 pipe_class empty()
 6626 %{
 6627     instruction_count(0);
 6628 %}
 6629 
 6630 // Define the class for the Nop node
 6631 define
 6632 %{
 6633    MachNop = empty;
 6634 %}
 6635 
 6636 %}
 6637 
 6638 //----------INSTRUCTIONS-------------------------------------------------------
 6639 //
 6640 // match      -- States which machine-independent subtree may be replaced
 6641 //               by this instruction.
 6642 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6643 //               selection to identify a minimum cost tree of machine
 6644 //               instructions that matches a tree of machine-independent
 6645 //               instructions.
 6646 // format     -- A string providing the disassembly for this instruction.
 6647 //               The value of an instruction's operand may be inserted
 6648 //               by referring to it with a '$' prefix.
 6649 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6650 //               to within an encode class as $primary, $secondary, and $tertiary
 6651 //               rrspectively.  The primary opcode is commonly used to
 6652 //               indicate the type of machine instruction, while secondary
 6653 //               and tertiary are often used for prefix options or addressing
 6654 //               modes.
 6655 // ins_encode -- A list of encode classes with parameters. The encode class
 6656 //               name must have been defined in an 'enc_class' specification
 6657 //               in the encode section of the architecture description.
 6658 
 6659 // ============================================================================
 6660 
 6661 instruct ShouldNotReachHere() %{
 6662   match(Halt);
 6663   format %{ "stop\t# ShouldNotReachHere" %}
 6664   ins_encode %{
 6665     if (is_reachable()) {
 6666       const char* str = __ code_string(_halt_reason);
 6667       __ stop(str);
 6668     }
 6669   %}
 6670   ins_pipe(pipe_slow);
 6671 %}
 6672 
 6673 // ============================================================================
 6674 
 6675 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6676 // Load Float
 6677 instruct MoveF2VL(vlRegF dst, regF src) %{
 6678   match(Set dst src);
 6679   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6680   ins_encode %{
 6681     ShouldNotReachHere();
 6682   %}
 6683   ins_pipe( fpu_reg_reg );
 6684 %}
 6685 
 6686 // Load Float
 6687 instruct MoveF2LEG(legRegF dst, regF src) %{
 6688   match(Set dst src);
 6689   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6690   ins_encode %{
 6691     ShouldNotReachHere();
 6692   %}
 6693   ins_pipe( fpu_reg_reg );
 6694 %}
 6695 
 6696 // Load Float
 6697 instruct MoveVL2F(regF dst, vlRegF src) %{
 6698   match(Set dst src);
 6699   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6700   ins_encode %{
 6701     ShouldNotReachHere();
 6702   %}
 6703   ins_pipe( fpu_reg_reg );
 6704 %}
 6705 
 6706 // Load Float
 6707 instruct MoveLEG2F(regF dst, legRegF src) %{
 6708   match(Set dst src);
 6709   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6710   ins_encode %{
 6711     ShouldNotReachHere();
 6712   %}
 6713   ins_pipe( fpu_reg_reg );
 6714 %}
 6715 
 6716 // Load Double
 6717 instruct MoveD2VL(vlRegD dst, regD src) %{
 6718   match(Set dst src);
 6719   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6720   ins_encode %{
 6721     ShouldNotReachHere();
 6722   %}
 6723   ins_pipe( fpu_reg_reg );
 6724 %}
 6725 
 6726 // Load Double
 6727 instruct MoveD2LEG(legRegD dst, regD src) %{
 6728   match(Set dst src);
 6729   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6730   ins_encode %{
 6731     ShouldNotReachHere();
 6732   %}
 6733   ins_pipe( fpu_reg_reg );
 6734 %}
 6735 
 6736 // Load Double
 6737 instruct MoveVL2D(regD dst, vlRegD src) %{
 6738   match(Set dst src);
 6739   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6740   ins_encode %{
 6741     ShouldNotReachHere();
 6742   %}
 6743   ins_pipe( fpu_reg_reg );
 6744 %}
 6745 
 6746 // Load Double
 6747 instruct MoveLEG2D(regD dst, legRegD src) %{
 6748   match(Set dst src);
 6749   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6750   ins_encode %{
 6751     ShouldNotReachHere();
 6752   %}
 6753   ins_pipe( fpu_reg_reg );
 6754 %}
 6755 
 6756 //----------Load/Store/Move Instructions---------------------------------------
 6757 //----------Load Instructions--------------------------------------------------
 6758 
 6759 // Load Byte (8 bit signed)
 6760 instruct loadB(rRegI dst, memory mem)
 6761 %{
 6762   match(Set dst (LoadB mem));
 6763 
 6764   ins_cost(125);
 6765   format %{ "movsbl  $dst, $mem\t# byte" %}
 6766 
 6767   ins_encode %{
 6768     __ movsbl($dst$$Register, $mem$$Address);
 6769   %}
 6770 
 6771   ins_pipe(ialu_reg_mem);
 6772 %}
 6773 
 6774 // Load Byte (8 bit signed) into Long Register
 6775 instruct loadB2L(rRegL dst, memory mem)
 6776 %{
 6777   match(Set dst (ConvI2L (LoadB mem)));
 6778 
 6779   ins_cost(125);
 6780   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6781 
 6782   ins_encode %{
 6783     __ movsbq($dst$$Register, $mem$$Address);
 6784   %}
 6785 
 6786   ins_pipe(ialu_reg_mem);
 6787 %}
 6788 
 6789 // Load Unsigned Byte (8 bit UNsigned)
 6790 instruct loadUB(rRegI dst, memory mem)
 6791 %{
 6792   match(Set dst (LoadUB mem));
 6793 
 6794   ins_cost(125);
 6795   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6796 
 6797   ins_encode %{
 6798     __ movzbl($dst$$Register, $mem$$Address);
 6799   %}
 6800 
 6801   ins_pipe(ialu_reg_mem);
 6802 %}
 6803 
 6804 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6805 instruct loadUB2L(rRegL dst, memory mem)
 6806 %{
 6807   match(Set dst (ConvI2L (LoadUB mem)));
 6808 
 6809   ins_cost(125);
 6810   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6811 
 6812   ins_encode %{
 6813     __ movzbq($dst$$Register, $mem$$Address);
 6814   %}
 6815 
 6816   ins_pipe(ialu_reg_mem);
 6817 %}
 6818 
 6819 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 6820 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6821   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6822   effect(KILL cr);
 6823 
 6824   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6825             "andl    $dst, right_n_bits($mask, 8)" %}
 6826   ins_encode %{
 6827     Register Rdst = $dst$$Register;
 6828     __ movzbq(Rdst, $mem$$Address);
 6829     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 6830   %}
 6831   ins_pipe(ialu_reg_mem);
 6832 %}
 6833 
 6834 // Load Short (16 bit signed)
 6835 instruct loadS(rRegI dst, memory mem)
 6836 %{
 6837   match(Set dst (LoadS mem));
 6838 
 6839   ins_cost(125);
 6840   format %{ "movswl $dst, $mem\t# short" %}
 6841 
 6842   ins_encode %{
 6843     __ movswl($dst$$Register, $mem$$Address);
 6844   %}
 6845 
 6846   ins_pipe(ialu_reg_mem);
 6847 %}
 6848 
 6849 // Load Short (16 bit signed) to Byte (8 bit signed)
 6850 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6851   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 6852 
 6853   ins_cost(125);
 6854   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6855   ins_encode %{
 6856     __ movsbl($dst$$Register, $mem$$Address);
 6857   %}
 6858   ins_pipe(ialu_reg_mem);
 6859 %}
 6860 
 6861 // Load Short (16 bit signed) into Long Register
 6862 instruct loadS2L(rRegL dst, memory mem)
 6863 %{
 6864   match(Set dst (ConvI2L (LoadS mem)));
 6865 
 6866   ins_cost(125);
 6867   format %{ "movswq $dst, $mem\t# short -> long" %}
 6868 
 6869   ins_encode %{
 6870     __ movswq($dst$$Register, $mem$$Address);
 6871   %}
 6872 
 6873   ins_pipe(ialu_reg_mem);
 6874 %}
 6875 
 6876 // Load Unsigned Short/Char (16 bit UNsigned)
 6877 instruct loadUS(rRegI dst, memory mem)
 6878 %{
 6879   match(Set dst (LoadUS mem));
 6880 
 6881   ins_cost(125);
 6882   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 6883 
 6884   ins_encode %{
 6885     __ movzwl($dst$$Register, $mem$$Address);
 6886   %}
 6887 
 6888   ins_pipe(ialu_reg_mem);
 6889 %}
 6890 
 6891 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 6892 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6893   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 6894 
 6895   ins_cost(125);
 6896   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 6897   ins_encode %{
 6898     __ movsbl($dst$$Register, $mem$$Address);
 6899   %}
 6900   ins_pipe(ialu_reg_mem);
 6901 %}
 6902 
 6903 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 6904 instruct loadUS2L(rRegL dst, memory mem)
 6905 %{
 6906   match(Set dst (ConvI2L (LoadUS mem)));
 6907 
 6908   ins_cost(125);
 6909   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 6910 
 6911   ins_encode %{
 6912     __ movzwq($dst$$Register, $mem$$Address);
 6913   %}
 6914 
 6915   ins_pipe(ialu_reg_mem);
 6916 %}
 6917 
 6918 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 6919 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 6920   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 6921 
 6922   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 6923   ins_encode %{
 6924     __ movzbq($dst$$Register, $mem$$Address);
 6925   %}
 6926   ins_pipe(ialu_reg_mem);
 6927 %}
 6928 
 6929 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 6930 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6931   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 6932   effect(KILL cr);
 6933 
 6934   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 6935             "andl    $dst, right_n_bits($mask, 16)" %}
 6936   ins_encode %{
 6937     Register Rdst = $dst$$Register;
 6938     __ movzwq(Rdst, $mem$$Address);
 6939     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 6940   %}
 6941   ins_pipe(ialu_reg_mem);
 6942 %}
 6943 
 6944 // Load Integer
 6945 instruct loadI(rRegI dst, memory mem)
 6946 %{
 6947   match(Set dst (LoadI mem));
 6948 
 6949   ins_cost(125);
 6950   format %{ "movl    $dst, $mem\t# int" %}
 6951 
 6952   ins_encode %{
 6953     __ movl($dst$$Register, $mem$$Address);
 6954   %}
 6955 
 6956   ins_pipe(ialu_reg_mem);
 6957 %}
 6958 
 6959 // Load Integer (32 bit signed) to Byte (8 bit signed)
 6960 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6961   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 6962 
 6963   ins_cost(125);
 6964   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 6965   ins_encode %{
 6966     __ movsbl($dst$$Register, $mem$$Address);
 6967   %}
 6968   ins_pipe(ialu_reg_mem);
 6969 %}
 6970 
 6971 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 6972 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 6973   match(Set dst (AndI (LoadI mem) mask));
 6974 
 6975   ins_cost(125);
 6976   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 6977   ins_encode %{
 6978     __ movzbl($dst$$Register, $mem$$Address);
 6979   %}
 6980   ins_pipe(ialu_reg_mem);
 6981 %}
 6982 
 6983 // Load Integer (32 bit signed) to Short (16 bit signed)
 6984 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 6985   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 6986 
 6987   ins_cost(125);
 6988   format %{ "movswl  $dst, $mem\t# int -> short" %}
 6989   ins_encode %{
 6990     __ movswl($dst$$Register, $mem$$Address);
 6991   %}
 6992   ins_pipe(ialu_reg_mem);
 6993 %}
 6994 
 6995 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 6996 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 6997   match(Set dst (AndI (LoadI mem) mask));
 6998 
 6999   ins_cost(125);
 7000   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7001   ins_encode %{
 7002     __ movzwl($dst$$Register, $mem$$Address);
 7003   %}
 7004   ins_pipe(ialu_reg_mem);
 7005 %}
 7006 
 7007 // Load Integer into Long Register
 7008 instruct loadI2L(rRegL dst, memory mem)
 7009 %{
 7010   match(Set dst (ConvI2L (LoadI mem)));
 7011 
 7012   ins_cost(125);
 7013   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7014 
 7015   ins_encode %{
 7016     __ movslq($dst$$Register, $mem$$Address);
 7017   %}
 7018 
 7019   ins_pipe(ialu_reg_mem);
 7020 %}
 7021 
 7022 // Load Integer with mask 0xFF into Long Register
 7023 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7024   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7025 
 7026   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7027   ins_encode %{
 7028     __ movzbq($dst$$Register, $mem$$Address);
 7029   %}
 7030   ins_pipe(ialu_reg_mem);
 7031 %}
 7032 
 7033 // Load Integer with mask 0xFFFF into Long Register
 7034 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7035   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7036 
 7037   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7038   ins_encode %{
 7039     __ movzwq($dst$$Register, $mem$$Address);
 7040   %}
 7041   ins_pipe(ialu_reg_mem);
 7042 %}
 7043 
 7044 // Load Integer with a 31-bit mask into Long Register
 7045 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7046   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7047   effect(KILL cr);
 7048 
 7049   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7050             "andl    $dst, $mask" %}
 7051   ins_encode %{
 7052     Register Rdst = $dst$$Register;
 7053     __ movl(Rdst, $mem$$Address);
 7054     __ andl(Rdst, $mask$$constant);
 7055   %}
 7056   ins_pipe(ialu_reg_mem);
 7057 %}
 7058 
 7059 // Load Unsigned Integer into Long Register
 7060 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7061 %{
 7062   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7063 
 7064   ins_cost(125);
 7065   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7066 
 7067   ins_encode %{
 7068     __ movl($dst$$Register, $mem$$Address);
 7069   %}
 7070 
 7071   ins_pipe(ialu_reg_mem);
 7072 %}
 7073 
 7074 // Load Long
 7075 instruct loadL(rRegL dst, memory mem)
 7076 %{
 7077   match(Set dst (LoadL mem));
 7078 
 7079   ins_cost(125);
 7080   format %{ "movq    $dst, $mem\t# long" %}
 7081 
 7082   ins_encode %{
 7083     __ movq($dst$$Register, $mem$$Address);
 7084   %}
 7085 
 7086   ins_pipe(ialu_reg_mem); // XXX
 7087 %}
 7088 
 7089 // Load Range
 7090 instruct loadRange(rRegI dst, memory mem)
 7091 %{
 7092   match(Set dst (LoadRange mem));
 7093 
 7094   ins_cost(125); // XXX
 7095   format %{ "movl    $dst, $mem\t# range" %}
 7096   ins_encode %{
 7097     __ movl($dst$$Register, $mem$$Address);
 7098   %}
 7099   ins_pipe(ialu_reg_mem);
 7100 %}
 7101 
 7102 // Load Pointer
 7103 instruct loadP(rRegP dst, memory mem)
 7104 %{
 7105   match(Set dst (LoadP mem));
 7106   predicate(n->as_Load()->barrier_data() == 0);
 7107 
 7108   ins_cost(125); // XXX
 7109   format %{ "movq    $dst, $mem\t# ptr" %}
 7110   ins_encode %{
 7111     __ movq($dst$$Register, $mem$$Address);
 7112   %}
 7113   ins_pipe(ialu_reg_mem); // XXX
 7114 %}
 7115 
 7116 // Load Compressed Pointer
 7117 instruct loadN(rRegN dst, memory mem)
 7118 %{
 7119    predicate(n->as_Load()->barrier_data() == 0);
 7120    match(Set dst (LoadN mem));
 7121 
 7122    ins_cost(125); // XXX
 7123    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7124    ins_encode %{
 7125      __ movl($dst$$Register, $mem$$Address);
 7126    %}
 7127    ins_pipe(ialu_reg_mem); // XXX
 7128 %}
 7129 
 7130 
 7131 // Load Klass Pointer
 7132 instruct loadKlass(rRegP dst, memory mem)
 7133 %{
 7134   match(Set dst (LoadKlass mem));
 7135 
 7136   ins_cost(125); // XXX
 7137   format %{ "movq    $dst, $mem\t# class" %}
 7138   ins_encode %{
 7139     __ movq($dst$$Register, $mem$$Address);
 7140   %}
 7141   ins_pipe(ialu_reg_mem); // XXX
 7142 %}
 7143 
 7144 // Load narrow Klass Pointer
 7145 instruct loadNKlass(rRegN dst, memory mem)
 7146 %{
 7147   predicate(!UseCompactObjectHeaders);
 7148   match(Set dst (LoadNKlass mem));
 7149 
 7150   ins_cost(125); // XXX
 7151   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7152   ins_encode %{
 7153     __ movl($dst$$Register, $mem$$Address);
 7154   %}
 7155   ins_pipe(ialu_reg_mem); // XXX
 7156 %}
 7157 
 7158 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7159 %{
 7160   predicate(UseCompactObjectHeaders);
 7161   match(Set dst (LoadNKlass mem));
 7162   effect(KILL cr);
 7163   ins_cost(125);
 7164   format %{
 7165     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7166     "shrl    $dst, markWord::klass_shift"
 7167   %}
 7168   ins_encode %{
    // The incoming address points at obj-start + Type::klass_offset().  Subtract
    // that offset to recover obj-start, so that we can load from the object's mark-word instead.
 7171     Register d = $dst$$Register;
 7172     Address  s = ($mem$$Address).plus_disp(-Type::klass_offset());
 7173     if (UseAPX) {
 7174       __ eshrl(d, s, markWord::klass_shift, false);
 7175     } else {
 7176       __ movl(d, s);
 7177       __ shrl(d, markWord::klass_shift);
 7178     }
 7179   %}
 7180   ins_pipe(ialu_reg_mem);
 7181 %}
 7182 
 7183 // Load Float
 7184 instruct loadF(regF dst, memory mem)
 7185 %{
 7186   match(Set dst (LoadF mem));
 7187 
 7188   ins_cost(145); // XXX
 7189   format %{ "movss   $dst, $mem\t# float" %}
 7190   ins_encode %{
 7191     __ movflt($dst$$XMMRegister, $mem$$Address);
 7192   %}
 7193   ins_pipe(pipe_slow); // XXX
 7194 %}
 7195 
 7196 // Load Double
 7197 instruct loadD_partial(regD dst, memory mem)
 7198 %{
 7199   predicate(!UseXmmLoadAndClearUpper);
 7200   match(Set dst (LoadD mem));
 7201 
 7202   ins_cost(145); // XXX
 7203   format %{ "movlpd  $dst, $mem\t# double" %}
 7204   ins_encode %{
 7205     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7206   %}
 7207   ins_pipe(pipe_slow); // XXX
 7208 %}
 7209 
 7210 instruct loadD(regD dst, memory mem)
 7211 %{
 7212   predicate(UseXmmLoadAndClearUpper);
 7213   match(Set dst (LoadD mem));
 7214 
 7215   ins_cost(145); // XXX
 7216   format %{ "movsd   $dst, $mem\t# double" %}
 7217   ins_encode %{
 7218     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7219   %}
 7220   ins_pipe(pipe_slow); // XXX
 7221 %}
 7222 
 7223 // max = java.lang.Math.max(float a, float b)
 7224 instruct maxF_avx10_reg(regF dst, regF a, regF b) %{
 7225   predicate(VM_Version::supports_avx10_2());
 7226   match(Set dst (MaxF a b));
 7227   format %{ "maxF $dst, $a, $b" %}
 7228   ins_encode %{
 7229     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
 7230   %}
 7231   ins_pipe( pipe_slow );
 7232 %}
 7233 
 7234 // max = java.lang.Math.max(float a, float b)
 7235 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7236   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7237   match(Set dst (MaxF a b));
 7238   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7239   format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7240   ins_encode %{
 7241     __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7242   %}
 7243   ins_pipe( pipe_slow );
 7244 %}
 7245 
 7246 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7247   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7248   match(Set dst (MaxF a b));
 7249   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7250 
 7251   format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
 7252   ins_encode %{
 7253     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7254                     false /*min*/, true /*single*/);
 7255   %}
 7256   ins_pipe( pipe_slow );
 7257 %}
 7258 
 7259 // max = java.lang.Math.max(double a, double b)
 7260 instruct maxD_avx10_reg(regD dst, regD a, regD b) %{
 7261   predicate(VM_Version::supports_avx10_2());
 7262   match(Set dst (MaxD a b));
 7263   format %{ "maxD $dst, $a, $b" %}
 7264   ins_encode %{
 7265     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
 7266   %}
 7267   ins_pipe( pipe_slow );
 7268 %}
 7269 
 7270 // max = java.lang.Math.max(double a, double b)
 7271 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7272   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7273   match(Set dst (MaxD a b));
 7274   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7275   format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7276   ins_encode %{
 7277     __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7278   %}
 7279   ins_pipe( pipe_slow );
 7280 %}
 7281 
 7282 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7283   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7284   match(Set dst (MaxD a b));
 7285   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7286 
 7287   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7288   ins_encode %{
 7289     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7290                     false /*min*/, false /*single*/);
 7291   %}
 7292   ins_pipe( pipe_slow );
 7293 %}
 7294 
// min = java.lang.Math.min(float a, float b)
 7296 instruct minF_avx10_reg(regF dst, regF a, regF b) %{
 7297   predicate(VM_Version::supports_avx10_2());
 7298   match(Set dst (MinF a b));
 7299   format %{ "minF $dst, $a, $b" %}
 7300   ins_encode %{
 7301     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
 7302   %}
 7303   ins_pipe( pipe_slow );
 7304 %}
 7305 
 7306 // min = java.lang.Math.min(float a, float b)
 7307 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7308   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7309   match(Set dst (MinF a b));
 7310   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7311   format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7312   ins_encode %{
 7313     __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7314   %}
 7315   ins_pipe( pipe_slow );
 7316 %}
 7317 
 7318 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7319   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7320   match(Set dst (MinF a b));
 7321   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7322 
 7323   format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7324   ins_encode %{
 7325     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7326                     true /*min*/, true /*single*/);
 7327   %}
 7328   ins_pipe( pipe_slow );
 7329 %}
 7330 
// min = java.lang.Math.min(double a, double b)
 7332 instruct minD_avx10_reg(regD dst, regD a, regD b) %{
 7333   predicate(VM_Version::supports_avx10_2());
 7334   match(Set dst (MinD a b));
 7335   format %{ "minD $dst, $a, $b" %}
 7336   ins_encode %{
 7337     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
 7338   %}
 7339   ins_pipe( pipe_slow );
 7340 %}
 7341 
 7342 // min = java.lang.Math.min(double a, double b)
 7343 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7344   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7345   match(Set dst (MinD a b));
 7346   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7347     format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7348   ins_encode %{
 7349     __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7350   %}
 7351   ins_pipe( pipe_slow );
 7352 %}
 7353 
 7354 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7355   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7356   match(Set dst (MinD a b));
 7357   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7358 
 7359   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7360   ins_encode %{
 7361     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7362                     true /*min*/, false /*single*/);
 7363   %}
 7364   ins_pipe( pipe_slow );
 7365 %}
 7366 
 7367 // Load Effective Address
 7368 instruct leaP8(rRegP dst, indOffset8 mem)
 7369 %{
 7370   match(Set dst mem);
 7371 
 7372   ins_cost(110); // XXX
 7373   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7374   ins_encode %{
 7375     __ leaq($dst$$Register, $mem$$Address);
 7376   %}
 7377   ins_pipe(ialu_reg_reg_fat);
 7378 %}
 7379 
 7380 instruct leaP32(rRegP dst, indOffset32 mem)
 7381 %{
 7382   match(Set dst mem);
 7383 
 7384   ins_cost(110);
 7385   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7386   ins_encode %{
 7387     __ leaq($dst$$Register, $mem$$Address);
 7388   %}
 7389   ins_pipe(ialu_reg_reg_fat);
 7390 %}
 7391 
 7392 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7393 %{
 7394   match(Set dst mem);
 7395 
 7396   ins_cost(110);
 7397   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7398   ins_encode %{
 7399     __ leaq($dst$$Register, $mem$$Address);
 7400   %}
 7401   ins_pipe(ialu_reg_reg_fat);
 7402 %}
 7403 
 7404 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7405 %{
 7406   match(Set dst mem);
 7407 
 7408   ins_cost(110);
 7409   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7410   ins_encode %{
 7411     __ leaq($dst$$Register, $mem$$Address);
 7412   %}
 7413   ins_pipe(ialu_reg_reg_fat);
 7414 %}
 7415 
 7416 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7417 %{
 7418   match(Set dst mem);
 7419 
 7420   ins_cost(110);
 7421   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7422   ins_encode %{
 7423     __ leaq($dst$$Register, $mem$$Address);
 7424   %}
 7425   ins_pipe(ialu_reg_reg_fat);
 7426 %}
 7427 
 7428 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7429 %{
 7430   match(Set dst mem);
 7431 
 7432   ins_cost(110);
 7433   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7434   ins_encode %{
 7435     __ leaq($dst$$Register, $mem$$Address);
 7436   %}
 7437   ins_pipe(ialu_reg_reg_fat);
 7438 %}
 7439 
 7440 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7441 %{
 7442   match(Set dst mem);
 7443 
 7444   ins_cost(110);
 7445   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7446   ins_encode %{
 7447     __ leaq($dst$$Register, $mem$$Address);
 7448   %}
 7449   ins_pipe(ialu_reg_reg_fat);
 7450 %}
 7451 
 7452 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7453 %{
 7454   match(Set dst mem);
 7455 
 7456   ins_cost(110);
 7457   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7458   ins_encode %{
 7459     __ leaq($dst$$Register, $mem$$Address);
 7460   %}
 7461   ins_pipe(ialu_reg_reg_fat);
 7462 %}
 7463 
// Load Effective Address which uses a narrow (32-bit) oop
 7465 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7466 %{
 7467   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7468   match(Set dst mem);
 7469 
 7470   ins_cost(110);
 7471   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7472   ins_encode %{
 7473     __ leaq($dst$$Register, $mem$$Address);
 7474   %}
 7475   ins_pipe(ialu_reg_reg_fat);
 7476 %}
 7477 
 7478 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7479 %{
 7480   predicate(CompressedOops::shift() == 0);
 7481   match(Set dst mem);
 7482 
 7483   ins_cost(110); // XXX
 7484   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7485   ins_encode %{
 7486     __ leaq($dst$$Register, $mem$$Address);
 7487   %}
 7488   ins_pipe(ialu_reg_reg_fat);
 7489 %}
 7490 
 7491 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7492 %{
 7493   predicate(CompressedOops::shift() == 0);
 7494   match(Set dst mem);
 7495 
 7496   ins_cost(110);
 7497   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7498   ins_encode %{
 7499     __ leaq($dst$$Register, $mem$$Address);
 7500   %}
 7501   ins_pipe(ialu_reg_reg_fat);
 7502 %}
 7503 
 7504 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7505 %{
 7506   predicate(CompressedOops::shift() == 0);
 7507   match(Set dst mem);
 7508 
 7509   ins_cost(110);
 7510   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7511   ins_encode %{
 7512     __ leaq($dst$$Register, $mem$$Address);
 7513   %}
 7514   ins_pipe(ialu_reg_reg_fat);
 7515 %}
 7516 
 7517 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7518 %{
 7519   predicate(CompressedOops::shift() == 0);
 7520   match(Set dst mem);
 7521 
 7522   ins_cost(110);
 7523   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7524   ins_encode %{
 7525     __ leaq($dst$$Register, $mem$$Address);
 7526   %}
 7527   ins_pipe(ialu_reg_reg_fat);
 7528 %}
 7529 
 7530 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7531 %{
 7532   predicate(CompressedOops::shift() == 0);
 7533   match(Set dst mem);
 7534 
 7535   ins_cost(110);
 7536   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7537   ins_encode %{
 7538     __ leaq($dst$$Register, $mem$$Address);
 7539   %}
 7540   ins_pipe(ialu_reg_reg_fat);
 7541 %}
 7542 
 7543 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7544 %{
 7545   predicate(CompressedOops::shift() == 0);
 7546   match(Set dst mem);
 7547 
 7548   ins_cost(110);
 7549   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7550   ins_encode %{
 7551     __ leaq($dst$$Register, $mem$$Address);
 7552   %}
 7553   ins_pipe(ialu_reg_reg_fat);
 7554 %}
 7555 
 7556 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7557 %{
 7558   predicate(CompressedOops::shift() == 0);
 7559   match(Set dst mem);
 7560 
 7561   ins_cost(110);
 7562   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7563   ins_encode %{
 7564     __ leaq($dst$$Register, $mem$$Address);
 7565   %}
 7566   ins_pipe(ialu_reg_reg_fat);
 7567 %}
 7568 
 7569 instruct loadConI(rRegI dst, immI src)
 7570 %{
 7571   match(Set dst src);
 7572 
 7573   format %{ "movl    $dst, $src\t# int" %}
 7574   ins_encode %{
 7575     __ movl($dst$$Register, $src$$constant);
 7576   %}
 7577   ins_pipe(ialu_reg_fat); // XXX
 7578 %}
 7579 
 7580 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7581 %{
 7582   match(Set dst src);
 7583   effect(KILL cr);
 7584 
 7585   ins_cost(50);
 7586   format %{ "xorl    $dst, $dst\t# int" %}
 7587   ins_encode %{
 7588     __ xorl($dst$$Register, $dst$$Register);
 7589   %}
 7590   ins_pipe(ialu_reg);
 7591 %}
 7592 
 7593 instruct loadConL(rRegL dst, immL src)
 7594 %{
 7595   match(Set dst src);
 7596 
 7597   ins_cost(150);
 7598   format %{ "movq    $dst, $src\t# long" %}
 7599   ins_encode %{
 7600     __ mov64($dst$$Register, $src$$constant);
 7601   %}
 7602   ins_pipe(ialu_reg);
 7603 %}
 7604 
 7605 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7606 %{
 7607   match(Set dst src);
 7608   effect(KILL cr);
 7609 
 7610   ins_cost(50);
 7611   format %{ "xorl    $dst, $dst\t# long" %}
 7612   ins_encode %{
 7613     __ xorl($dst$$Register, $dst$$Register);
 7614   %}
 7615   ins_pipe(ialu_reg); // XXX
 7616 %}
 7617 
 7618 instruct loadConUL32(rRegL dst, immUL32 src)
 7619 %{
 7620   match(Set dst src);
 7621 
 7622   ins_cost(60);
 7623   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7624   ins_encode %{
 7625     __ movl($dst$$Register, $src$$constant);
 7626   %}
 7627   ins_pipe(ialu_reg);
 7628 %}
 7629 
 7630 instruct loadConL32(rRegL dst, immL32 src)
 7631 %{
 7632   match(Set dst src);
 7633 
 7634   ins_cost(70);
 7635   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7636   ins_encode %{
 7637     __ movq($dst$$Register, $src$$constant);
 7638   %}
 7639   ins_pipe(ialu_reg);
 7640 %}
 7641 
 7642 instruct loadConP(rRegP dst, immP con) %{
 7643   match(Set dst con);
 7644 
 7645   format %{ "movq    $dst, $con\t# ptr" %}
 7646   ins_encode %{
 7647     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7648   %}
 7649   ins_pipe(ialu_reg_fat); // XXX
 7650 %}
 7651 
 7652 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7653 %{
 7654   match(Set dst src);
 7655   effect(KILL cr);
 7656 
 7657   ins_cost(50);
 7658   format %{ "xorl    $dst, $dst\t# ptr" %}
 7659   ins_encode %{
 7660     __ xorl($dst$$Register, $dst$$Register);
 7661   %}
 7662   ins_pipe(ialu_reg);
 7663 %}
 7664 
 7665 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7666 %{
 7667   match(Set dst src);
 7668   effect(KILL cr);
 7669 
 7670   ins_cost(60);
 7671   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7672   ins_encode %{
 7673     __ movl($dst$$Register, $src$$constant);
 7674   %}
 7675   ins_pipe(ialu_reg);
 7676 %}
 7677 
 7678 instruct loadConF(regF dst, immF con) %{
 7679   match(Set dst con);
 7680   ins_cost(125);
 7681   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7682   ins_encode %{
 7683     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7684   %}
 7685   ins_pipe(pipe_slow);
 7686 %}
 7687 
 7688 instruct loadConH(regF dst, immH con) %{
 7689   match(Set dst con);
 7690   ins_cost(125);
 7691   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7692   ins_encode %{
 7693     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7694   %}
 7695   ins_pipe(pipe_slow);
 7696 %}
 7697 
 7698 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7699   match(Set dst src);
 7700   effect(KILL cr);
 7701   format %{ "xorq    $dst, $src\t# compressed null pointer" %}
 7702   ins_encode %{
 7703     __ xorq($dst$$Register, $dst$$Register);
 7704   %}
 7705   ins_pipe(ialu_reg);
 7706 %}
 7707 
 7708 instruct loadConN(rRegN dst, immN src) %{
 7709   match(Set dst src);
 7710 
 7711   ins_cost(125);
 7712   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7713   ins_encode %{
 7714     address con = (address)$src$$constant;
 7715     if (con == nullptr) {
 7716       ShouldNotReachHere();
 7717     } else {
 7718       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7719     }
 7720   %}
 7721   ins_pipe(ialu_reg_fat); // XXX
 7722 %}
 7723 
 7724 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7725   match(Set dst src);
 7726 
 7727   ins_cost(125);
 7728   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7729   ins_encode %{
 7730     address con = (address)$src$$constant;
 7731     if (con == nullptr) {
 7732       ShouldNotReachHere();
 7733     } else {
 7734       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7735     }
 7736   %}
 7737   ins_pipe(ialu_reg_fat); // XXX
 7738 %}
 7739 
 7740 instruct loadConF0(regF dst, immF0 src)
 7741 %{
 7742   match(Set dst src);
 7743   ins_cost(100);
 7744 
 7745   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7746   ins_encode %{
 7747     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7748   %}
 7749   ins_pipe(pipe_slow);
 7750 %}
 7751 
// Use the same format since predicate() cannot be used here.
 7753 instruct loadConD(regD dst, immD con) %{
 7754   match(Set dst con);
 7755   ins_cost(125);
 7756   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7757   ins_encode %{
 7758     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7759   %}
 7760   ins_pipe(pipe_slow);
 7761 %}
 7762 
 7763 instruct loadConD0(regD dst, immD0 src)
 7764 %{
 7765   match(Set dst src);
 7766   ins_cost(100);
 7767 
 7768   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7769   ins_encode %{
 7770     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7771   %}
 7772   ins_pipe(pipe_slow);
 7773 %}
 7774 
 7775 instruct loadSSI(rRegI dst, stackSlotI src)
 7776 %{
 7777   match(Set dst src);
 7778 
 7779   ins_cost(125);
 7780   format %{ "movl    $dst, $src\t# int stk" %}
 7781   ins_encode %{
 7782     __ movl($dst$$Register, $src$$Address);
 7783   %}
 7784   ins_pipe(ialu_reg_mem);
 7785 %}
 7786 
 7787 instruct loadSSL(rRegL dst, stackSlotL src)
 7788 %{
 7789   match(Set dst src);
 7790 
 7791   ins_cost(125);
 7792   format %{ "movq    $dst, $src\t# long stk" %}
 7793   ins_encode %{
 7794     __ movq($dst$$Register, $src$$Address);
 7795   %}
 7796   ins_pipe(ialu_reg_mem);
 7797 %}
 7798 
 7799 instruct loadSSP(rRegP dst, stackSlotP src)
 7800 %{
 7801   match(Set dst src);
 7802 
 7803   ins_cost(125);
 7804   format %{ "movq    $dst, $src\t# ptr stk" %}
 7805   ins_encode %{
 7806     __ movq($dst$$Register, $src$$Address);
 7807   %}
 7808   ins_pipe(ialu_reg_mem);
 7809 %}
 7810 
 7811 instruct loadSSF(regF dst, stackSlotF src)
 7812 %{
 7813   match(Set dst src);
 7814 
 7815   ins_cost(125);
 7816   format %{ "movss   $dst, $src\t# float stk" %}
 7817   ins_encode %{
 7818     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 7819   %}
 7820   ins_pipe(pipe_slow); // XXX
 7821 %}
 7822 
// Use the same format since predicate() cannot be used here.
 7824 instruct loadSSD(regD dst, stackSlotD src)
 7825 %{
 7826   match(Set dst src);
 7827 
 7828   ins_cost(125);
 7829   format %{ "movsd   $dst, $src\t# double stk" %}
 7830   ins_encode  %{
 7831     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 7832   %}
 7833   ins_pipe(pipe_slow); // XXX
 7834 %}
 7835 
 7836 // Prefetch instructions for allocation.
 7837 // Must be safe to execute with invalid address (cannot fault).
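// AllocatePrefetchInstr selects the flavor: 0 = prefetchnta, 1 = prefetcht0,
// 2 = prefetcht2, 3 = prefetchw (see the predicates on the instructs below).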
 7838 
 7839 instruct prefetchAlloc( memory mem ) %{
 7840   predicate(AllocatePrefetchInstr==3);
 7841   match(PrefetchAllocation mem);
 7842   ins_cost(125);
 7843 
 7844   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7845   ins_encode %{
 7846     __ prefetchw($mem$$Address);
 7847   %}
 7848   ins_pipe(ialu_mem);
 7849 %}
 7850 
 7851 instruct prefetchAllocNTA( memory mem ) %{
 7852   predicate(AllocatePrefetchInstr==0);
 7853   match(PrefetchAllocation mem);
 7854   ins_cost(125);
 7855 
 7856   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7857   ins_encode %{
 7858     __ prefetchnta($mem$$Address);
 7859   %}
 7860   ins_pipe(ialu_mem);
 7861 %}
 7862 
 7863 instruct prefetchAllocT0( memory mem ) %{
 7864   predicate(AllocatePrefetchInstr==1);
 7865   match(PrefetchAllocation mem);
 7866   ins_cost(125);
 7867 
 7868   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 7869   ins_encode %{
 7870     __ prefetcht0($mem$$Address);
 7871   %}
 7872   ins_pipe(ialu_mem);
 7873 %}
 7874 
 7875 instruct prefetchAllocT2( memory mem ) %{
 7876   predicate(AllocatePrefetchInstr==2);
 7877   match(PrefetchAllocation mem);
 7878   ins_cost(125);
 7879 
 7880   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 7881   ins_encode %{
 7882     __ prefetcht2($mem$$Address);
 7883   %}
 7884   ins_pipe(ialu_mem);
 7885 %}
 7886 
 7887 //----------Store Instructions-------------------------------------------------
 7888 
 7889 // Store Byte
 7890 instruct storeB(memory mem, rRegI src)
 7891 %{
 7892   match(Set mem (StoreB mem src));
 7893 
 7894   ins_cost(125); // XXX
 7895   format %{ "movb    $mem, $src\t# byte" %}
 7896   ins_encode %{
 7897     __ movb($mem$$Address, $src$$Register);
 7898   %}
 7899   ins_pipe(ialu_mem_reg);
 7900 %}
 7901 
 7902 // Store Char/Short
 7903 instruct storeC(memory mem, rRegI src)
 7904 %{
 7905   match(Set mem (StoreC mem src));
 7906 
 7907   ins_cost(125); // XXX
 7908   format %{ "movw    $mem, $src\t# char/short" %}
 7909   ins_encode %{
 7910     __ movw($mem$$Address, $src$$Register);
 7911   %}
 7912   ins_pipe(ialu_mem_reg);
 7913 %}
 7914 
 7915 // Store Integer
 7916 instruct storeI(memory mem, rRegI src)
 7917 %{
 7918   match(Set mem (StoreI mem src));
 7919 
 7920   ins_cost(125); // XXX
 7921   format %{ "movl    $mem, $src\t# int" %}
 7922   ins_encode %{
 7923     __ movl($mem$$Address, $src$$Register);
 7924   %}
 7925   ins_pipe(ialu_mem_reg);
 7926 %}
 7927 
 7928 // Store Long
 7929 instruct storeL(memory mem, rRegL src)
 7930 %{
 7931   match(Set mem (StoreL mem src));
 7932 
 7933   ins_cost(125); // XXX
 7934   format %{ "movq    $mem, $src\t# long" %}
 7935   ins_encode %{
 7936     __ movq($mem$$Address, $src$$Register);
 7937   %}
 7938   ins_pipe(ialu_mem_reg); // XXX
 7939 %}
 7940 
 7941 // Store Pointer
 7942 instruct storeP(memory mem, any_RegP src)
 7943 %{
 7944   predicate(n->as_Store()->barrier_data() == 0);
 7945   match(Set mem (StoreP mem src));
 7946 
 7947   ins_cost(125); // XXX
 7948   format %{ "movq    $mem, $src\t# ptr" %}
 7949   ins_encode %{
 7950     __ movq($mem$$Address, $src$$Register);
 7951   %}
 7952   ins_pipe(ialu_mem_reg);
 7953 %}
 7954 
 7955 instruct storeImmP0(memory mem, immP0 zero)
 7956 %{
 7957   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 7958   match(Set mem (StoreP mem zero));
 7959 
 7960   ins_cost(125); // XXX
 7961   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 7962   ins_encode %{
 7963     __ movq($mem$$Address, r12);
 7964   %}
 7965   ins_pipe(ialu_mem_reg);
 7966 %}
 7967 
 7968 // Store Null Pointer, mark word, or other simple pointer constant.
 7969 instruct storeImmP(memory mem, immP31 src)
 7970 %{
 7971   predicate(n->as_Store()->barrier_data() == 0);
 7972   match(Set mem (StoreP mem src));
 7973 
 7974   ins_cost(150); // XXX
 7975   format %{ "movq    $mem, $src\t# ptr" %}
 7976   ins_encode %{
 7977     __ movq($mem$$Address, $src$$constant);
 7978   %}
 7979   ins_pipe(ialu_mem_imm);
 7980 %}
 7981 
 7982 // Store Compressed Pointer
 7983 instruct storeN(memory mem, rRegN src)
 7984 %{
 7985   predicate(n->as_Store()->barrier_data() == 0);
 7986   match(Set mem (StoreN mem src));
 7987 
 7988   ins_cost(125); // XXX
 7989   format %{ "movl    $mem, $src\t# compressed ptr" %}
 7990   ins_encode %{
 7991     __ movl($mem$$Address, $src$$Register);
 7992   %}
 7993   ins_pipe(ialu_mem_reg);
 7994 %}
 7995 
 7996 instruct storeNKlass(memory mem, rRegN src)
 7997 %{
 7998   match(Set mem (StoreNKlass mem src));
 7999 
 8000   ins_cost(125); // XXX
 8001   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8002   ins_encode %{
 8003     __ movl($mem$$Address, $src$$Register);
 8004   %}
 8005   ins_pipe(ialu_mem_reg);
 8006 %}
 8007 
 8008 instruct storeImmN0(memory mem, immN0 zero)
 8009 %{
 8010   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8011   match(Set mem (StoreN mem zero));
 8012 
 8013   ins_cost(125); // XXX
 8014   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8015   ins_encode %{
 8016     __ movl($mem$$Address, r12);
 8017   %}
 8018   ins_pipe(ialu_mem_reg);
 8019 %}
 8020 
 8021 instruct storeImmN(memory mem, immN src)
 8022 %{
 8023   predicate(n->as_Store()->barrier_data() == 0);
 8024   match(Set mem (StoreN mem src));
 8025 
 8026   ins_cost(150); // XXX
 8027   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8028   ins_encode %{
 8029     address con = (address)$src$$constant;
 8030     if (con == nullptr) {
 8031       __ movl($mem$$Address, 0);
 8032     } else {
 8033       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8034     }
 8035   %}
 8036   ins_pipe(ialu_mem_imm);
 8037 %}
 8038 
 8039 instruct storeImmNKlass(memory mem, immNKlass src)
 8040 %{
 8041   match(Set mem (StoreNKlass mem src));
 8042 
 8043   ins_cost(150); // XXX
 8044   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8045   ins_encode %{
 8046     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8047   %}
 8048   ins_pipe(ialu_mem_imm);
 8049 %}
 8050 
 8051 // Store Integer Immediate
 8052 instruct storeImmI0(memory mem, immI_0 zero)
 8053 %{
 8054   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8055   match(Set mem (StoreI mem zero));
 8056 
 8057   ins_cost(125); // XXX
 8058   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8059   ins_encode %{
 8060     __ movl($mem$$Address, r12);
 8061   %}
 8062   ins_pipe(ialu_mem_reg);
 8063 %}
 8064 
 8065 instruct storeImmI(memory mem, immI src)
 8066 %{
 8067   match(Set mem (StoreI mem src));
 8068 
 8069   ins_cost(150);
 8070   format %{ "movl    $mem, $src\t# int" %}
 8071   ins_encode %{
 8072     __ movl($mem$$Address, $src$$constant);
 8073   %}
 8074   ins_pipe(ialu_mem_imm);
 8075 %}
 8076 
 8077 // Store Long Immediate
 8078 instruct storeImmL0(memory mem, immL0 zero)
 8079 %{
 8080   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8081   match(Set mem (StoreL mem zero));
 8082 
 8083   ins_cost(125); // XXX
 8084   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8085   ins_encode %{
 8086     __ movq($mem$$Address, r12);
 8087   %}
 8088   ins_pipe(ialu_mem_reg);
 8089 %}
 8090 
 8091 instruct storeImmL(memory mem, immL32 src)
 8092 %{
 8093   match(Set mem (StoreL mem src));
 8094 
 8095   ins_cost(150);
 8096   format %{ "movq    $mem, $src\t# long" %}
 8097   ins_encode %{
 8098     __ movq($mem$$Address, $src$$constant);
 8099   %}
 8100   ins_pipe(ialu_mem_imm);
 8101 %}
 8102 
 8103 // Store Short/Char Immediate
 8104 instruct storeImmC0(memory mem, immI_0 zero)
 8105 %{
 8106   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8107   match(Set mem (StoreC mem zero));
 8108 
 8109   ins_cost(125); // XXX
 8110   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8111   ins_encode %{
 8112     __ movw($mem$$Address, r12);
 8113   %}
 8114   ins_pipe(ialu_mem_reg);
 8115 %}
 8116 
 8117 instruct storeImmI16(memory mem, immI16 src)
 8118 %{
 8119   predicate(UseStoreImmI16);
 8120   match(Set mem (StoreC mem src));
 8121 
 8122   ins_cost(150);
 8123   format %{ "movw    $mem, $src\t# short/char" %}
 8124   ins_encode %{
 8125     __ movw($mem$$Address, $src$$constant);
 8126   %}
 8127   ins_pipe(ialu_mem_imm);
 8128 %}
 8129 
 8130 // Store Byte Immediate
 8131 instruct storeImmB0(memory mem, immI_0 zero)
 8132 %{
 8133   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8134   match(Set mem (StoreB mem zero));
 8135 
 8136   ins_cost(125); // XXX
 8137   format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8138   ins_encode %{
 8139     __ movb($mem$$Address, r12);
 8140   %}
 8141   ins_pipe(ialu_mem_reg);
 8142 %}
 8143 
 8144 instruct storeImmB(memory mem, immI8 src)
 8145 %{
 8146   match(Set mem (StoreB mem src));
 8147 
 8148   ins_cost(150); // XXX
 8149   format %{ "movb    $mem, $src\t# byte" %}
 8150   ins_encode %{
 8151     __ movb($mem$$Address, $src$$constant);
 8152   %}
 8153   ins_pipe(ialu_mem_imm);
 8154 %}
 8155 
 8156 // Store Float
 8157 instruct storeF(memory mem, regF src)
 8158 %{
 8159   match(Set mem (StoreF mem src));
 8160 
 8161   ins_cost(95); // XXX
 8162   format %{ "movss   $mem, $src\t# float" %}
 8163   ins_encode %{
 8164     __ movflt($mem$$Address, $src$$XMMRegister);
 8165   %}
 8166   ins_pipe(pipe_slow); // XXX
 8167 %}
 8168 
// Store immediate float value (faster than storing from an XMM register)
 8170 instruct storeF0(memory mem, immF0 zero)
 8171 %{
 8172   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8173   match(Set mem (StoreF mem zero));
 8174 
 8175   ins_cost(25); // XXX
 8176   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8177   ins_encode %{
 8178     __ movl($mem$$Address, r12);
 8179   %}
 8180   ins_pipe(ialu_mem_reg);
 8181 %}
 8182 
 8183 instruct storeF_imm(memory mem, immF src)
 8184 %{
 8185   match(Set mem (StoreF mem src));
 8186 
 8187   ins_cost(50);
 8188   format %{ "movl    $mem, $src\t# float" %}
 8189   ins_encode %{
 8190     __ movl($mem$$Address, jint_cast($src$$constant));
 8191   %}
 8192   ins_pipe(ialu_mem_imm);
 8193 %}
 8194 
 8195 // Store Double
 8196 instruct storeD(memory mem, regD src)
 8197 %{
 8198   match(Set mem (StoreD mem src));
 8199 
 8200   ins_cost(95); // XXX
 8201   format %{ "movsd   $mem, $src\t# double" %}
 8202   ins_encode %{
 8203     __ movdbl($mem$$Address, $src$$XMMRegister);
 8204   %}
 8205   ins_pipe(pipe_slow); // XXX
 8206 %}
 8207 
// Store immediate double 0.0 (faster than storing from an XMM register)
 8209 instruct storeD0_imm(memory mem, immD0 src)
 8210 %{
 8211   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8212   match(Set mem (StoreD mem src));
 8213 
 8214   ins_cost(50);
 8215   format %{ "movq    $mem, $src\t# double 0." %}
 8216   ins_encode %{
 8217     __ movq($mem$$Address, $src$$constant);
 8218   %}
 8219   ins_pipe(ialu_mem_imm);
 8220 %}
 8221 
 8222 instruct storeD0(memory mem, immD0 zero)
 8223 %{
 8224   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8225   match(Set mem (StoreD mem zero));
 8226 
 8227   ins_cost(25); // XXX
 8228   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8229   ins_encode %{
 8230     __ movq($mem$$Address, r12);
 8231   %}
 8232   ins_pipe(ialu_mem_reg);
 8233 %}
 8234 
 8235 instruct storeSSI(stackSlotI dst, rRegI src)
 8236 %{
 8237   match(Set dst src);
 8238 
 8239   ins_cost(100);
 8240   format %{ "movl    $dst, $src\t# int stk" %}
 8241   ins_encode %{
 8242     __ movl($dst$$Address, $src$$Register);
 8243   %}
 8244   ins_pipe( ialu_mem_reg );
 8245 %}
 8246 
 8247 instruct storeSSL(stackSlotL dst, rRegL src)
 8248 %{
 8249   match(Set dst src);
 8250 
 8251   ins_cost(100);
 8252   format %{ "movq    $dst, $src\t# long stk" %}
 8253   ins_encode %{
 8254     __ movq($dst$$Address, $src$$Register);
 8255   %}
 8256   ins_pipe(ialu_mem_reg);
 8257 %}
 8258 
 8259 instruct storeSSP(stackSlotP dst, rRegP src)
 8260 %{
 8261   match(Set dst src);
 8262 
 8263   ins_cost(100);
 8264   format %{ "movq    $dst, $src\t# ptr stk" %}
 8265   ins_encode %{
 8266     __ movq($dst$$Address, $src$$Register);
 8267   %}
 8268   ins_pipe(ialu_mem_reg);
 8269 %}
 8270 
 8271 instruct storeSSF(stackSlotF dst, regF src)
 8272 %{
 8273   match(Set dst src);
 8274 
 8275   ins_cost(95); // XXX
 8276   format %{ "movss   $dst, $src\t# float stk" %}
 8277   ins_encode %{
 8278     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8279   %}
 8280   ins_pipe(pipe_slow); // XXX
 8281 %}
 8282 
 8283 instruct storeSSD(stackSlotD dst, regD src)
 8284 %{
 8285   match(Set dst src);
 8286 
 8287   ins_cost(95); // XXX
 8288   format %{ "movsd   $dst, $src\t# double stk" %}
 8289   ins_encode %{
 8290     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8291   %}
 8292   ins_pipe(pipe_slow); // XXX
 8293 %}
 8294 
 8295 instruct cacheWB(indirect addr)
 8296 %{
 8297   predicate(VM_Version::supports_data_cache_line_flush());
 8298   match(CacheWB addr);
 8299 
 8300   ins_cost(100);
 8301   format %{"cache wb $addr" %}
 8302   ins_encode %{
 8303     assert($addr->index_position() < 0, "should be");
 8304     assert($addr$$disp == 0, "should be");
 8305     __ cache_wb(Address($addr$$base$$Register, 0));
 8306   %}
 8307   ins_pipe(pipe_slow); // XXX
 8308 %}
 8309 
 8310 instruct cacheWBPreSync()
 8311 %{
 8312   predicate(VM_Version::supports_data_cache_line_flush());
 8313   match(CacheWBPreSync);
 8314 
 8315   ins_cost(100);
 8316   format %{"cache wb presync" %}
 8317   ins_encode %{
 8318     __ cache_wbsync(true);
 8319   %}
 8320   ins_pipe(pipe_slow); // XXX
 8321 %}
 8322 
 8323 instruct cacheWBPostSync()
 8324 %{
 8325   predicate(VM_Version::supports_data_cache_line_flush());
 8326   match(CacheWBPostSync);
 8327 
 8328   ins_cost(100);
 8329   format %{"cache wb postsync" %}
 8330   ins_encode %{
 8331     __ cache_wbsync(false);
 8332   %}
 8333   ins_pipe(pipe_slow); // XXX
 8334 %}
 8335 
 8336 //----------BSWAP Instructions-------------------------------------------------
 8337 instruct bytes_reverse_int(rRegI dst) %{
 8338   match(Set dst (ReverseBytesI dst));
 8339 
 8340   format %{ "bswapl  $dst" %}
 8341   ins_encode %{
 8342     __ bswapl($dst$$Register);
 8343   %}
 8344   ins_pipe( ialu_reg );
 8345 %}
 8346 
 8347 instruct bytes_reverse_long(rRegL dst) %{
 8348   match(Set dst (ReverseBytesL dst));
 8349 
 8350   format %{ "bswapq  $dst" %}
 8351   ins_encode %{
 8352     __ bswapq($dst$$Register);
 8353   %}
 8354   ins_pipe( ialu_reg);
 8355 %}
 8356 
 8357 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8358   match(Set dst (ReverseBytesUS dst));
 8359   effect(KILL cr);
 8360 
 8361   format %{ "bswapl  $dst\n\t"
 8362             "shrl    $dst,16\n\t" %}
 8363   ins_encode %{
 8364     __ bswapl($dst$$Register);
 8365     __ shrl($dst$$Register, 16);
 8366   %}
 8367   ins_pipe( ialu_reg );
 8368 %}
 8369 
 8370 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8371   match(Set dst (ReverseBytesS dst));
 8372   effect(KILL cr);
 8373 
 8374   format %{ "bswapl  $dst\n\t"
 8375             "sar     $dst,16\n\t" %}
 8376   ins_encode %{
 8377     __ bswapl($dst$$Register);
 8378     __ sarl($dst$$Register, 16);
 8379   %}
 8380   ins_pipe( ialu_reg );
 8381 %}
 8382 
 8383 //---------- Zeros Count Instructions ------------------------------------------
 8384 
 8385 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8386   predicate(UseCountLeadingZerosInstruction);
 8387   match(Set dst (CountLeadingZerosI src));
 8388   effect(KILL cr);
 8389 
 8390   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8391   ins_encode %{
 8392     __ lzcntl($dst$$Register, $src$$Register);
 8393   %}
 8394   ins_pipe(ialu_reg);
 8395 %}
 8396 
 8397 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8398   predicate(UseCountLeadingZerosInstruction);
 8399   match(Set dst (CountLeadingZerosI (LoadI src)));
 8400   effect(KILL cr);
 8401   ins_cost(175);
 8402   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8403   ins_encode %{
 8404     __ lzcntl($dst$$Register, $src$$Address);
 8405   %}
 8406   ins_pipe(ialu_reg_mem);
 8407 %}
 8408 
 8409 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8410   predicate(!UseCountLeadingZerosInstruction);
 8411   match(Set dst (CountLeadingZerosI src));
 8412   effect(KILL cr);
 8413 
 8414   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8415             "jnz     skip\n\t"
 8416             "movl    $dst, -1\n"
 8417       "skip:\n\t"
 8418             "negl    $dst\n\t"
 8419             "addl    $dst, 31" %}
 8420   ins_encode %{
 8421     Register Rdst = $dst$$Register;
 8422     Register Rsrc = $src$$Register;
 8423     Label skip;
 8424     __ bsrl(Rdst, Rsrc);
 8425     __ jccb(Assembler::notZero, skip);
 8426     __ movl(Rdst, -1);
 8427     __ bind(skip);
 8428     __ negl(Rdst);
 8429     __ addl(Rdst, BitsPerInt - 1);
 8430   %}
 8431   ins_pipe(ialu_reg);
 8432 %}
 8433 
 8434 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8435   predicate(UseCountLeadingZerosInstruction);
 8436   match(Set dst (CountLeadingZerosL src));
 8437   effect(KILL cr);
 8438 
 8439   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8440   ins_encode %{
 8441     __ lzcntq($dst$$Register, $src$$Register);
 8442   %}
 8443   ins_pipe(ialu_reg);
 8444 %}
 8445 
 8446 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8447   predicate(UseCountLeadingZerosInstruction);
 8448   match(Set dst (CountLeadingZerosL (LoadL src)));
 8449   effect(KILL cr);
 8450   ins_cost(175);
 8451   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8452   ins_encode %{
 8453     __ lzcntq($dst$$Register, $src$$Address);
 8454   %}
 8455   ins_pipe(ialu_reg_mem);
 8456 %}
 8457 
 8458 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8459   predicate(!UseCountLeadingZerosInstruction);
 8460   match(Set dst (CountLeadingZerosL src));
 8461   effect(KILL cr);
 8462 
 8463   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8464             "jnz     skip\n\t"
 8465             "movl    $dst, -1\n"
 8466       "skip:\n\t"
 8467             "negl    $dst\n\t"
 8468             "addl    $dst, 63" %}
 8469   ins_encode %{
 8470     Register Rdst = $dst$$Register;
 8471     Register Rsrc = $src$$Register;
 8472     Label skip;
 8473     __ bsrq(Rdst, Rsrc);
 8474     __ jccb(Assembler::notZero, skip);
 8475     __ movl(Rdst, -1);
 8476     __ bind(skip);
 8477     __ negl(Rdst);
 8478     __ addl(Rdst, BitsPerLong - 1);
 8479   %}
 8480   ins_pipe(ialu_reg);
 8481 %}
 8482 
 8483 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8484   predicate(UseCountTrailingZerosInstruction);
 8485   match(Set dst (CountTrailingZerosI src));
 8486   effect(KILL cr);
 8487 
 8488   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8489   ins_encode %{
 8490     __ tzcntl($dst$$Register, $src$$Register);
 8491   %}
 8492   ins_pipe(ialu_reg);
 8493 %}
 8494 
 8495 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8496   predicate(UseCountTrailingZerosInstruction);
 8497   match(Set dst (CountTrailingZerosI (LoadI src)));
 8498   effect(KILL cr);
 8499   ins_cost(175);
 8500   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8501   ins_encode %{
 8502     __ tzcntl($dst$$Register, $src$$Address);
 8503   %}
 8504   ins_pipe(ialu_reg_mem);
 8505 %}
 8506 
 8507 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8508   predicate(!UseCountTrailingZerosInstruction);
 8509   match(Set dst (CountTrailingZerosI src));
 8510   effect(KILL cr);
 8511 
 8512   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8513             "jnz     done\n\t"
 8514             "movl    $dst, 32\n"
 8515       "done:" %}
 8516   ins_encode %{
 8517     Register Rdst = $dst$$Register;
 8518     Label done;
 8519     __ bsfl(Rdst, $src$$Register);
 8520     __ jccb(Assembler::notZero, done);
 8521     __ movl(Rdst, BitsPerInt);
 8522     __ bind(done);
 8523   %}
 8524   ins_pipe(ialu_reg);
 8525 %}
 8526 
 8527 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8528   predicate(UseCountTrailingZerosInstruction);
 8529   match(Set dst (CountTrailingZerosL src));
 8530   effect(KILL cr);
 8531 
 8532   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8533   ins_encode %{
 8534     __ tzcntq($dst$$Register, $src$$Register);
 8535   %}
 8536   ins_pipe(ialu_reg);
 8537 %}
 8538 
 8539 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8540   predicate(UseCountTrailingZerosInstruction);
 8541   match(Set dst (CountTrailingZerosL (LoadL src)));
 8542   effect(KILL cr);
 8543   ins_cost(175);
 8544   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8545   ins_encode %{
 8546     __ tzcntq($dst$$Register, $src$$Address);
 8547   %}
 8548   ins_pipe(ialu_reg_mem);
 8549 %}
 8550 
 8551 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8552   predicate(!UseCountTrailingZerosInstruction);
 8553   match(Set dst (CountTrailingZerosL src));
 8554   effect(KILL cr);
 8555 
 8556   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8557             "jnz     done\n\t"
 8558             "movl    $dst, 64\n"
 8559       "done:" %}
 8560   ins_encode %{
 8561     Register Rdst = $dst$$Register;
 8562     Label done;
 8563     __ bsfq(Rdst, $src$$Register);
 8564     __ jccb(Assembler::notZero, done);
 8565     __ movl(Rdst, BitsPerLong);
 8566     __ bind(done);
 8567   %}
 8568   ins_pipe(ialu_reg);
 8569 %}
 8570 
 8571 //--------------- Reverse Operation Instructions ----------------
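// In outline: the non-GFNI flavors reverse the bits with a scalar
// shift-and-mask sequence in GPRs, while the GFNI flavors move the value to
// an XMM register and reverse the bits of each byte with a single
// Galois-field affine instruction before restoring byte order.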
 8572 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8573   predicate(!VM_Version::supports_gfni());
 8574   match(Set dst (ReverseI src));
 8575   effect(TEMP dst, TEMP rtmp, KILL cr);
 8576   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8577   ins_encode %{
 8578     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8579   %}
 8580   ins_pipe( ialu_reg );
 8581 %}
 8582 
 8583 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8584   predicate(VM_Version::supports_gfni());
 8585   match(Set dst (ReverseI src));
 8586   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8587   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8588   ins_encode %{
 8589     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8590   %}
 8591   ins_pipe( ialu_reg );
 8592 %}
 8593 
 8594 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8595   predicate(!VM_Version::supports_gfni());
 8596   match(Set dst (ReverseL src));
 8597   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8598   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8599   ins_encode %{
 8600     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8601   %}
 8602   ins_pipe( ialu_reg );
 8603 %}
 8604 
 8605 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8606   predicate(VM_Version::supports_gfni());
 8607   match(Set dst (ReverseL src));
 8608   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8609   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8610   ins_encode %{
 8611     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8612   %}
 8613   ins_pipe( ialu_reg );
 8614 %}
 8615 
 8616 //---------- Population Count Instructions -------------------------------------
 8617 
 8618 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8619   predicate(UsePopCountInstruction);
 8620   match(Set dst (PopCountI src));
 8621   effect(KILL cr);
 8622 
 8623   format %{ "popcnt  $dst, $src" %}
 8624   ins_encode %{
 8625     __ popcntl($dst$$Register, $src$$Register);
 8626   %}
 8627   ins_pipe(ialu_reg);
 8628 %}
 8629 
 8630 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8631   predicate(UsePopCountInstruction);
 8632   match(Set dst (PopCountI (LoadI mem)));
 8633   effect(KILL cr);
 8634 
 8635   format %{ "popcnt  $dst, $mem" %}
 8636   ins_encode %{
 8637     __ popcntl($dst$$Register, $mem$$Address);
 8638   %}
 8639   ins_pipe(ialu_reg);
 8640 %}
 8641 
 8642 // Note: Long.bitCount(long) returns an int.
 8643 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8644   predicate(UsePopCountInstruction);
 8645   match(Set dst (PopCountL src));
 8646   effect(KILL cr);
 8647 
 8648   format %{ "popcnt  $dst, $src" %}
 8649   ins_encode %{
 8650     __ popcntq($dst$$Register, $src$$Register);
 8651   %}
 8652   ins_pipe(ialu_reg);
 8653 %}
 8654 
 8655 // Note: Long.bitCount(long) returns an int.
 8656 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8657   predicate(UsePopCountInstruction);
 8658   match(Set dst (PopCountL (LoadL mem)));
 8659   effect(KILL cr);
 8660 
 8661   format %{ "popcnt  $dst, $mem" %}
 8662   ins_encode %{
 8663     __ popcntq($dst$$Register, $mem$$Address);
 8664   %}
 8665   ins_pipe(ialu_reg);
 8666 %}
 8667 
 8668 
 8669 //----------MemBar Instructions-----------------------------------------------
 8670 // Memory barrier flavors
 8671 
 8672 instruct membar_acquire()
 8673 %{
 8674   match(MemBarAcquire);
 8675   match(LoadFence);
 8676   ins_cost(0);
 8677 
 8678   size(0);
 8679   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8680   ins_encode();
 8681   ins_pipe(empty);
 8682 %}
 8683 
 8684 instruct membar_acquire_lock()
 8685 %{
 8686   match(MemBarAcquireLock);
 8687   ins_cost(0);
 8688 
 8689   size(0);
 8690   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8691   ins_encode();
 8692   ins_pipe(empty);
 8693 %}
 8694 
 8695 instruct membar_release()
 8696 %{
 8697   match(MemBarRelease);
 8698   match(StoreFence);
 8699   ins_cost(0);
 8700 
 8701   size(0);
 8702   format %{ "MEMBAR-release ! (empty encoding)" %}
 8703   ins_encode();
 8704   ins_pipe(empty);
 8705 %}
 8706 
 8707 instruct membar_release_lock()
 8708 %{
 8709   match(MemBarReleaseLock);
 8710   ins_cost(0);
 8711 
 8712   size(0);
 8713   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8714   ins_encode();
 8715   ins_pipe(empty);
 8716 %}
 8717 
 8718 instruct membar_volatile(rFlagsReg cr) %{
 8719   match(MemBarVolatile);
 8720   effect(KILL cr);
 8721   ins_cost(400);
 8722 
 8723   format %{
 8724     $$template
 8725     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8726   %}
 8727   ins_encode %{
 8728     __ membar(Assembler::StoreLoad);
 8729   %}
 8730   ins_pipe(pipe_slow);
 8731 %}
 8732 
 8733 instruct unnecessary_membar_volatile()
 8734 %{
 8735   match(MemBarVolatile);
 8736   predicate(Matcher::post_store_load_barrier(n));
 8737   ins_cost(0);
 8738 
 8739   size(0);
 8740   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8741   ins_encode();
 8742   ins_pipe(empty);
 8743 %}
 8744 
 8745 instruct membar_storestore() %{
 8746   match(MemBarStoreStore);
 8747   match(StoreStoreFence);
 8748   ins_cost(0);
 8749 
 8750   size(0);
 8751   format %{ "MEMBAR-storestore (empty encoding)" %}
 8752   ins_encode( );
 8753   ins_pipe(empty);
 8754 %}
 8755 
 8756 //----------Move Instructions--------------------------------------------------
 8757 
 8758 instruct castX2P(rRegP dst, rRegL src)
 8759 %{
 8760   match(Set dst (CastX2P src));
 8761 
 8762   format %{ "movq    $dst, $src\t# long->ptr" %}
 8763   ins_encode %{
 8764     if ($dst$$reg != $src$$reg) {
 8765       __ movptr($dst$$Register, $src$$Register);
 8766     }
 8767   %}
 8768   ins_pipe(ialu_reg_reg); // XXX
 8769 %}
 8770 
 8771 instruct castP2X(rRegL dst, rRegP src)
 8772 %{
 8773   match(Set dst (CastP2X src));
 8774 
 8775   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8776   ins_encode %{
 8777     if ($dst$$reg != $src$$reg) {
 8778       __ movptr($dst$$Register, $src$$Register);
 8779     }
 8780   %}
 8781   ins_pipe(ialu_reg_reg); // XXX
 8782 %}
 8783 
// Convert oop into int for vector alignment masking
 8785 instruct convP2I(rRegI dst, rRegP src)
 8786 %{
 8787   match(Set dst (ConvL2I (CastP2X src)));
 8788 
 8789   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8790   ins_encode %{
 8791     __ movl($dst$$Register, $src$$Register);
 8792   %}
 8793   ins_pipe(ialu_reg_reg); // XXX
 8794 %}
 8795 
// Convert compressed oop into int for vector alignment masking
// in the case of 32-bit oops (heap < 4GB).
 8798 instruct convN2I(rRegI dst, rRegN src)
 8799 %{
 8800   predicate(CompressedOops::shift() == 0);
 8801   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8802 
 8803   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8804   ins_encode %{
 8805     __ movl($dst$$Register, $src$$Register);
 8806   %}
 8807   ins_pipe(ialu_reg_reg); // XXX
 8808 %}
 8809 
 8810 // Convert oop pointer into compressed form
 8811 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 8812   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8813   match(Set dst (EncodeP src));
 8814   effect(KILL cr);
 8815   format %{ "encode_heap_oop $dst,$src" %}
 8816   ins_encode %{
 8817     Register s = $src$$Register;
 8818     Register d = $dst$$Register;
 8819     if (s != d) {
 8820       __ movq(d, s);
 8821     }
 8822     __ encode_heap_oop(d);
 8823   %}
 8824   ins_pipe(ialu_reg_long);
 8825 %}
 8826 
 8827 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8828   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8829   match(Set dst (EncodeP src));
 8830   effect(KILL cr);
 8831   format %{ "encode_heap_oop_not_null $dst,$src" %}
 8832   ins_encode %{
 8833     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 8834   %}
 8835   ins_pipe(ialu_reg_long);
 8836 %}
 8837 
 8838 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 8839   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 8840             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 8841   match(Set dst (DecodeN src));
 8842   effect(KILL cr);
 8843   format %{ "decode_heap_oop $dst,$src" %}
 8844   ins_encode %{
 8845     Register s = $src$$Register;
 8846     Register d = $dst$$Register;
 8847     if (s != d) {
 8848       __ movq(d, s);
 8849     }
 8850     __ decode_heap_oop(d);
 8851   %}
 8852   ins_pipe(ialu_reg_long);
 8853 %}
 8854 
 8855 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 8856   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 8857             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 8858   match(Set dst (DecodeN src));
 8859   effect(KILL cr);
 8860   format %{ "decode_heap_oop_not_null $dst,$src" %}
 8861   ins_encode %{
 8862     Register s = $src$$Register;
 8863     Register d = $dst$$Register;
 8864     if (s != d) {
 8865       __ decode_heap_oop_not_null(d, s);
 8866     } else {
 8867       __ decode_heap_oop_not_null(d);
 8868     }
 8869   %}
 8870   ins_pipe(ialu_reg_long);
 8871 %}
 8872 
 8873 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8874   match(Set dst (EncodePKlass src));
 8875   effect(TEMP dst, KILL cr);
 8876   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 8877   ins_encode %{
 8878     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 8879   %}
 8880   ins_pipe(ialu_reg_long);
 8881 %}
 8882 
 8883 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 8884   match(Set dst (DecodeNKlass src));
 8885   effect(TEMP dst, KILL cr);
 8886   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 8887   ins_encode %{
 8888     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 8889   %}
 8890   ins_pipe(ialu_reg_long);
 8891 %}
 8892 
 8893 //----------Conditional Move---------------------------------------------------
 8894 // Jump
 8895 // dummy instruction for generating temp registers
 8896 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 8897   match(Jump (LShiftL switch_val shift));
 8898   ins_cost(350);
 8899   predicate(false);
 8900   effect(TEMP dest);
 8901 
 8902   format %{ "leaq    $dest, [$constantaddress]\n\t"
 8903             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 8904   ins_encode %{
 8905     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 8906     // to do that and the compiler is using that register as one it can allocate.
 8907     // So we build it all by hand.
 8908     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 8909     // ArrayAddress dispatch(table, index);
 8910     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 8911     __ lea($dest$$Register, $constantaddress);
 8912     __ jmp(dispatch);
 8913   %}
 8914   ins_pipe(pipe_jmp);
 8915 %}
 8916 
 8917 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 8918   match(Jump (AddL (LShiftL switch_val shift) offset));
 8919   ins_cost(350);
 8920   effect(TEMP dest);
 8921 
 8922   format %{ "leaq    $dest, [$constantaddress]\n\t"
 8923             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 8924   ins_encode %{
 8925     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 8926     // to do that and the compiler is using that register as one it can allocate.
 8927     // So we build it all by hand.
 8928     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 8929     // ArrayAddress dispatch(table, index);
 8930     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 8931     __ lea($dest$$Register, $constantaddress);
 8932     __ jmp(dispatch);
 8933   %}
 8934   ins_pipe(pipe_jmp);
 8935 %}
 8936 
 8937 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 8938   match(Jump switch_val);
 8939   ins_cost(350);
 8940   effect(TEMP dest);
 8941 
 8942   format %{ "leaq    $dest, [$constantaddress]\n\t"
 8943             "jmp     [$dest + $switch_val]\n\t" %}
 8944   ins_encode %{
 8945     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 8946     // to do that and the compiler is using that register as one it can allocate.
 8947     // So we build it all by hand.
 8948     // Address index(noreg, switch_reg, Address::times_1);
 8949     // ArrayAddress dispatch(table, index);
 8950     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 8951     __ lea($dest$$Register, $constantaddress);
 8952     __ jmp(dispatch);
 8953   %}
 8954   ins_pipe(pipe_jmp);
 8955 %}
 8956 
 8957 // Conditional move
 8958 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 8959 %{
 8960   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 8961   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 8962 
 8963   ins_cost(100); // XXX
 8964   format %{ "setbn$cop $dst\t# signed, int" %}
 8965   ins_encode %{
 8966     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 8967     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 8968   %}
 8969   ins_pipe(ialu_reg);
 8970 %}
 8971 
 8972 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 8973 %{
 8974   predicate(!UseAPX);
 8975   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 8976 
 8977   ins_cost(200); // XXX
 8978   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 8979   ins_encode %{
 8980     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 8981   %}
 8982   ins_pipe(pipe_cmov_reg);
 8983 %}
 8984 
 8985 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 8986 %{
 8987   predicate(UseAPX);
 8988   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 8989 
 8990   ins_cost(200);
 8991   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 8992   ins_encode %{
 8993     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 8994   %}
 8995   ins_pipe(pipe_cmov_reg);
 8996 %}
 8997 
 8998 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 8999 %{
 9000   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9001   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9002 
 9003   ins_cost(100); // XXX
 9004   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9005   ins_encode %{
 9006     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9007     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9008   %}
 9009   ins_pipe(ialu_reg);
 9010 %}
 9011 
 9012 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9013   predicate(!UseAPX);
 9014   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9015 
 9016   ins_cost(200); // XXX
 9017   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9018   ins_encode %{
 9019     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9020   %}
 9021   ins_pipe(pipe_cmov_reg);
 9022 %}
 9023 
 9024 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9025   predicate(UseAPX);
 9026   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9027 
 9028   ins_cost(200);
 9029   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9030   ins_encode %{
 9031     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9032   %}
 9033   ins_pipe(pipe_cmov_reg);
 9034 %}
 9035 
 9036 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9037 %{
 9038   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9039   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9040 
 9041   ins_cost(100); // XXX
 9042   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9043   ins_encode %{
 9044     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9045     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9046   %}
 9047   ins_pipe(ialu_reg);
 9048 %}
 9049 
 9050 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9051   predicate(!UseAPX);
 9052   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9053   ins_cost(200);
 9054   expand %{
 9055     cmovI_regU(cop, cr, dst, src);
 9056   %}
 9057 %}
 9058 
 9059 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
 9060   predicate(UseAPX);
 9061   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9062   ins_cost(200);
 9063   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9064   ins_encode %{
 9065     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9066   %}
 9067   ins_pipe(pipe_cmov_reg);
 9068 %}
 9069 
 9070 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9071   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9072   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9073 
 9074   ins_cost(200); // XXX
 9075   format %{ "cmovpl  $dst, $src\n\t"
 9076             "cmovnel $dst, $src" %}
 9077   ins_encode %{
 9078     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9079     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9080   %}
 9081   ins_pipe(pipe_cmov_reg);
 9082 %}
 9083 
 9084 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9085   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9086   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9087   effect(TEMP dst);
 9088 
 9089   ins_cost(200);
 9090   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9091             "cmovnel  $dst, $src2" %}
 9092   ins_encode %{
 9093     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9094     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9095   %}
 9096   ins_pipe(pipe_cmov_reg);
 9097 %}
 9098 
 9099 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9100 // inputs of the CMove
 9101 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9102   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9103   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9104   effect(TEMP dst);
 9105 
 9106   ins_cost(200); // XXX
 9107   format %{ "cmovpl  $dst, $src\n\t"
 9108             "cmovnel $dst, $src" %}
 9109   ins_encode %{
 9110     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9111     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9112   %}
 9113   ins_pipe(pipe_cmov_reg);
 9114 %}
 9115 
// This special handling is needed only for the eq / neq comparisons, since
// NaN == NaN is false and the parity flag is set if either operand is NaN.
 9118 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9119   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9120   match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
 9121   effect(TEMP dst);
 9122 
 9123   ins_cost(200);
 9124   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9125             "cmovnel  $dst, $src2" %}
 9126   ins_encode %{
 9127     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9128     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9129   %}
 9130   ins_pipe(pipe_cmov_reg);
 9131 %}
 9132 
 9133 // Conditional move
 9134 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9135   predicate(!UseAPX);
 9136   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9137 
 9138   ins_cost(250); // XXX
 9139   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9140   ins_encode %{
 9141     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9142   %}
 9143   ins_pipe(pipe_cmov_mem);
 9144 %}
 9145 
 9146 // Conditional move
 9147 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9148 %{
 9149   predicate(UseAPX);
 9150   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9151 
 9152   ins_cost(250);
 9153   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9154   ins_encode %{
 9155     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9156   %}
 9157   ins_pipe(pipe_cmov_mem);
 9158 %}
 9159 
 9160 // Conditional move
 9161 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9162 %{
 9163   predicate(!UseAPX);
 9164   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9165 
 9166   ins_cost(250); // XXX
 9167   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9168   ins_encode %{
 9169     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9170   %}
 9171   ins_pipe(pipe_cmov_mem);
 9172 %}
 9173 
 9174 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9175   predicate(!UseAPX);
 9176   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9177   ins_cost(250);
 9178   expand %{
 9179     cmovI_memU(cop, cr, dst, src);
 9180   %}
 9181 %}
 9182 
 9183 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9184 %{
 9185   predicate(UseAPX);
 9186   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9187 
 9188   ins_cost(250);
 9189   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9190   ins_encode %{
 9191     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9192   %}
 9193   ins_pipe(pipe_cmov_mem);
 9194 %}
 9195 
 9196 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
 9197 %{
 9198   predicate(UseAPX);
 9199   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9200   ins_cost(250);
 9201   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9202   ins_encode %{
 9203     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9204   %}
 9205   ins_pipe(pipe_cmov_mem);
 9206 %}
 9207 
 9208 // Conditional move
 9209 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9210 %{
 9211   predicate(!UseAPX);
 9212   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9213 
 9214   ins_cost(200); // XXX
 9215   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9216   ins_encode %{
 9217     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9218   %}
 9219   ins_pipe(pipe_cmov_reg);
 9220 %}
 9221 
 9222 // Conditional move ndd
 9223 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9224 %{
 9225   predicate(UseAPX);
 9226   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9227 
 9228   ins_cost(200);
 9229   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9230   ins_encode %{
 9231     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9232   %}
 9233   ins_pipe(pipe_cmov_reg);
 9234 %}
 9235 
 9236 // Conditional move
 9237 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9238 %{
 9239   predicate(!UseAPX);
 9240   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9241 
 9242   ins_cost(200); // XXX
 9243   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9244   ins_encode %{
 9245     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9246   %}
 9247   ins_pipe(pipe_cmov_reg);
 9248 %}
 9249 
 9250 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9251   predicate(!UseAPX);
 9252   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9253   ins_cost(200);
 9254   expand %{
 9255     cmovN_regU(cop, cr, dst, src);
 9256   %}
 9257 %}
 9258 
 9259 // Conditional move ndd
 9260 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9261 %{
 9262   predicate(UseAPX);
 9263   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9264 
 9265   ins_cost(200);
 9266   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9267   ins_encode %{
 9268     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9269   %}
 9270   ins_pipe(pipe_cmov_reg);
 9271 %}
 9272 
 9273 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
 9274   predicate(UseAPX);
 9275   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9276   ins_cost(200);
 9277   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9278   ins_encode %{
 9279     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9280   %}
 9281   ins_pipe(pipe_cmov_reg);
 9282 %}
 9283 
 9284 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9285   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9286   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9287 
 9288   ins_cost(200); // XXX
 9289   format %{ "cmovpl  $dst, $src\n\t"
 9290             "cmovnel $dst, $src" %}
 9291   ins_encode %{
 9292     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9293     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9294   %}
 9295   ins_pipe(pipe_cmov_reg);
 9296 %}
 9297 
 9298 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9299 // inputs of the CMove
 9300 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9301   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9302   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9303 
 9304   ins_cost(200); // XXX
 9305   format %{ "cmovpl  $dst, $src\n\t"
 9306             "cmovnel $dst, $src" %}
 9307   ins_encode %{
 9308     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9309     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9310   %}
 9311   ins_pipe(pipe_cmov_reg);
 9312 %}
 9313 
 9314 // Conditional move
 9315 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9316 %{
 9317   predicate(!UseAPX);
 9318   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9319 
 9320   ins_cost(200); // XXX
 9321   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9322   ins_encode %{
 9323     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9324   %}
 9325   ins_pipe(pipe_cmov_reg);  // XXX
 9326 %}
 9327 
 9328 // Conditional move ndd
 9329 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9330 %{
 9331   predicate(UseAPX);
 9332   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9333 
 9334   ins_cost(200);
 9335   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9336   ins_encode %{
 9337     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9338   %}
 9339   ins_pipe(pipe_cmov_reg);
 9340 %}
 9341 
 9342 // Conditional move
 9343 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9344 %{
 9345   predicate(!UseAPX);
 9346   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9347 
 9348   ins_cost(200); // XXX
 9349   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9350   ins_encode %{
 9351     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9352   %}
 9353   ins_pipe(pipe_cmov_reg); // XXX
 9354 %}
 9355 
 9356 // Conditional move ndd
 9357 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9358 %{
 9359   predicate(UseAPX);
 9360   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9361 
 9362   ins_cost(200);
 9363   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9364   ins_encode %{
 9365     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9366   %}
 9367   ins_pipe(pipe_cmov_reg);
 9368 %}
 9369 
 9370 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9371   predicate(!UseAPX);
 9372   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9373   ins_cost(200);
 9374   expand %{
 9375     cmovP_regU(cop, cr, dst, src);
 9376   %}
 9377 %}
 9378 
 9379 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
 9380   predicate(UseAPX);
 9381   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9382   ins_cost(200);
 9383   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9384   ins_encode %{
 9385     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9386   %}
 9387   ins_pipe(pipe_cmov_reg);
 9388 %}
 9389 
 9390 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9391   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9392   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9393 
 9394   ins_cost(200); // XXX
 9395   format %{ "cmovpq  $dst, $src\n\t"
 9396             "cmovneq $dst, $src" %}
 9397   ins_encode %{
 9398     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9399     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9400   %}
 9401   ins_pipe(pipe_cmov_reg);
 9402 %}
 9403 
 9404 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9405   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9406   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9407   effect(TEMP dst);
 9408 
 9409   ins_cost(200);
 9410   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9411             "cmovneq  $dst, $src2" %}
 9412   ins_encode %{
 9413     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9414     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9415   %}
 9416   ins_pipe(pipe_cmov_reg);
 9417 %}
 9418 
 9419 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9420 // inputs of the CMove
 9421 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9422   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9423   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9424 
 9425   ins_cost(200); // XXX
 9426   format %{ "cmovpq  $dst, $src\n\t"
 9427             "cmovneq $dst, $src" %}
 9428   ins_encode %{
 9429     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9430     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9431   %}
 9432   ins_pipe(pipe_cmov_reg);
 9433 %}
 9434 
 9435 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9436   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9437   match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
 9438   effect(TEMP dst);
 9439 
 9440   ins_cost(200);
 9441   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9442             "cmovneq  $dst, $src2" %}
 9443   ins_encode %{
 9444     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9445     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9446   %}
 9447   ins_pipe(pipe_cmov_reg);
 9448 %}
 9449 
 9450 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9451 %{
 9452   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9453   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9454 
 9455   ins_cost(100); // XXX
 9456   format %{ "setbn$cop $dst\t# signed, long" %}
 9457   ins_encode %{
 9458     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9459     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9460   %}
 9461   ins_pipe(ialu_reg);
 9462 %}
 9463 
 9464 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9465 %{
 9466   predicate(!UseAPX);
 9467   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9468 
 9469   ins_cost(200); // XXX
 9470   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9471   ins_encode %{
 9472     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9473   %}
 9474   ins_pipe(pipe_cmov_reg);  // XXX
 9475 %}
 9476 
 9477 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9478 %{
 9479   predicate(UseAPX);
 9480   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9481 
 9482   ins_cost(200);
 9483   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9484   ins_encode %{
 9485     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9486   %}
 9487   ins_pipe(pipe_cmov_reg);
 9488 %}
 9489 
 9490 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9491 %{
 9492   predicate(!UseAPX);
 9493   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9494 
 9495   ins_cost(200); // XXX
 9496   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9497   ins_encode %{
 9498     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9499   %}
 9500   ins_pipe(pipe_cmov_mem);  // XXX
 9501 %}
 9502 
 9503 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9504 %{
 9505   predicate(UseAPX);
 9506   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9507 
 9508   ins_cost(200);
 9509   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9510   ins_encode %{
 9511     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9512   %}
 9513   ins_pipe(pipe_cmov_mem);
 9514 %}
 9515 
 9516 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9517 %{
 9518   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9519   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9520 
 9521   ins_cost(100); // XXX
 9522   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9523   ins_encode %{
 9524     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9525     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9526   %}
 9527   ins_pipe(ialu_reg);
 9528 %}
 9529 
 9530 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9531 %{
 9532   predicate(!UseAPX);
 9533   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9534 
 9535   ins_cost(200); // XXX
 9536   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9537   ins_encode %{
 9538     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9539   %}
 9540   ins_pipe(pipe_cmov_reg); // XXX
 9541 %}
 9542 
 9543 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9544 %{
 9545   predicate(UseAPX);
 9546   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9547 
 9548   ins_cost(200);
 9549   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9550   ins_encode %{
 9551     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9552   %}
 9553   ins_pipe(pipe_cmov_reg);
 9554 %}
 9555 
 9556 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9557 %{
 9558   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9559   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9560 
 9561   ins_cost(100); // XXX
 9562   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9563   ins_encode %{
 9564     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9565     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9566   %}
 9567   ins_pipe(ialu_reg);
 9568 %}
 9569 
 9570 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9571   predicate(!UseAPX);
 9572   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9573   ins_cost(200);
 9574   expand %{
 9575     cmovL_regU(cop, cr, dst, src);
 9576   %}
 9577 %}
 9578 
 9579 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
 9580 %{
 9581   predicate(UseAPX);
 9582   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9583   ins_cost(200);
 9584   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9585   ins_encode %{
 9586     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9587   %}
 9588   ins_pipe(pipe_cmov_reg);
 9589 %}
 9590 
 9591 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9592   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9593   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9594 
 9595   ins_cost(200); // XXX
 9596   format %{ "cmovpq  $dst, $src\n\t"
 9597             "cmovneq $dst, $src" %}
 9598   ins_encode %{
 9599     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9600     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9601   %}
 9602   ins_pipe(pipe_cmov_reg);
 9603 %}
 9604 
 9605 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9606   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9607   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9608   effect(TEMP dst);
 9609 
 9610   ins_cost(200);
 9611   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9612             "cmovneq  $dst, $src2" %}
 9613   ins_encode %{
 9614     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9615     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9616   %}
 9617   ins_pipe(pipe_cmov_reg);
 9618 %}
 9619 
 9620 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9621 // inputs of the CMove
 9622 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9623   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9624   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9625 
 9626   ins_cost(200); // XXX
 9627   format %{ "cmovpq  $dst, $src\n\t"
 9628             "cmovneq $dst, $src" %}
 9629   ins_encode %{
 9630     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9631     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9632   %}
 9633   ins_pipe(pipe_cmov_reg);
 9634 %}
 9635 
 9636 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9637   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9638   match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
 9639   effect(TEMP dst);
 9640 
 9641   ins_cost(200);
 9642   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9643             "cmovneq $dst, $src2" %}
 9644   ins_encode %{
 9645     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9646     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9647   %}
 9648   ins_pipe(pipe_cmov_reg);
 9649 %}
 9650 
 9651 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9652 %{
 9653   predicate(!UseAPX);
 9654   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9655 
 9656   ins_cost(200); // XXX
 9657   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9658   ins_encode %{
 9659     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9660   %}
 9661   ins_pipe(pipe_cmov_mem); // XXX
 9662 %}
 9663 
 9664 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9665   predicate(!UseAPX);
 9666   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9667   ins_cost(200);
 9668   expand %{
 9669     cmovL_memU(cop, cr, dst, src);
 9670   %}
 9671 %}
 9672 
 9673 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9674 %{
 9675   predicate(UseAPX);
 9676   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9677 
 9678   ins_cost(200);
 9679   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9680   ins_encode %{
 9681     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9682   %}
 9683   ins_pipe(pipe_cmov_mem);
 9684 %}
 9685 
 9686 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
 9687 %{
 9688   predicate(UseAPX);
 9689   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9690   ins_cost(200);
 9691   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9692   ins_encode %{
 9693     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9694   %}
 9695   ins_pipe(pipe_cmov_mem);
 9696 %}
 9697 
 9698 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9699 %{
 9700   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9701 
 9702   ins_cost(200); // XXX
 9703   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9704             "movss     $dst, $src\n"
 9705     "skip:" %}
 9706   ins_encode %{
 9707     Label Lskip;
 9708     // Invert sense of branch from sense of CMOV
 9709     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9710     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9711     __ bind(Lskip);
 9712   %}
 9713   ins_pipe(pipe_slow);
 9714 %}
 9715 
 9716 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9717 %{
 9718   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9719 
 9720   ins_cost(200); // XXX
 9721   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9722             "movss     $dst, $src\n"
 9723     "skip:" %}
 9724   ins_encode %{
 9725     Label Lskip;
 9726     // Invert sense of branch from sense of CMOV
 9727     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9728     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9729     __ bind(Lskip);
 9730   %}
 9731   ins_pipe(pipe_slow);
 9732 %}
 9733 
 9734 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9735   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9736   ins_cost(200);
 9737   expand %{
 9738     cmovF_regU(cop, cr, dst, src);
 9739   %}
 9740 %}
 9741 
 9742 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9743 %{
 9744   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9745 
 9746   ins_cost(200); // XXX
 9747   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9748             "movsd     $dst, $src\n"
 9749     "skip:" %}
 9750   ins_encode %{
 9751     Label Lskip;
 9752     // Invert sense of branch from sense of CMOV
 9753     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9754     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9755     __ bind(Lskip);
 9756   %}
 9757   ins_pipe(pipe_slow);
 9758 %}
 9759 
 9760 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9761 %{
 9762   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9763 
 9764   ins_cost(200); // XXX
 9765   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9766             "movsd     $dst, $src\n"
 9767     "skip:" %}
 9768   ins_encode %{
 9769     Label Lskip;
 9770     // Invert sense of branch from sense of CMOV
 9771     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9772     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9773     __ bind(Lskip);
 9774   %}
 9775   ins_pipe(pipe_slow);
 9776 %}
 9777 
 9778 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9779   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9780   ins_cost(200);
 9781   expand %{
 9782     cmovD_regU(cop, cr, dst, src);
 9783   %}
 9784 %}
 9785 
 9786 //----------Arithmetic Instructions--------------------------------------------
 9787 //----------Addition Instructions----------------------------------------------
 9788 
 9789 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9790 %{
 9791   predicate(!UseAPX);
 9792   match(Set dst (AddI dst src));
 9793   effect(KILL cr);
 9794   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9795   format %{ "addl    $dst, $src\t# int" %}
 9796   ins_encode %{
 9797     __ addl($dst$$Register, $src$$Register);
 9798   %}
 9799   ins_pipe(ialu_reg_reg);
 9800 %}
 9801 
 9802 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9803 %{
 9804   predicate(UseAPX);
 9805   match(Set dst (AddI src1 src2));
 9806   effect(KILL cr);
 9807   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9808 
 9809   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9810   ins_encode %{
 9811     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9812   %}
 9813   ins_pipe(ialu_reg_reg);
 9814 %}
 9815 
 9816 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9817 %{
 9818   predicate(!UseAPX);
 9819   match(Set dst (AddI dst src));
 9820   effect(KILL cr);
 9821   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9822 
 9823   format %{ "addl    $dst, $src\t# int" %}
 9824   ins_encode %{
 9825     __ addl($dst$$Register, $src$$constant);
 9826   %}
 9827   ins_pipe( ialu_reg );
 9828 %}
 9829 
 9830 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9831 %{
 9832   predicate(UseAPX);
 9833   match(Set dst (AddI src1 src2));
 9834   effect(KILL cr);
 9835   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9836 
 9837   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9838   ins_encode %{
 9839     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9840   %}
 9841   ins_pipe( ialu_reg );
 9842 %}
 9843 
 9844 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
 9845 %{
 9846   predicate(UseAPX);
 9847   match(Set dst (AddI (LoadI src1) src2));
 9848   effect(KILL cr);
 9849   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9850 
 9851   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9852   ins_encode %{
 9853     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
 9854   %}
 9855   ins_pipe( ialu_reg );
 9856 %}
 9857 
 9858 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9859 %{
 9860   predicate(!UseAPX);
 9861   match(Set dst (AddI dst (LoadI src)));
 9862   effect(KILL cr);
 9863   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9864 
 9865   ins_cost(150); // XXX
 9866   format %{ "addl    $dst, $src\t# int" %}
 9867   ins_encode %{
 9868     __ addl($dst$$Register, $src$$Address);
 9869   %}
 9870   ins_pipe(ialu_reg_mem);
 9871 %}
 9872 
 9873 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
 9874 %{
 9875   predicate(UseAPX);
 9876   match(Set dst (AddI src1 (LoadI src2)));
 9877   effect(KILL cr);
 9878   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9879 
 9880   ins_cost(150);
 9881   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9882   ins_encode %{
 9883     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
 9884   %}
 9885   ins_pipe(ialu_reg_mem);
 9886 %}
 9887 
 9888 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9889 %{
 9890   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9891   effect(KILL cr);
 9892   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9893 
 9894   ins_cost(150); // XXX
 9895   format %{ "addl    $dst, $src\t# int" %}
 9896   ins_encode %{
 9897     __ addl($dst$$Address, $src$$Register);
 9898   %}
 9899   ins_pipe(ialu_mem_reg);
 9900 %}
 9901 
 9902 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
 9903 %{
 9904   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9905   effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

 9909   ins_cost(125); // XXX
 9910   format %{ "addl    $dst, $src\t# int" %}
 9911   ins_encode %{
 9912     __ addl($dst$$Address, $src$$constant);
 9913   %}
 9914   ins_pipe(ialu_mem_imm);
 9915 %}
 9916 
 9917 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
 9918 %{
 9919   predicate(!UseAPX && UseIncDec);
 9920   match(Set dst (AddI dst src));
 9921   effect(KILL cr);
 9922 
 9923   format %{ "incl    $dst\t# int" %}
 9924   ins_encode %{
 9925     __ incrementl($dst$$Register);
 9926   %}
 9927   ins_pipe(ialu_reg);
 9928 %}
 9929 
 9930 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
 9931 %{
 9932   predicate(UseAPX && UseIncDec);
 9933   match(Set dst (AddI src val));
 9934   effect(KILL cr);
 9935 
 9936   format %{ "eincl    $dst, $src\t# int ndd" %}
 9937   ins_encode %{
 9938     __ eincl($dst$$Register, $src$$Register, false);
 9939   %}
 9940   ins_pipe(ialu_reg);
 9941 %}
 9942 
 9943 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
 9944 %{
 9945   predicate(UseAPX && UseIncDec);
 9946   match(Set dst (AddI (LoadI src) val));
 9947   effect(KILL cr);
 9948 
 9949   format %{ "eincl    $dst, $src\t# int ndd" %}
 9950   ins_encode %{
 9951     __ eincl($dst$$Register, $src$$Address, false);
 9952   %}
 9953   ins_pipe(ialu_reg);
 9954 %}
 9955 
 9956 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
 9957 %{
 9958   predicate(UseIncDec);
 9959   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9960   effect(KILL cr);
 9961 
 9962   ins_cost(125); // XXX
 9963   format %{ "incl    $dst\t# int" %}
 9964   ins_encode %{
 9965     __ incrementl($dst$$Address);
 9966   %}
 9967   ins_pipe(ialu_mem_imm);
 9968 %}
 9969 
 9970 // XXX why does that use AddI
 9971 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
 9972 %{
 9973   predicate(!UseAPX && UseIncDec);
 9974   match(Set dst (AddI dst src));
 9975   effect(KILL cr);
 9976 
 9977   format %{ "decl    $dst\t# int" %}
 9978   ins_encode %{
 9979     __ decrementl($dst$$Register);
 9980   %}
 9981   ins_pipe(ialu_reg);
 9982 %}
 9983 
 9984 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
 9985 %{
 9986   predicate(UseAPX && UseIncDec);
 9987   match(Set dst (AddI src val));
 9988   effect(KILL cr);
 9989 
 9990   format %{ "edecl    $dst, $src\t# int ndd" %}
 9991   ins_encode %{
 9992     __ edecl($dst$$Register, $src$$Register, false);
 9993   %}
 9994   ins_pipe(ialu_reg);
 9995 %}
 9996 
 9997 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
 9998 %{
 9999   predicate(UseAPX && UseIncDec);
10000   match(Set dst (AddI (LoadI src) val));
10001   effect(KILL cr);
10002 
10003   format %{ "edecl    $dst, $src\t# int ndd" %}
10004   ins_encode %{
10005     __ edecl($dst$$Register, $src$$Address, false);
10006   %}
10007   ins_pipe(ialu_reg);
10008 %}
10009 
10010 // XXX why does that use AddI
10011 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10012 %{
10013   predicate(UseIncDec);
10014   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10015   effect(KILL cr);
10016 
10017   ins_cost(125); // XXX
10018   format %{ "decl    $dst\t# int" %}
10019   ins_encode %{
10020     __ decrementl($dst$$Address);
10021   %}
10022   ins_pipe(ialu_mem_imm);
10023 %}
10024 
10025 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10026 %{
10027   predicate(VM_Version::supports_fast_2op_lea());
10028   match(Set dst (AddI (LShiftI index scale) disp));
10029 
10030   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10031   ins_encode %{
10032     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10033     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10034   %}
10035   ins_pipe(ialu_reg_reg);
10036 %}
10037 
10038 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10039 %{
10040   predicate(VM_Version::supports_fast_3op_lea());
10041   match(Set dst (AddI (AddI base index) disp));
10042 
10043   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10044   ins_encode %{
10045     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10046   %}
10047   ins_pipe(ialu_reg_reg);
10048 %}
10049 
10050 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10051 %{
10052   predicate(VM_Version::supports_fast_2op_lea());
10053   match(Set dst (AddI base (LShiftI index scale)));
10054 
10055   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10056   ins_encode %{
10057     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10058     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10059   %}
10060   ins_pipe(ialu_reg_reg);
10061 %}
10062 
10063 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10064 %{
10065   predicate(VM_Version::supports_fast_3op_lea());
10066   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10067 
10068   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10069   ins_encode %{
10070     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10071     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10072   %}
10073   ins_pipe(ialu_reg_reg);
10074 %}
10075 
10076 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10077 %{
10078   predicate(!UseAPX);
10079   match(Set dst (AddL dst src));
10080   effect(KILL cr);
10081   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10082 
10083   format %{ "addq    $dst, $src\t# long" %}
10084   ins_encode %{
10085     __ addq($dst$$Register, $src$$Register);
10086   %}
10087   ins_pipe(ialu_reg_reg);
10088 %}
10089 
10090 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10091 %{
10092   predicate(UseAPX);
10093   match(Set dst (AddL src1 src2));
10094   effect(KILL cr);
10095   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10096 
10097   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10098   ins_encode %{
10099     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10100   %}
10101   ins_pipe(ialu_reg_reg);
10102 %}
10103 
10104 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10105 %{
10106   predicate(!UseAPX);
10107   match(Set dst (AddL dst src));
10108   effect(KILL cr);
10109   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10110 
10111   format %{ "addq    $dst, $src\t# long" %}
10112   ins_encode %{
10113     __ addq($dst$$Register, $src$$constant);
10114   %}
10115   ins_pipe( ialu_reg );
10116 %}
10117 
10118 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10119 %{
10120   predicate(UseAPX);
10121   match(Set dst (AddL src1 src2));
10122   effect(KILL cr);
10123   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10124 
10125   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10126   ins_encode %{
10127     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10128   %}
10129   ins_pipe( ialu_reg );
10130 %}
10131 
10132 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10133 %{
10134   predicate(UseAPX);
10135   match(Set dst (AddL (LoadL src1) src2));
10136   effect(KILL cr);
10137   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10138 
10139   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10140   ins_encode %{
10141     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10142   %}
10143   ins_pipe( ialu_reg );
10144 %}
10145 
10146 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10147 %{
10148   predicate(!UseAPX);
10149   match(Set dst (AddL dst (LoadL src)));
10150   effect(KILL cr);
10151   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10152 
10153   ins_cost(150); // XXX
10154   format %{ "addq    $dst, $src\t# long" %}
10155   ins_encode %{
10156     __ addq($dst$$Register, $src$$Address);
10157   %}
10158   ins_pipe(ialu_reg_mem);
10159 %}
10160 
10161 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10162 %{
10163   predicate(UseAPX);
10164   match(Set dst (AddL src1 (LoadL src2)));
10165   effect(KILL cr);
10166   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10167 
10168   ins_cost(150);
10169   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10170   ins_encode %{
10171     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10172   %}
10173   ins_pipe(ialu_reg_mem);
10174 %}
10175 
10176 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10177 %{
10178   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10179   effect(KILL cr);
10180   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10181 
10182   ins_cost(150); // XXX
10183   format %{ "addq    $dst, $src\t# long" %}
10184   ins_encode %{
10185     __ addq($dst$$Address, $src$$Register);
10186   %}
10187   ins_pipe(ialu_mem_reg);
10188 %}
10189 
10190 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10191 %{
10192   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10193   effect(KILL cr);
10194   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10195 
10196   ins_cost(125); // XXX
10197   format %{ "addq    $dst, $src\t# long" %}
10198   ins_encode %{
10199     __ addq($dst$$Address, $src$$constant);
10200   %}
10201   ins_pipe(ialu_mem_imm);
10202 %}
10203 
10204 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10205 %{
10206   predicate(!UseAPX && UseIncDec);
10207   match(Set dst (AddL dst src));
10208   effect(KILL cr);
10209 
10210   format %{ "incq    $dst\t# long" %}
10211   ins_encode %{
10212     __ incrementq($dst$$Register);
10213   %}
10214   ins_pipe(ialu_reg);
10215 %}
10216 
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10218 %{
10219   predicate(UseAPX && UseIncDec);
10220   match(Set dst (AddL src val));
10221   effect(KILL cr);
10222 
10223   format %{ "eincq    $dst, $src\t# long ndd" %}
10224   ins_encode %{
10225     __ eincq($dst$$Register, $src$$Register, false);
10226   %}
10227   ins_pipe(ialu_reg);
10228 %}
10229 
10230 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10231 %{
10232   predicate(UseAPX && UseIncDec);
10233   match(Set dst (AddL (LoadL src) val));
10234   effect(KILL cr);
10235 
10236   format %{ "eincq    $dst, $src\t# long ndd" %}
10237   ins_encode %{
10238     __ eincq($dst$$Register, $src$$Address, false);
10239   %}
10240   ins_pipe(ialu_reg);
10241 %}
10242 
10243 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10244 %{
10245   predicate(UseIncDec);
10246   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10247   effect(KILL cr);
10248 
10249   ins_cost(125); // XXX
10250   format %{ "incq    $dst\t# long" %}
10251   ins_encode %{
10252     __ incrementq($dst$$Address);
10253   %}
10254   ins_pipe(ialu_mem_imm);
10255 %}
10256 
10257 // XXX why does that use AddL
10258 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10259 %{
10260   predicate(!UseAPX && UseIncDec);
10261   match(Set dst (AddL dst src));
10262   effect(KILL cr);
10263 
10264   format %{ "decq    $dst\t# long" %}
10265   ins_encode %{
10266     __ decrementq($dst$$Register);
10267   %}
10268   ins_pipe(ialu_reg);
10269 %}
10270 
10271 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10272 %{
10273   predicate(UseAPX && UseIncDec);
10274   match(Set dst (AddL src val));
10275   effect(KILL cr);
10276 
10277   format %{ "edecq    $dst, $src\t# long ndd" %}
10278   ins_encode %{
10279     __ edecq($dst$$Register, $src$$Register, false);
10280   %}
10281   ins_pipe(ialu_reg);
10282 %}
10283 
10284 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10285 %{
10286   predicate(UseAPX && UseIncDec);
10287   match(Set dst (AddL (LoadL src) val));
10288   effect(KILL cr);
10289 
10290   format %{ "edecq    $dst, $src\t# long ndd" %}
10291   ins_encode %{
10292     __ edecq($dst$$Register, $src$$Address, false);
10293   %}
10294   ins_pipe(ialu_reg);
10295 %}
10296 
10297 // XXX why does that use AddL
10298 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10299 %{
10300   predicate(UseIncDec);
10301   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10302   effect(KILL cr);
10303 
10304   ins_cost(125); // XXX
10305   format %{ "decq    $dst\t# long" %}
10306   ins_encode %{
10307     __ decrementq($dst$$Address);
10308   %}
10309   ins_pipe(ialu_mem_imm);
10310 %}
10311 
10312 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10313 %{
10314   predicate(VM_Version::supports_fast_2op_lea());
10315   match(Set dst (AddL (LShiftL index scale) disp));
10316 
10317   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10318   ins_encode %{
10319     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10320     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10321   %}
10322   ins_pipe(ialu_reg_reg);
10323 %}
10324 
10325 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10326 %{
10327   predicate(VM_Version::supports_fast_3op_lea());
10328   match(Set dst (AddL (AddL base index) disp));
10329 
10330   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10331   ins_encode %{
10332     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10333   %}
10334   ins_pipe(ialu_reg_reg);
10335 %}
10336 
10337 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10338 %{
10339   predicate(VM_Version::supports_fast_2op_lea());
10340   match(Set dst (AddL base (LShiftL index scale)));
10341 
10342   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10343   ins_encode %{
10344     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10345     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10346   %}
10347   ins_pipe(ialu_reg_reg);
10348 %}
10349 
10350 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10351 %{
10352   predicate(VM_Version::supports_fast_3op_lea());
10353   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10354 
10355   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10356   ins_encode %{
10357     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10358     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10359   %}
10360   ins_pipe(ialu_reg_reg);
10361 %}
10362 
10363 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10364 %{
10365   match(Set dst (AddP dst src));
10366   effect(KILL cr);
10367   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10368 
10369   format %{ "addq    $dst, $src\t# ptr" %}
10370   ins_encode %{
10371     __ addq($dst$$Register, $src$$Register);
10372   %}
10373   ins_pipe(ialu_reg_reg);
10374 %}
10375 
10376 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10377 %{
10378   match(Set dst (AddP dst src));
10379   effect(KILL cr);
10380   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10381 
10382   format %{ "addq    $dst, $src\t# ptr" %}
10383   ins_encode %{
10384     __ addq($dst$$Register, $src$$constant);
10385   %}
10386   ins_pipe( ialu_reg );
10387 %}
10388 
10389 // XXX addP mem ops ????
10390 
10391 instruct checkCastPP(rRegP dst)
10392 %{
10393   match(Set dst (CheckCastPP dst));
10394 
10395   size(0);
10396   format %{ "# checkcastPP of $dst" %}
10397   ins_encode(/* empty encoding */);
10398   ins_pipe(empty);
10399 %}
10400 
10401 instruct castPP(rRegP dst)
10402 %{
10403   match(Set dst (CastPP dst));
10404 
10405   size(0);
10406   format %{ "# castPP of $dst" %}
10407   ins_encode(/* empty encoding */);
10408   ins_pipe(empty);
10409 %}
10410 
10411 instruct castII(rRegI dst)
10412 %{
10413   predicate(VerifyConstraintCasts == 0);
10414   match(Set dst (CastII dst));
10415 
10416   size(0);
10417   format %{ "# castII of $dst" %}
10418   ins_encode(/* empty encoding */);
10419   ins_cost(0);
10420   ins_pipe(empty);
10421 %}
10422 
10423 instruct castII_checked(rRegI dst, rFlagsReg cr)
10424 %{
10425   predicate(VerifyConstraintCasts > 0);
10426   match(Set dst (CastII dst));
10427 
10428   effect(KILL cr);
10429   format %{ "# cast_checked_II $dst" %}
10430   ins_encode %{
10431     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10432   %}
10433   ins_pipe(pipe_slow);
10434 %}
10435 
10436 instruct castLL(rRegL dst)
10437 %{
10438   predicate(VerifyConstraintCasts == 0);
10439   match(Set dst (CastLL dst));
10440 
10441   size(0);
10442   format %{ "# castLL of $dst" %}
10443   ins_encode(/* empty encoding */);
10444   ins_cost(0);
10445   ins_pipe(empty);
10446 %}
10447 
10448 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10449 %{
10450   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10451   match(Set dst (CastLL dst));
10452 
10453   effect(KILL cr);
10454   format %{ "# cast_checked_LL $dst" %}
10455   ins_encode %{
10456     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10457   %}
10458   ins_pipe(pipe_slow);
10459 %}
10460 
10461 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10462 %{
10463   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10464   match(Set dst (CastLL dst));
10465 
10466   effect(KILL cr, TEMP tmp);
10467   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10468   ins_encode %{
10469     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10470   %}
10471   ins_pipe(pipe_slow);
10472 %}
10473 
10474 instruct castFF(regF dst)
10475 %{
10476   match(Set dst (CastFF dst));
10477 
10478   size(0);
10479   format %{ "# castFF of $dst" %}
10480   ins_encode(/* empty encoding */);
10481   ins_cost(0);
10482   ins_pipe(empty);
10483 %}
10484 
10485 instruct castHH(regF dst)
10486 %{
10487   match(Set dst (CastHH dst));
10488 
10489   size(0);
10490   format %{ "# castHH of $dst" %}
10491   ins_encode(/* empty encoding */);
10492   ins_cost(0);
10493   ins_pipe(empty);
10494 %}
10495 
10496 instruct castDD(regD dst)
10497 %{
10498   match(Set dst (CastDD dst));
10499 
10500   size(0);
10501   format %{ "# castDD of $dst" %}
10502   ins_encode(/* empty encoding */);
10503   ins_cost(0);
10504   ins_pipe(empty);
10505 %}
10506 
10507 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
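// All of the CAS forms below rely on the hardware semantics of lock cmpxchg:
// it compares rax with the memory operand; if equal it stores $newval and
// sets ZF, otherwise it loads the current memory value into rax and clears
// ZF. setcc then materializes ZF as the 0/1 result in $res.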
10508 instruct compareAndSwapP(rRegI res,
10509                          memory mem_ptr,
10510                          rax_RegP oldval, rRegP newval,
10511                          rFlagsReg cr)
10512 %{
10513   predicate(n->as_LoadStore()->barrier_data() == 0);
10514   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10515   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10516   effect(KILL cr, KILL oldval);
10517 
10518   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10519             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10520             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10521   ins_encode %{
10522     __ lock();
10523     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10524     __ setcc(Assembler::equal, $res$$Register);
10525   %}
10526   ins_pipe( pipe_cmpxchg );
10527 %}
10528 
10529 instruct compareAndSwapL(rRegI res,
10530                          memory mem_ptr,
10531                          rax_RegL oldval, rRegL newval,
10532                          rFlagsReg cr)
10533 %{
10534   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10535   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10536   effect(KILL cr, KILL oldval);
10537 
10538   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10539             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10540             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10541   ins_encode %{
10542     __ lock();
10543     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10544     __ setcc(Assembler::equal, $res$$Register);
10545   %}
10546   ins_pipe( pipe_cmpxchg );
10547 %}
10548 
10549 instruct compareAndSwapI(rRegI res,
10550                          memory mem_ptr,
10551                          rax_RegI oldval, rRegI newval,
10552                          rFlagsReg cr)
10553 %{
10554   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10555   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10556   effect(KILL cr, KILL oldval);
10557 
10558   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10559             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10560             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10561   ins_encode %{
10562     __ lock();
10563     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10564     __ setcc(Assembler::equal, $res$$Register);
10565   %}
10566   ins_pipe( pipe_cmpxchg );
10567 %}
10568 
10569 instruct compareAndSwapB(rRegI res,
10570                          memory mem_ptr,
10571                          rax_RegI oldval, rRegI newval,
10572                          rFlagsReg cr)
10573 %{
10574   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10575   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10576   effect(KILL cr, KILL oldval);
10577 
10578   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10579             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10580             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10581   ins_encode %{
10582     __ lock();
10583     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10584     __ setcc(Assembler::equal, $res$$Register);
10585   %}
10586   ins_pipe( pipe_cmpxchg );
10587 %}
10588 
10589 instruct compareAndSwapS(rRegI res,
10590                          memory mem_ptr,
10591                          rax_RegI oldval, rRegI newval,
10592                          rFlagsReg cr)
10593 %{
10594   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10595   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10596   effect(KILL cr, KILL oldval);
10597 
10598   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10599             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10600             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10601   ins_encode %{
10602     __ lock();
10603     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10604     __ setcc(Assembler::equal, $res$$Register);
10605   %}
10606   ins_pipe( pipe_cmpxchg );
10607 %}
10608 
10609 instruct compareAndSwapN(rRegI res,
10610                           memory mem_ptr,
10611                           rax_RegN oldval, rRegN newval,
10612                           rFlagsReg cr) %{
10613   predicate(n->as_LoadStore()->barrier_data() == 0);
10614   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10615   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10616   effect(KILL cr, KILL oldval);
10617 
10618   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10619             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10620             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10621   ins_encode %{
10622     __ lock();
10623     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10624     __ setcc(Assembler::equal, $res$$Register);
10625   %}
10626   ins_pipe( pipe_cmpxchg );
10627 %}
10628 
10629 instruct compareAndExchangeB(
10630                          memory mem_ptr,
10631                          rax_RegI oldval, rRegI newval,
10632                          rFlagsReg cr)
10633 %{
10634   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10635   effect(KILL cr);
10636 
10637   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10638             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10639   ins_encode %{
10640     __ lock();
10641     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10642   %}
10643   ins_pipe( pipe_cmpxchg );
10644 %}
10645 
10646 instruct compareAndExchangeS(
10647                          memory mem_ptr,
10648                          rax_RegI oldval, rRegI newval,
10649                          rFlagsReg cr)
10650 %{
10651   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10652   effect(KILL cr);
10653 
10654   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10655             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10656   ins_encode %{
10657     __ lock();
10658     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10659   %}
10660   ins_pipe( pipe_cmpxchg );
10661 %}
10662 
10663 instruct compareAndExchangeI(
10664                          memory mem_ptr,
10665                          rax_RegI oldval, rRegI newval,
10666                          rFlagsReg cr)
10667 %{
10668   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10669   effect(KILL cr);
10670 
10671   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10672             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10673   ins_encode %{
10674     __ lock();
10675     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10676   %}
10677   ins_pipe( pipe_cmpxchg );
10678 %}
10679 
10680 instruct compareAndExchangeL(
10681                          memory mem_ptr,
10682                          rax_RegL oldval, rRegL newval,
10683                          rFlagsReg cr)
10684 %{
10685   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10686   effect(KILL cr);
10687 
10688   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10689             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10690   ins_encode %{
10691     __ lock();
10692     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10693   %}
10694   ins_pipe( pipe_cmpxchg );
10695 %}
10696 
10697 instruct compareAndExchangeN(
10698                           memory mem_ptr,
10699                           rax_RegN oldval, rRegN newval,
10700                           rFlagsReg cr) %{
10701   predicate(n->as_LoadStore()->barrier_data() == 0);
10702   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10703   effect(KILL cr);
10704 
10705   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10706             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10707   ins_encode %{
10708     __ lock();
10709     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10710   %}
10711   ins_pipe( pipe_cmpxchg );
10712 %}
10713 
10714 instruct compareAndExchangeP(
10715                          memory mem_ptr,
10716                          rax_RegP oldval, rRegP newval,
10717                          rFlagsReg cr)
10718 %{
10719   predicate(n->as_LoadStore()->barrier_data() == 0);
10720   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10721   effect(KILL cr);
10722 
10723   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10724             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10725   ins_encode %{
10726     __ lock();
10727     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10728   %}
10729   ins_pipe( pipe_cmpxchg );
10730 %}
10731 
10732 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10733   predicate(n->as_LoadStore()->result_not_used());
10734   match(Set dummy (GetAndAddB mem add));
10735   effect(KILL cr);
10736   format %{ "addb_lock   $mem, $add" %}
10737   ins_encode %{
10738     __ lock();
10739     __ addb($mem$$Address, $add$$Register);
10740   %}
10741   ins_pipe(pipe_cmpxchg);
10742 %}
10743 
10744 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10745   predicate(n->as_LoadStore()->result_not_used());
10746   match(Set dummy (GetAndAddB mem add));
10747   effect(KILL cr);
10748   format %{ "addb_lock   $mem, $add" %}
10749   ins_encode %{
10750     __ lock();
10751     __ addb($mem$$Address, $add$$constant);
10752   %}
10753   ins_pipe(pipe_cmpxchg);
10754 %}
10755 
10756 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10757   predicate(!n->as_LoadStore()->result_not_used());
10758   match(Set newval (GetAndAddB mem newval));
10759   effect(KILL cr);
10760   format %{ "xaddb_lock  $mem, $newval" %}
10761   ins_encode %{
10762     __ lock();
10763     __ xaddb($mem$$Address, $newval$$Register);
10764   %}
10765   ins_pipe(pipe_cmpxchg);
10766 %}
10767 
10768 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10769   predicate(n->as_LoadStore()->result_not_used());
10770   match(Set dummy (GetAndAddS mem add));
10771   effect(KILL cr);
10772   format %{ "addw_lock   $mem, $add" %}
10773   ins_encode %{
10774     __ lock();
10775     __ addw($mem$$Address, $add$$Register);
10776   %}
10777   ins_pipe(pipe_cmpxchg);
10778 %}
10779 
10780 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10781   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10782   match(Set dummy (GetAndAddS mem add));
10783   effect(KILL cr);
10784   format %{ "addw_lock   $mem, $add" %}
10785   ins_encode %{
10786     __ lock();
10787     __ addw($mem$$Address, $add$$constant);
10788   %}
10789   ins_pipe(pipe_cmpxchg);
10790 %}
10791 
10792 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10793   predicate(!n->as_LoadStore()->result_not_used());
10794   match(Set newval (GetAndAddS mem newval));
10795   effect(KILL cr);
10796   format %{ "xaddw_lock  $mem, $newval" %}
10797   ins_encode %{
10798     __ lock();
10799     __ xaddw($mem$$Address, $newval$$Register);
10800   %}
10801   ins_pipe(pipe_cmpxchg);
10802 %}
10803 
10804 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10805   predicate(n->as_LoadStore()->result_not_used());
10806   match(Set dummy (GetAndAddI mem add));
10807   effect(KILL cr);
10808   format %{ "addl_lock   $mem, $add" %}
10809   ins_encode %{
10810     __ lock();
10811     __ addl($mem$$Address, $add$$Register);
10812   %}
10813   ins_pipe(pipe_cmpxchg);
10814 %}
10815 
10816 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10817   predicate(n->as_LoadStore()->result_not_used());
10818   match(Set dummy (GetAndAddI mem add));
10819   effect(KILL cr);
10820   format %{ "addl_lock   $mem, $add" %}
10821   ins_encode %{
10822     __ lock();
10823     __ addl($mem$$Address, $add$$constant);
10824   %}
10825   ins_pipe(pipe_cmpxchg);
10826 %}
10827 
10828 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10829   predicate(!n->as_LoadStore()->result_not_used());
10830   match(Set newval (GetAndAddI mem newval));
10831   effect(KILL cr);
10832   format %{ "xaddl_lock  $mem, $newval" %}
10833   ins_encode %{
10834     __ lock();
10835     __ xaddl($mem$$Address, $newval$$Register);
10836   %}
10837   ins_pipe(pipe_cmpxchg);
10838 %}
10839 
10840 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10841   predicate(n->as_LoadStore()->result_not_used());
10842   match(Set dummy (GetAndAddL mem add));
10843   effect(KILL cr);
10844   format %{ "addq_lock   $mem, $add" %}
10845   ins_encode %{
10846     __ lock();
10847     __ addq($mem$$Address, $add$$Register);
10848   %}
10849   ins_pipe(pipe_cmpxchg);
10850 %}
10851 
10852 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10853   predicate(n->as_LoadStore()->result_not_used());
10854   match(Set dummy (GetAndAddL mem add));
10855   effect(KILL cr);
10856   format %{ "addq_lock   $mem, $add" %}
10857   ins_encode %{
10858     __ lock();
10859     __ addq($mem$$Address, $add$$constant);
10860   %}
10861   ins_pipe(pipe_cmpxchg);
10862 %}
10863 
10864 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10865   predicate(!n->as_LoadStore()->result_not_used());
10866   match(Set newval (GetAndAddL mem newval));
10867   effect(KILL cr);
10868   format %{ "xaddq_lock  $mem, $newval" %}
10869   ins_encode %{
10870     __ lock();
10871     __ xaddq($mem$$Address, $newval$$Register);
10872   %}
10873   ins_pipe(pipe_cmpxchg);
10874 %}
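
// Note on the GetAndAdd forms above: when the fetched old value is unused
// (result_not_used()), a plain "lock add" is emitted instead of "lock xadd",
// saving the result writeback. Both are equally atomic; informally:
//
//   // lock xadd: old = *mem; *mem = old + add; result = old;
//   // lock add:  *mem = *mem + add;            /* no result */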
10875 
10876 instruct xchgB( memory mem, rRegI newval) %{
10877   match(Set newval (GetAndSetB mem newval));
10878   format %{ "XCHGB  $newval,[$mem]" %}
10879   ins_encode %{
10880     __ xchgb($newval$$Register, $mem$$Address);
10881   %}
10882   ins_pipe( pipe_cmpxchg );
10883 %}
10884 
10885 instruct xchgS( memory mem, rRegI newval) %{
10886   match(Set newval (GetAndSetS mem newval));
10887   format %{ "XCHGW  $newval,[$mem]" %}
10888   ins_encode %{
10889     __ xchgw($newval$$Register, $mem$$Address);
10890   %}
10891   ins_pipe( pipe_cmpxchg );
10892 %}
10893 
10894 instruct xchgI( memory mem, rRegI newval) %{
10895   match(Set newval (GetAndSetI mem newval));
10896   format %{ "XCHGL  $newval,[$mem]" %}
10897   ins_encode %{
10898     __ xchgl($newval$$Register, $mem$$Address);
10899   %}
10900   ins_pipe( pipe_cmpxchg );
10901 %}
10902 
10903 instruct xchgL( memory mem, rRegL newval) %{
10904   match(Set newval (GetAndSetL mem newval));
10905   format %{ "XCHGQ  $newval,[$mem]" %}
10906   ins_encode %{
10907     __ xchgq($newval$$Register, $mem$$Address);
10908   %}
10909   ins_pipe( pipe_cmpxchg );
10910 %}
10911 
10912 instruct xchgP( memory mem, rRegP newval) %{
10913   predicate(n->as_LoadStore()->barrier_data() == 0);
10914   match(Set newval (GetAndSetP mem newval));
10915   format %{ "XCHGQ  $newval,[$mem]" %}
10916   ins_encode %{
10917     __ xchgq($newval$$Register, $mem$$Address);
10918   %}
10919   ins_pipe( pipe_cmpxchg );
10920 %}
10921 
10922 instruct xchgN( memory mem, rRegN newval) %{
10923   predicate(n->as_LoadStore()->barrier_data() == 0);
10924   match(Set newval (GetAndSetN mem newval));
10925   format %{ "XCHGL  $newval,[$mem]" %}
10926   ins_encode %{
10927     __ xchgl($newval$$Register, $mem$$Address);
10928   %}
10929   ins_pipe( pipe_cmpxchg );
10930 %}
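
// XCHG with a memory operand is implicitly locked on x86, so the GetAndSet
// forms above need no lock() prefix; XCHG also leaves EFLAGS untouched,
// which is why these instructs do not kill rFlagsReg. Informally:
//
//   // atomically: old = *mem; *mem = newval; newval = old;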
10931 
10932 //----------Abs Instructions-------------------------------------------
10933 
10934 // Integer Absolute Instructions
10935 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10936 %{
10937   match(Set dst (AbsI src));
10938   effect(TEMP dst, KILL cr);
10939   format %{ "xorl    $dst, $dst\t# abs int\n\t"
10940             "subl    $dst, $src\n\t"
10941             "cmovll  $dst, $src" %}
10942   ins_encode %{
10943     __ xorl($dst$$Register, $dst$$Register);
10944     __ subl($dst$$Register, $src$$Register);
10945     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
10946   %}
10947 
10948   ins_pipe(ialu_reg_reg);
10949 %}
10950 
10951 // Long Absolute Instructions
10952 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10953 %{
10954   match(Set dst (AbsL src));
10955   effect(TEMP dst, KILL cr);
10956   format %{ "xorl    $dst, $dst\t# abs long\n\t"
10957             "subq    $dst, $src\n\t"
10958             "cmovlq  $dst, $src" %}
10959   ins_encode %{
10960     __ xorl($dst$$Register, $dst$$Register);
10961     __ subq($dst$$Register, $src$$Register);
10962     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
10963   %}
10964 
10965   ins_pipe(ialu_reg_reg);
10966 %}
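
// The abs sequences above are branchless: negate into $dst, then cmov the
// original value back when it was already positive. Equivalent sketch (note
// that min_jint/min_jlong stay negative, matching Java's Math.abs):
//
//   jint abs_int(jint src) {
//     jint dst = 0 - src;       // xorl + subl; flags reflect 0 - src
//     if (src > 0) dst = src;   // cmovl less: taken iff 0 - src < 0
//     return dst;
//   }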
10967 
10968 //----------Subtraction Instructions-------------------------------------------
10969 
10970 // Integer Subtraction Instructions
10971 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10972 %{
10973   predicate(!UseAPX);
10974   match(Set dst (SubI dst src));
10975   effect(KILL cr);
10976   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10977 
10978   format %{ "subl    $dst, $src\t# int" %}
10979   ins_encode %{
10980     __ subl($dst$$Register, $src$$Register);
10981   %}
10982   ins_pipe(ialu_reg_reg);
10983 %}
10984 
10985 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
10986 %{
10987   predicate(UseAPX);
10988   match(Set dst (SubI src1 src2));
10989   effect(KILL cr);
10990   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10991 
10992   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
10993   ins_encode %{
10994     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
10995   %}
10996   ins_pipe(ialu_reg_reg);
10997 %}
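
// The _ndd variants (predicate UseAPX) use Intel APX "new data destination"
// encodings: true three-operand ALU forms, so $dst need not alias the first
// source. Informally:
//
//   // esubl dst, src1, src2  ==  dst = src1 - src2  (dst may differ from src1)
//
// The trailing 'false' passed to the e*() assembler calls is assumed to be
// the APX no-flags (NF) selector; passing false keeps the flag updates,
// consistent with the flag() annotations on these instructs.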
10998 
10999 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11000 %{
11001   predicate(UseAPX);
11002   match(Set dst (SubI src1 src2));
11003   effect(KILL cr);
11004   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11005 
11006   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11007   ins_encode %{
11008     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11009   %}
11010   ins_pipe(ialu_reg_reg);
11011 %}
11012 
11013 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11014 %{
11015   predicate(UseAPX);
11016   match(Set dst (SubI (LoadI src1) src2));
11017   effect(KILL cr);
11018   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11019 
11020   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11021   ins_encode %{
11022     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11023   %}
11024   ins_pipe(ialu_reg_reg);
11025 %}
11026 
11027 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11028 %{
11029   predicate(!UseAPX);
11030   match(Set dst (SubI dst (LoadI src)));
11031   effect(KILL cr);
11032   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11033 
11034   ins_cost(150);
11035   format %{ "subl    $dst, $src\t# int" %}
11036   ins_encode %{
11037     __ subl($dst$$Register, $src$$Address);
11038   %}
11039   ins_pipe(ialu_reg_mem);
11040 %}
11041 
11042 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11043 %{
11044   predicate(UseAPX);
11045   match(Set dst (SubI src1 (LoadI src2)));
11046   effect(KILL cr);
11047   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11048 
11049   ins_cost(150);
11050   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11051   ins_encode %{
11052     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11053   %}
11054   ins_pipe(ialu_reg_mem);
11055 %}
11056 
11057 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11058 %{
11059   predicate(UseAPX);
11060   match(Set dst (SubI (LoadI src1) src2));
11061   effect(KILL cr);
11062   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11063 
11064   ins_cost(150);
11065   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11066   ins_encode %{
11067     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11068   %}
11069   ins_pipe(ialu_reg_mem);
11070 %}
11071 
11072 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11073 %{
11074   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11075   effect(KILL cr);
11076   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11077 
11078   ins_cost(150);
11079   format %{ "subl    $dst, $src\t# int" %}
11080   ins_encode %{
11081     __ subl($dst$$Address, $src$$Register);
11082   %}
11083   ins_pipe(ialu_mem_reg);
11084 %}
11085 
11086 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11087 %{
11088   predicate(!UseAPX);
11089   match(Set dst (SubL dst src));
11090   effect(KILL cr);
11091   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11092 
11093   format %{ "subq    $dst, $src\t# long" %}
11094   ins_encode %{
11095     __ subq($dst$$Register, $src$$Register);
11096   %}
11097   ins_pipe(ialu_reg_reg);
11098 %}
11099 
11100 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11101 %{
11102   predicate(UseAPX);
11103   match(Set dst (SubL src1 src2));
11104   effect(KILL cr);
11105   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11106 
11107   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11108   ins_encode %{
11109     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11110   %}
11111   ins_pipe(ialu_reg_reg);
11112 %}
11113 
11114 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11115 %{
11116   predicate(UseAPX);
11117   match(Set dst (SubL src1 src2));
11118   effect(KILL cr);
11119   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11120 
11121   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11122   ins_encode %{
11123     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11124   %}
11125   ins_pipe(ialu_reg_reg);
11126 %}
11127 
11128 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11129 %{
11130   predicate(UseAPX);
11131   match(Set dst (SubL (LoadL src1) src2));
11132   effect(KILL cr);
11133   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11134 
11135   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11136   ins_encode %{
11137     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11138   %}
11139   ins_pipe(ialu_reg_reg);
11140 %}
11141 
11142 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11143 %{
11144   predicate(!UseAPX);
11145   match(Set dst (SubL dst (LoadL src)));
11146   effect(KILL cr);
11147   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11148 
11149   ins_cost(150);
11150   format %{ "subq    $dst, $src\t# long" %}
11151   ins_encode %{
11152     __ subq($dst$$Register, $src$$Address);
11153   %}
11154   ins_pipe(ialu_reg_mem);
11155 %}
11156 
11157 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11158 %{
11159   predicate(UseAPX);
11160   match(Set dst (SubL src1 (LoadL src2)));
11161   effect(KILL cr);
11162   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11163 
11164   ins_cost(150);
11165   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11166   ins_encode %{
11167     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11168   %}
11169   ins_pipe(ialu_reg_mem);
11170 %}
11171 
11172 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11173 %{
11174   predicate(UseAPX);
11175   match(Set dst (SubL (LoadL src1) src2));
11176   effect(KILL cr);
11177   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11178 
11179   ins_cost(150);
11180   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11181   ins_encode %{
11182     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11183   %}
11184   ins_pipe(ialu_reg_mem);
11185 %}
11186 
11187 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11188 %{
11189   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11190   effect(KILL cr);
11191   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11192 
11193   ins_cost(150);
11194   format %{ "subq    $dst, $src\t# long" %}
11195   ins_encode %{
11196     __ subq($dst$$Address, $src$$Register);
11197   %}
11198   ins_pipe(ialu_mem_reg);
11199 %}
11200 
11201 // Subtract an integer from a pointer: C2 canonicalizes "ptr - int" into
11202 // AddP(ptr, SubI(0, int)), which is matched here as a single subq.
11203 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11204 %{
11205   match(Set dst (AddP dst (SubI zero src)));
11206   effect(KILL cr);
11207 
11208   format %{ "subq    $dst, $src\t# ptr - int" %}
11209   ins_encode %{
11210     __ subq($dst$$Register, $src$$Register);
11211   %}
11212   ins_pipe(ialu_reg_reg);
11213 %}
11214 
11215 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11216 %{
11217   predicate(!UseAPX);
11218   match(Set dst (SubI zero dst));
11219   effect(KILL cr);
11220   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11221 
11222   format %{ "negl    $dst\t# int" %}
11223   ins_encode %{
11224     __ negl($dst$$Register);
11225   %}
11226   ins_pipe(ialu_reg);
11227 %}
11228 
11229 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11230 %{
11231   predicate(UseAPX);
11232   match(Set dst (SubI zero src));
11233   effect(KILL cr);
11234   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11235 
11236   format %{ "enegl    $dst, $src\t# int ndd" %}
11237   ins_encode %{
11238     __ enegl($dst$$Register, $src$$Register, false);
11239   %}
11240   ins_pipe(ialu_reg);
11241 %}
11242 
11243 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11244 %{
11245   predicate(!UseAPX);
11246   match(Set dst (NegI dst));
11247   effect(KILL cr);
11248   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11249 
11250   format %{ "negl    $dst\t# int" %}
11251   ins_encode %{
11252     __ negl($dst$$Register);
11253   %}
11254   ins_pipe(ialu_reg);
11255 %}
11256 
11257 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11258 %{
11259   predicate(UseAPX);
11260   match(Set dst (NegI src));
11261   effect(KILL cr);
11262   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11263 
11264   format %{ "enegl    $dst, $src\t# int ndd" %}
11265   ins_encode %{
11266     __ enegl($dst$$Register, $src$$Register, false);
11267   %}
11268   ins_pipe(ialu_reg);
11269 %}
11270 
11271 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11272 %{
11273   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11274   effect(KILL cr);
11275   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11276 
11277   format %{ "negl    $dst\t# int" %}
11278   ins_encode %{
11279     __ negl($dst$$Address);
11280   %}
11281   ins_pipe(ialu_reg);
11282 %}
11283 
11284 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11285 %{
11286   predicate(!UseAPX);
11287   match(Set dst (SubL zero dst));
11288   effect(KILL cr);
11289   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11290 
11291   format %{ "negq    $dst\t# long" %}
11292   ins_encode %{
11293     __ negq($dst$$Register);
11294   %}
11295   ins_pipe(ialu_reg);
11296 %}
11297 
11298 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11299 %{
11300   predicate(UseAPX);
11301   match(Set dst (SubL zero src));
11302   effect(KILL cr);
11303   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11304 
11305   format %{ "enegq    $dst, $src\t# long ndd" %}
11306   ins_encode %{
11307     __ enegq($dst$$Register, $src$$Register, false);
11308   %}
11309   ins_pipe(ialu_reg);
11310 %}
11311 
11312 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11313 %{
11314   predicate(!UseAPX);
11315   match(Set dst (NegL dst));
11316   effect(KILL cr);
11317   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11318 
11319   format %{ "negq    $dst\t# long" %}
11320   ins_encode %{
11321     __ negq($dst$$Register);
11322   %}
11323   ins_pipe(ialu_reg);
11324 %}
11325 
11326 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11327 %{
11328   predicate(UseAPX);
11329   match(Set dst (NegL src));
11330   effect(KILL cr);
11331   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11332 
11333   format %{ "enegq    $dst, $src\t# long ndd" %}
11334   ins_encode %{
11335     __ enegq($dst$$Register, $src$$Register, false);
11336   %}
11337   ins_pipe(ialu_reg);
11338 %}
11339 
11340 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11341 %{
11342   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11343   effect(KILL cr);
11344   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11345 
11346   format %{ "negq    $dst\t# long" %}
11347   ins_encode %{
11348     __ negq($dst$$Address);
11349   %}
11350   ins_pipe(ialu_reg);
11351 %}
11352 
11353 //----------Multiplication/Division Instructions-------------------------------
11354 // Integer Multiplication Instructions
11355 // Multiply Register
11356 
11357 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11358 %{
11359   predicate(!UseAPX);
11360   match(Set dst (MulI dst src));
11361   effect(KILL cr);
11362 
11363   ins_cost(300);
11364   format %{ "imull   $dst, $src\t# int" %}
11365   ins_encode %{
11366     __ imull($dst$$Register, $src$$Register);
11367   %}
11368   ins_pipe(ialu_reg_reg_alu0);
11369 %}
11370 
11371 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11372 %{
11373   predicate(UseAPX);
11374   match(Set dst (MulI src1 src2));
11375   effect(KILL cr);
11376 
11377   ins_cost(300);
11378   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11379   ins_encode %{
11380     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11381   %}
11382   ins_pipe(ialu_reg_reg_alu0);
11383 %}
11384 
11385 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11386 %{
11387   match(Set dst (MulI src imm));
11388   effect(KILL cr);
11389 
11390   ins_cost(300);
11391   format %{ "imull   $dst, $src, $imm\t# int" %}
11392   ins_encode %{
11393     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11394   %}
11395   ins_pipe(ialu_reg_reg_alu0);
11396 %}
11397 
11398 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11399 %{
11400   predicate(!UseAPX);
11401   match(Set dst (MulI dst (LoadI src)));
11402   effect(KILL cr);
11403 
11404   ins_cost(350);
11405   format %{ "imull   $dst, $src\t# int" %}
11406   ins_encode %{
11407     __ imull($dst$$Register, $src$$Address);
11408   %}
11409   ins_pipe(ialu_reg_mem_alu0);
11410 %}
11411 
11412 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11413 %{
11414   predicate(UseAPX);
11415   match(Set dst (MulI src1 (LoadI src2)));
11416   effect(KILL cr);
11417 
11418   ins_cost(350);
11419   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11420   ins_encode %{
11421     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11422   %}
11423   ins_pipe(ialu_reg_mem_alu0);
11424 %}
11425 
11426 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11427 %{
11428   match(Set dst (MulI (LoadI src) imm));
11429   effect(KILL cr);
11430 
11431   ins_cost(300);
11432   format %{ "imull   $dst, $src, $imm\t# int" %}
11433   ins_encode %{
11434     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11435   %}
11436   ins_pipe(ialu_reg_mem_alu0);
11437 %}
11438 
11439 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11440 %{
11441   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11442   effect(KILL cr, KILL src2);
11443 
11444   expand %{ mulI_rReg(dst, src1, cr);
11445             mulI_rReg(src2, src3, cr);
11446             addI_rReg(dst, src2, cr); %}
11447 %}
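
// MulAddS2I folds two multiplies and an add: result = dst*src1 + src2*src3.
// The expand above lowers it onto existing instructs, reusing src2 (KILLed)
// to accumulate the second product. Equivalent sketch:
//
//   jint mul_add_s2i(jint dst, jint src1, jint src2, jint src3) {
//     dst  *= src1;         // mulI_rReg(dst, src1, cr)
//     src2 *= src3;         // mulI_rReg(src2, src3, cr)
//     return dst + src2;    // addI_rReg(dst, src2, cr)
//   }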
11448 
11449 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11450 %{
11451   predicate(!UseAPX);
11452   match(Set dst (MulL dst src));
11453   effect(KILL cr);
11454 
11455   ins_cost(300);
11456   format %{ "imulq   $dst, $src\t# long" %}
11457   ins_encode %{
11458     __ imulq($dst$$Register, $src$$Register);
11459   %}
11460   ins_pipe(ialu_reg_reg_alu0);
11461 %}
11462 
11463 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11464 %{
11465   predicate(UseAPX);
11466   match(Set dst (MulL src1 src2));
11467   effect(KILL cr);
11468 
11469   ins_cost(300);
11470   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11471   ins_encode %{
11472     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11473   %}
11474   ins_pipe(ialu_reg_reg_alu0);
11475 %}
11476 
11477 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11478 %{
11479   match(Set dst (MulL src imm));
11480   effect(KILL cr);
11481 
11482   ins_cost(300);
11483   format %{ "imulq   $dst, $src, $imm\t# long" %}
11484   ins_encode %{
11485     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11486   %}
11487   ins_pipe(ialu_reg_reg_alu0);
11488 %}
11489 
11490 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11491 %{
11492   predicate(!UseAPX);
11493   match(Set dst (MulL dst (LoadL src)));
11494   effect(KILL cr);
11495 
11496   ins_cost(350);
11497   format %{ "imulq   $dst, $src\t# long" %}
11498   ins_encode %{
11499     __ imulq($dst$$Register, $src$$Address);
11500   %}
11501   ins_pipe(ialu_reg_mem_alu0);
11502 %}
11503 
11504 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11505 %{
11506   predicate(UseAPX);
11507   match(Set dst (MulL src1 (LoadL src2)));
11508   effect(KILL cr);
11509 
11510   ins_cost(350);
11511   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11512   ins_encode %{
11513     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11514   %}
11515   ins_pipe(ialu_reg_mem_alu0);
11516 %}
11517 
11518 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11519 %{
11520   match(Set dst (MulL (LoadL src) imm));
11521   effect(KILL cr);
11522 
11523   ins_cost(300);
11524   format %{ "imulq   $dst, $src, $imm\t# long" %}
11525   ins_encode %{
11526     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11527   %}
11528   ins_pipe(ialu_reg_mem_alu0);
11529 %}
11530 
11531 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11532 %{
11533   match(Set dst (MulHiL src rax));
11534   effect(USE_KILL rax, KILL cr);
11535 
11536   ins_cost(300);
11537   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11538   ins_encode %{
11539     __ imulq($src$$Register);
11540   %}
11541   ins_pipe(ialu_reg_reg_alu0);
11542 %}
11543 
11544 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11545 %{
11546   match(Set dst (UMulHiL src rax));
11547   effect(USE_KILL rax, KILL cr);
11548 
11549   ins_cost(300);
11550   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11551   ins_encode %{
11552     __ mulq($src$$Register);
11553   %}
11554   ins_pipe(ialu_reg_reg_alu0);
11555 %}
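
// The one-operand imulq/mulq forms used above leave the full 128-bit product
// in RDX:RAX; MulHiL/UMulHiL keep only the high half (RDX). Sketch using a
// 128-bit intermediate (signed case; __int128 is a compiler extension):
//
//   jlong mul_hi(jlong a, jlong b) {
//     __int128 p = (__int128)a * (__int128)b;
//     return (jlong)(p >> 64);   // the value RDX holds after "imulq b"
//   }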
11556 
11557 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11558                    rFlagsReg cr)
11559 %{
11560   match(Set rax (DivI rax div));
11561   effect(KILL rdx, KILL cr);
11562 
11563   ins_cost(30*100+10*100); // XXX
11564   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11565             "jne,s   normal\n\t"
11566             "xorl    rdx, rdx\n\t"
11567             "cmpl    $div, -1\n\t"
11568             "je,s    done\n"
11569     "normal: cdql\n\t"
11570             "idivl   $div\n"
11571     "done:"        %}
11572   ins_encode(cdql_enc(div));
11573   ins_pipe(ialu_reg_reg_alu0);
11574 %}
11575 
11576 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11577                    rFlagsReg cr)
11578 %{
11579   match(Set rax (DivL rax div));
11580   effect(KILL rdx, KILL cr);
11581 
11582   ins_cost(30*100+10*100); // XXX
11583   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11584             "cmpq    rax, rdx\n\t"
11585             "jne,s   normal\n\t"
11586             "xorl    rdx, rdx\n\t"
11587             "cmpq    $div, -1\n\t"
11588             "je,s    done\n"
11589     "normal: cdqq\n\t"
11590             "idivq   $div\n"
11591     "done:"        %}
11592   ins_encode(cdqq_enc(div));
11593   ins_pipe(ialu_reg_reg_alu0);
11594 %}
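
// The division sequences above (and the DivMod/Mod forms below) guard the
// one overflowing combination, min_jint / -1 and min_jlong / -1: a raw idiv
// would raise #DE, so the encoding short-circuits to quotient = dividend,
// remainder = 0, matching Java semantics. Control-flow sketch of
// cdql_enc/cdqq_enc:
//
//   if (rax == min_value && div == -1) { rdx = 0; /* rax keeps min_value */ }
//   else { /* sign-extend rax into rdx */ cdq; idiv div; }
//   // afterwards: rax = quotient, rdx = remainder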
11595 
11596 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11597 %{
11598   match(Set rax (UDivI rax div));
11599   effect(KILL rdx, KILL cr);
11600 
11601   ins_cost(300);
11602   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11603   ins_encode %{
11604     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11605   %}
11606   ins_pipe(ialu_reg_reg_alu0);
11607 %}
11608 
11609 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11610 %{
11611   match(Set rax (UDivL rax div));
11612   effect(KILL rdx, KILL cr);
11613 
11614   ins_cost(300);
11615   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11616   ins_encode %{
11617      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11618   %}
11619   ins_pipe(ialu_reg_reg_alu0);
11620 %}
11621 
11622 // Integer DIVMOD with Register, both quotient and mod results
11623 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11624                              rFlagsReg cr)
11625 %{
11626   match(DivModI rax div);
11627   effect(KILL cr);
11628 
11629   ins_cost(30*100+10*100); // XXX
11630   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11631             "jne,s   normal\n\t"
11632             "xorl    rdx, rdx\n\t"
11633             "cmpl    $div, -1\n\t"
11634             "je,s    done\n"
11635     "normal: cdql\n\t"
11636             "idivl   $div\n"
11637     "done:"        %}
11638   ins_encode(cdql_enc(div));
11639   ins_pipe(pipe_slow);
11640 %}
11641 
11642 // Long DIVMOD with Register, both quotient and mod results
11643 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11644                              rFlagsReg cr)
11645 %{
11646   match(DivModL rax div);
11647   effect(KILL cr);
11648 
11649   ins_cost(30*100+10*100); // XXX
11650   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11651             "cmpq    rax, rdx\n\t"
11652             "jne,s   normal\n\t"
11653             "xorl    rdx, rdx\n\t"
11654             "cmpq    $div, -1\n\t"
11655             "je,s    done\n"
11656     "normal: cdqq\n\t"
11657             "idivq   $div\n"
11658     "done:"        %}
11659   ins_encode(cdqq_enc(div));
11660   ins_pipe(pipe_slow);
11661 %}
11662 
11663 // Unsigned integer DIVMOD with Register, both quotient and mod results
11664 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11665                               no_rax_rdx_RegI div, rFlagsReg cr)
11666 %{
11667   match(UDivModI rax div);
11668   effect(TEMP tmp, KILL cr);
11669 
11670   ins_cost(300);
11671   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11672             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11673           %}
11674   ins_encode %{
11675     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11676   %}
11677   ins_pipe(pipe_slow);
11678 %}
11679 
11680 // Unsigned long DIVMOD with Register, both quotient and mod results
11681 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11682                               no_rax_rdx_RegL div, rFlagsReg cr)
11683 %{
11684   match(UDivModL rax div);
11685   effect(TEMP tmp, KILL cr);
11686 
11687   ins_cost(300);
11688   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11689             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11690           %}
11691   ins_encode %{
11692     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11693   %}
11694   ins_pipe(pipe_slow);
11695 %}
11696 
11697 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11698                    rFlagsReg cr)
11699 %{
11700   match(Set rdx (ModI rax div));
11701   effect(KILL rax, KILL cr);
11702 
11703   ins_cost(300); // XXX
11704   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11705             "jne,s   normal\n\t"
11706             "xorl    rdx, rdx\n\t"
11707             "cmpl    $div, -1\n\t"
11708             "je,s    done\n"
11709     "normal: cdql\n\t"
11710             "idivl   $div\n"
11711     "done:"        %}
11712   ins_encode(cdql_enc(div));
11713   ins_pipe(ialu_reg_reg_alu0);
11714 %}
11715 
11716 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11717                    rFlagsReg cr)
11718 %{
11719   match(Set rdx (ModL rax div));
11720   effect(KILL rax, KILL cr);
11721 
11722   ins_cost(300); // XXX
11723   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11724             "cmpq    rax, rdx\n\t"
11725             "jne,s   normal\n\t"
11726             "xorl    rdx, rdx\n\t"
11727             "cmpq    $div, -1\n\t"
11728             "je,s    done\n"
11729     "normal: cdqq\n\t"
11730             "idivq   $div\n"
11731     "done:"        %}
11732   ins_encode(cdqq_enc(div));
11733   ins_pipe(ialu_reg_reg_alu0);
11734 %}
11735 
11736 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11737 %{
11738   match(Set rdx (UModI rax div));
11739   effect(KILL rax, KILL cr);
11740 
11741   ins_cost(300);
11742   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11743   ins_encode %{
11744     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11745   %}
11746   ins_pipe(ialu_reg_reg_alu0);
11747 %}
11748 
11749 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11750 %{
11751   match(Set rdx (UModL rax div));
11752   effect(KILL rax, KILL cr);
11753 
11754   ins_cost(300);
11755   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11756   ins_encode %{
11757     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11758   %}
11759   ins_pipe(ialu_reg_reg_alu0);
11760 %}
11761 
11762 // Integer Shift Instructions
11763 // Shift Left by one, two, three
11764 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11765 %{
11766   predicate(!UseAPX);
11767   match(Set dst (LShiftI dst shift));
11768   effect(KILL cr);
11769 
11770   format %{ "sall    $dst, $shift" %}
11771   ins_encode %{
11772     __ sall($dst$$Register, $shift$$constant);
11773   %}
11774   ins_pipe(ialu_reg);
11775 %}
11776 
11777 // Shift Left by one, two, three
11778 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11779 %{
11780   predicate(UseAPX);
11781   match(Set dst (LShiftI src shift));
11782   effect(KILL cr);
11783 
11784   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11785   ins_encode %{
11786     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11787   %}
11788   ins_pipe(ialu_reg);
11789 %}
11790 
11791 // Shift Left by 8-bit immediate
11792 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11793 %{
11794   predicate(!UseAPX);
11795   match(Set dst (LShiftI dst shift));
11796   effect(KILL cr);
11797 
11798   format %{ "sall    $dst, $shift" %}
11799   ins_encode %{
11800     __ sall($dst$$Register, $shift$$constant);
11801   %}
11802   ins_pipe(ialu_reg);
11803 %}
11804 
11805 // Shift Left by 8-bit immediate
11806 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11807 %{
11808   predicate(UseAPX);
11809   match(Set dst (LShiftI src shift));
11810   effect(KILL cr);
11811 
11812   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11813   ins_encode %{
11814     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11815   %}
11816   ins_pipe(ialu_reg);
11817 %}
11818 
11819 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11820 %{
11821   predicate(UseAPX);
11822   match(Set dst (LShiftI (LoadI src) shift));
11823   effect(KILL cr);
11824 
11825   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11826   ins_encode %{
11827     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11828   %}
11829   ins_pipe(ialu_reg);
11830 %}
11831 
11832 // Shift Left by 8-bit immediate
11833 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11834 %{
11835   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11836   effect(KILL cr);
11837 
11838   format %{ "sall    $dst, $shift" %}
11839   ins_encode %{
11840     __ sall($dst$$Address, $shift$$constant);
11841   %}
11842   ins_pipe(ialu_mem_imm);
11843 %}
11844 
11845 // Shift Left by variable
11846 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11847 %{
11848   predicate(!VM_Version::supports_bmi2());
11849   match(Set dst (LShiftI dst shift));
11850   effect(KILL cr);
11851 
11852   format %{ "sall    $dst, $shift" %}
11853   ins_encode %{
11854     __ sall($dst$$Register);
11855   %}
11856   ins_pipe(ialu_reg_reg);
11857 %}
11858 
11859 // Shift Left by variable
11860 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11861 %{
11862   predicate(!VM_Version::supports_bmi2());
11863   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11864   effect(KILL cr);
11865 
11866   format %{ "sall    $dst, $shift" %}
11867   ins_encode %{
11868     __ sall($dst$$Address);
11869   %}
11870   ins_pipe(ialu_mem_reg);
11871 %}
11872 
11873 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11874 %{
11875   predicate(VM_Version::supports_bmi2());
11876   match(Set dst (LShiftI src shift));
11877 
11878   format %{ "shlxl   $dst, $src, $shift" %}
11879   ins_encode %{
11880     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11881   %}
11882   ins_pipe(ialu_reg_reg);
11883 %}
11884 
11885 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11886 %{
11887   predicate(VM_Version::supports_bmi2());
11888   match(Set dst (LShiftI (LoadI src) shift));
11889   ins_cost(175);
11890   format %{ "shlxl   $dst, $src, $shift" %}
11891   ins_encode %{
11892     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11893   %}
11894   ins_pipe(ialu_reg_mem);
11895 %}
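
// The BMI2 forms (shlxl above, sarxl/shrxl below) take the shift count in an
// arbitrary register rather than the fixed RCX and, unlike the legacy
// shifts, do not modify EFLAGS -- hence no KILL cr effect. Informally:
//
//   // shlxl dst, src, cnt  ==  dst = src << (cnt & 31);  /* flags unchanged */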
11896 
11897 // Arithmetic Shift Right by 8-bit immediate
11898 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11899 %{
11900   predicate(!UseAPX);
11901   match(Set dst (RShiftI dst shift));
11902   effect(KILL cr);
11903 
11904   format %{ "sarl    $dst, $shift" %}
11905   ins_encode %{
11906     __ sarl($dst$$Register, $shift$$constant);
11907   %}
11908   ins_pipe(ialu_mem_imm);
11909 %}
11910 
11911 // Arithmetic Shift Right by 8-bit immediate
11912 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11913 %{
11914   predicate(UseAPX);
11915   match(Set dst (RShiftI src shift));
11916   effect(KILL cr);
11917 
11918   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
11919   ins_encode %{
11920     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
11921   %}
11922   ins_pipe(ialu_mem_imm);
11923 %}
11924 
11925 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11926 %{
11927   predicate(UseAPX);
11928   match(Set dst (RShiftI (LoadI src) shift));
11929   effect(KILL cr);
11930 
11931   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
11932   ins_encode %{
11933     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
11934   %}
11935   ins_pipe(ialu_mem_imm);
11936 %}
11937 
11938 // Arithmetic Shift Right by 8-bit immediate
11939 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11940 %{
11941   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11942   effect(KILL cr);
11943 
11944   format %{ "sarl    $dst, $shift" %}
11945   ins_encode %{
11946     __ sarl($dst$$Address, $shift$$constant);
11947   %}
11948   ins_pipe(ialu_mem_imm);
11949 %}
11950 
11951 // Arithmetic Shift Right by variable
11952 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11953 %{
11954   predicate(!VM_Version::supports_bmi2());
11955   match(Set dst (RShiftI dst shift));
11956   effect(KILL cr);
11957 
11958   format %{ "sarl    $dst, $shift" %}
11959   ins_encode %{
11960     __ sarl($dst$$Register);
11961   %}
11962   ins_pipe(ialu_reg_reg);
11963 %}
11964 
11965 // Arithmetic Shift Right by variable
11966 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11967 %{
11968   predicate(!VM_Version::supports_bmi2());
11969   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11970   effect(KILL cr);
11971 
11972   format %{ "sarl    $dst, $shift" %}
11973   ins_encode %{
11974     __ sarl($dst$$Address);
11975   %}
11976   ins_pipe(ialu_mem_reg);
11977 %}
11978 
11979 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11980 %{
11981   predicate(VM_Version::supports_bmi2());
11982   match(Set dst (RShiftI src shift));
11983 
11984   format %{ "sarxl   $dst, $src, $shift" %}
11985   ins_encode %{
11986     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
11987   %}
11988   ins_pipe(ialu_reg_reg);
11989 %}
11990 
11991 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
11992 %{
11993   predicate(VM_Version::supports_bmi2());
11994   match(Set dst (RShiftI (LoadI src) shift));
11995   ins_cost(175);
11996   format %{ "sarxl   $dst, $src, $shift" %}
11997   ins_encode %{
11998     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
11999   %}
12000   ins_pipe(ialu_reg_mem);
12001 %}
12002 
12003 // Logical Shift Right by 8-bit immediate
12004 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12005 %{
12006   predicate(!UseAPX);
12007   match(Set dst (URShiftI dst shift));
12008   effect(KILL cr);
12009 
12010   format %{ "shrl    $dst, $shift" %}
12011   ins_encode %{
12012     __ shrl($dst$$Register, $shift$$constant);
12013   %}
12014   ins_pipe(ialu_reg);
12015 %}
12016 
12017 // Logical Shift Right by 8-bit immediate
12018 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12019 %{
12020   predicate(UseAPX);
12021   match(Set dst (URShiftI src shift));
12022   effect(KILL cr);
12023 
12024   format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12025   ins_encode %{
12026     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12027   %}
12028   ins_pipe(ialu_reg);
12029 %}
12030 
12031 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12032 %{
12033   predicate(UseAPX);
12034   match(Set dst (URShiftI (LoadI src) shift));
12035   effect(KILL cr);
12036 
12037   format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12038   ins_encode %{
12039     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12040   %}
12041   ins_pipe(ialu_reg);
12042 %}
12043 
12044 // Logical Shift Right by 8-bit immediate
12045 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12046 %{
12047   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12048   effect(KILL cr);
12049 
12050   format %{ "shrl    $dst, $shift" %}
12051   ins_encode %{
12052     __ shrl($dst$$Address, $shift$$constant);
12053   %}
12054   ins_pipe(ialu_mem_imm);
12055 %}
12056 
12057 // Logical Shift Right by variable
12058 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12059 %{
12060   predicate(!VM_Version::supports_bmi2());
12061   match(Set dst (URShiftI dst shift));
12062   effect(KILL cr);
12063 
12064   format %{ "shrl    $dst, $shift" %}
12065   ins_encode %{
12066     __ shrl($dst$$Register);
12067   %}
12068   ins_pipe(ialu_reg_reg);
12069 %}
12070 
12071 // Logical Shift Right by variable
12072 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12073 %{
12074   predicate(!VM_Version::supports_bmi2());
12075   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12076   effect(KILL cr);
12077 
12078   format %{ "shrl    $dst, $shift" %}
12079   ins_encode %{
12080     __ shrl($dst$$Address);
12081   %}
12082   ins_pipe(ialu_mem_reg);
12083 %}
12084 
12085 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12086 %{
12087   predicate(VM_Version::supports_bmi2());
12088   match(Set dst (URShiftI src shift));
12089 
12090   format %{ "shrxl   $dst, $src, $shift" %}
12091   ins_encode %{
12092     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12093   %}
12094   ins_pipe(ialu_reg_reg);
12095 %}
12096 
12097 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12098 %{
12099   predicate(VM_Version::supports_bmi2());
12100   match(Set dst (URShiftI (LoadI src) shift));
12101   ins_cost(175);
12102   format %{ "shrxl   $dst, $src, $shift" %}
12103   ins_encode %{
12104     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12105   %}
12106   ins_pipe(ialu_reg_mem);
12107 %}
12108 
12109 // Long Shift Instructions
12110 // Shift Left by one, two, three
12111 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12112 %{
12113   predicate(!UseAPX);
12114   match(Set dst (LShiftL dst shift));
12115   effect(KILL cr);
12116 
12117   format %{ "salq    $dst, $shift" %}
12118   ins_encode %{
12119     __ salq($dst$$Register, $shift$$constant);
12120   %}
12121   ins_pipe(ialu_reg);
12122 %}
12123 
12124 // Shift Left by one, two, three
12125 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12126 %{
12127   predicate(UseAPX);
12128   match(Set dst (LShiftL src shift));
12129   effect(KILL cr);
12130 
12131   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12132   ins_encode %{
12133     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12134   %}
12135   ins_pipe(ialu_reg);
12136 %}
12137 
12138 // Shift Left by 8-bit immediate
12139 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12140 %{
12141   predicate(!UseAPX);
12142   match(Set dst (LShiftL dst shift));
12143   effect(KILL cr);
12144 
12145   format %{ "salq    $dst, $shift" %}
12146   ins_encode %{
12147     __ salq($dst$$Register, $shift$$constant);
12148   %}
12149   ins_pipe(ialu_reg);
12150 %}
12151 
12152 // Shift Left by 8-bit immediate
12153 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12154 %{
12155   predicate(UseAPX);
12156   match(Set dst (LShiftL src shift));
12157   effect(KILL cr);
12158 
12159   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12160   ins_encode %{
12161     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12162   %}
12163   ins_pipe(ialu_reg);
12164 %}
12165 
12166 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12167 %{
12168   predicate(UseAPX);
12169   match(Set dst (LShiftL (LoadL src) shift));
12170   effect(KILL cr);
12171 
12172   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12173   ins_encode %{
12174     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12175   %}
12176   ins_pipe(ialu_reg);
12177 %}
12178 
12179 // Shift Left by 8-bit immediate
12180 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12181 %{
12182   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12183   effect(KILL cr);
12184 
12185   format %{ "salq    $dst, $shift" %}
12186   ins_encode %{
12187     __ salq($dst$$Address, $shift$$constant);
12188   %}
12189   ins_pipe(ialu_mem_imm);
12190 %}
12191 
12192 // Shift Left by variable
12193 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12194 %{
12195   predicate(!VM_Version::supports_bmi2());
12196   match(Set dst (LShiftL dst shift));
12197   effect(KILL cr);
12198 
12199   format %{ "salq    $dst, $shift" %}
12200   ins_encode %{
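    // One-operand salq form: the shift count is implicit in CL (rcx_RegI).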
12201     __ salq($dst$$Register);
12202   %}
12203   ins_pipe(ialu_reg_reg);
12204 %}
12205 
12206 // Shift Left by variable
12207 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12208 %{
12209   predicate(!VM_Version::supports_bmi2());
12210   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12211   effect(KILL cr);
12212 
12213   format %{ "salq    $dst, $shift" %}
12214   ins_encode %{
12215     __ salq($dst$$Address);
12216   %}
12217   ins_pipe(ialu_mem_reg);
12218 %}
12219 
12220 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12221 %{
12222   predicate(VM_Version::supports_bmi2());
12223   match(Set dst (LShiftL src shift));
12224 
12225   format %{ "shlxq   $dst, $src, $shift" %}
12226   ins_encode %{
12227     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12228   %}
12229   ins_pipe(ialu_reg_reg);
12230 %}
12231 
12232 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12233 %{
12234   predicate(VM_Version::supports_bmi2());
12235   match(Set dst (LShiftL (LoadL src) shift));
12236   ins_cost(175);
12237   format %{ "shlxq   $dst, $src, $shift" %}
12238   ins_encode %{
12239     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12240   %}
12241   ins_pipe(ialu_reg_mem);
12242 %}
12243 
12244 // Arithmetic Shift Right by 8-bit immediate
12245 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12246 %{
12247   predicate(!UseAPX);
12248   match(Set dst (RShiftL dst shift));
12249   effect(KILL cr);
12250 
12251   format %{ "sarq    $dst, $shift" %}
12252   ins_encode %{
12253     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12254   %}
  ins_pipe(ialu_reg);
12256 %}
12257 
12258 // Arithmetic Shift Right by 8-bit immediate
12259 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12260 %{
12261   predicate(UseAPX);
12262   match(Set dst (RShiftL src shift));
12263   effect(KILL cr);
12264 
12265   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12266   ins_encode %{
12267     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12268   %}
  ins_pipe(ialu_reg);
12270 %}
12271 
12272 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12273 %{
12274   predicate(UseAPX);
12275   match(Set dst (RShiftL (LoadL src) shift));
12276   effect(KILL cr);
12277 
12278   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12279   ins_encode %{
12280     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12281   %}
  ins_pipe(ialu_reg);
12283 %}
12284 
12285 // Arithmetic Shift Right by 8-bit immediate
12286 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12287 %{
12288   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12289   effect(KILL cr);
12290 
12291   format %{ "sarq    $dst, $shift" %}
12292   ins_encode %{
12293     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12294   %}
12295   ins_pipe(ialu_mem_imm);
12296 %}
12297 
12298 // Arithmetic Shift Right by variable
12299 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12300 %{
12301   predicate(!VM_Version::supports_bmi2());
12302   match(Set dst (RShiftL dst shift));
12303   effect(KILL cr);
12304 
12305   format %{ "sarq    $dst, $shift" %}
12306   ins_encode %{
12307     __ sarq($dst$$Register);
12308   %}
12309   ins_pipe(ialu_reg_reg);
12310 %}
12311 
12312 // Arithmetic Shift Right by variable
12313 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12314 %{
12315   predicate(!VM_Version::supports_bmi2());
12316   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12317   effect(KILL cr);
12318 
12319   format %{ "sarq    $dst, $shift" %}
12320   ins_encode %{
12321     __ sarq($dst$$Address);
12322   %}
12323   ins_pipe(ialu_mem_reg);
12324 %}
12325 
12326 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12327 %{
12328   predicate(VM_Version::supports_bmi2());
12329   match(Set dst (RShiftL src shift));
12330 
12331   format %{ "sarxq   $dst, $src, $shift" %}
12332   ins_encode %{
12333     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12334   %}
12335   ins_pipe(ialu_reg_reg);
12336 %}
12337 
12338 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12339 %{
12340   predicate(VM_Version::supports_bmi2());
12341   match(Set dst (RShiftL (LoadL src) shift));
12342   ins_cost(175);
12343   format %{ "sarxq   $dst, $src, $shift" %}
12344   ins_encode %{
12345     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12346   %}
12347   ins_pipe(ialu_reg_mem);
12348 %}
12349 
12350 // Logical Shift Right by 8-bit immediate
12351 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12352 %{
12353   predicate(!UseAPX);
12354   match(Set dst (URShiftL dst shift));
12355   effect(KILL cr);
12356 
12357   format %{ "shrq    $dst, $shift" %}
12358   ins_encode %{
12359     __ shrq($dst$$Register, $shift$$constant);
12360   %}
12361   ins_pipe(ialu_reg);
12362 %}
12363 
12364 // Logical Shift Right by 8-bit immediate
12365 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12366 %{
12367   predicate(UseAPX);
12368   match(Set dst (URShiftL src shift));
12369   effect(KILL cr);
12370 
12371   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12372   ins_encode %{
12373     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12374   %}
12375   ins_pipe(ialu_reg);
12376 %}
12377 
12378 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12379 %{
12380   predicate(UseAPX);
12381   match(Set dst (URShiftL (LoadL src) shift));
12382   effect(KILL cr);
12383 
12384   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12385   ins_encode %{
12386     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12387   %}
12388   ins_pipe(ialu_reg);
12389 %}
12390 
12391 // Logical Shift Right by 8-bit immediate
12392 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12393 %{
12394   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12395   effect(KILL cr);
12396 
12397   format %{ "shrq    $dst, $shift" %}
12398   ins_encode %{
12399     __ shrq($dst$$Address, $shift$$constant);
12400   %}
12401   ins_pipe(ialu_mem_imm);
12402 %}
12403 
12404 // Logical Shift Right by variable
12405 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12406 %{
12407   predicate(!VM_Version::supports_bmi2());
12408   match(Set dst (URShiftL dst shift));
12409   effect(KILL cr);
12410 
12411   format %{ "shrq    $dst, $shift" %}
12412   ins_encode %{
12413     __ shrq($dst$$Register);
12414   %}
12415   ins_pipe(ialu_reg_reg);
12416 %}
12417 
12418 // Logical Shift Right by variable
12419 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12420 %{
12421   predicate(!VM_Version::supports_bmi2());
12422   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12423   effect(KILL cr);
12424 
12425   format %{ "shrq    $dst, $shift" %}
12426   ins_encode %{
12427     __ shrq($dst$$Address);
12428   %}
12429   ins_pipe(ialu_mem_reg);
12430 %}
12431 
12432 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12433 %{
12434   predicate(VM_Version::supports_bmi2());
12435   match(Set dst (URShiftL src shift));
12436 
12437   format %{ "shrxq   $dst, $src, $shift" %}
12438   ins_encode %{
12439     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12440   %}
12441   ins_pipe(ialu_reg_reg);
12442 %}
12443 
12444 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12445 %{
12446   predicate(VM_Version::supports_bmi2());
12447   match(Set dst (URShiftL (LoadL src) shift));
12448   ins_cost(175);
12449   format %{ "shrxq   $dst, $src, $shift" %}
12450   ins_encode %{
12451     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12452   %}
12453   ins_pipe(ialu_reg_mem);
12454 %}
12455 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
12457 // This idiom is used by the compiler for the i2b bytecode.
12458 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12459 %{
12460   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12461 
12462   format %{ "movsbl  $dst, $src\t# i2b" %}
12463   ins_encode %{
12464     __ movsbl($dst$$Register, $src$$Register);
12465   %}
12466   ins_pipe(ialu_reg_reg);
12467 %}
12468 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12471 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12472 %{
12473   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12474 
12475   format %{ "movswl  $dst, $src\t# i2s" %}
12476   ins_encode %{
12477     __ movswl($dst$$Register, $src$$Register);
12478   %}
12479   ins_pipe(ialu_reg_reg);
12480 %}
12481 
12482 // ROL/ROR instructions
12483 
12484 // Rotate left by constant.
12485 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12486 %{
12487   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12488   match(Set dst (RotateLeft dst shift));
12489   effect(KILL cr);
12490   format %{ "roll    $dst, $shift" %}
12491   ins_encode %{
12492     __ roll($dst$$Register, $shift$$constant);
12493   %}
12494   ins_pipe(ialu_reg);
12495 %}
12496 
12497 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12498 %{
12499   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12500   match(Set dst (RotateLeft src shift));
12501   format %{ "rolxl   $dst, $src, $shift" %}
12502   ins_encode %{
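    // rol(x, s) == ror(x, 32 - s); BMI2 has no rolx, so emit rorx with the
    // complemented count.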
12503     int shift = 32 - ($shift$$constant & 31);
12504     __ rorxl($dst$$Register, $src$$Register, shift);
12505   %}
12506   ins_pipe(ialu_reg_reg);
12507 %}
12508 
12509 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12510 %{
12511   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12512   match(Set dst (RotateLeft (LoadI src) shift));
12513   ins_cost(175);
12514   format %{ "rolxl   $dst, $src, $shift" %}
12515   ins_encode %{
12516     int shift = 32 - ($shift$$constant & 31);
12517     __ rorxl($dst$$Register, $src$$Address, shift);
12518   %}
12519   ins_pipe(ialu_reg_mem);
12520 %}
12521 
12522 // Rotate Left by variable
12523 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12524 %{
12525   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12526   match(Set dst (RotateLeft dst shift));
12527   effect(KILL cr);
12528   format %{ "roll    $dst, $shift" %}
12529   ins_encode %{
12530     __ roll($dst$$Register);
12531   %}
12532   ins_pipe(ialu_reg_reg);
12533 %}
12534 
12535 // Rotate Left by variable
12536 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12537 %{
12538   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12539   match(Set dst (RotateLeft src shift));
12540   effect(KILL cr);
12541 
12542   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12543   ins_encode %{
12544     __ eroll($dst$$Register, $src$$Register, false);
12545   %}
12546   ins_pipe(ialu_reg_reg);
12547 %}
12548 
12549 // Rotate Right by constant.
12550 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12551 %{
12552   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12553   match(Set dst (RotateRight dst shift));
12554   effect(KILL cr);
12555   format %{ "rorl    $dst, $shift" %}
12556   ins_encode %{
12557     __ rorl($dst$$Register, $shift$$constant);
12558   %}
12559   ins_pipe(ialu_reg);
12560 %}
12561 
12562 // Rotate Right by constant.
12563 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12564 %{
12565   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12566   match(Set dst (RotateRight src shift));
12567   format %{ "rorxl   $dst, $src, $shift" %}
12568   ins_encode %{
12569     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12570   %}
12571   ins_pipe(ialu_reg_reg);
12572 %}
12573 
12574 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12575 %{
12576   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12577   match(Set dst (RotateRight (LoadI src) shift));
12578   ins_cost(175);
12579   format %{ "rorxl   $dst, $src, $shift" %}
12580   ins_encode %{
12581     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12582   %}
12583   ins_pipe(ialu_reg_mem);
12584 %}
12585 
12586 // Rotate Right by variable
12587 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12588 %{
12589   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12590   match(Set dst (RotateRight dst shift));
12591   effect(KILL cr);
12592   format %{ "rorl    $dst, $shift" %}
12593   ins_encode %{
12594     __ rorl($dst$$Register);
12595   %}
12596   ins_pipe(ialu_reg_reg);
12597 %}
12598 
12599 // Rotate Right by variable
12600 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12601 %{
12602   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12603   match(Set dst (RotateRight src shift));
12604   effect(KILL cr);
12605 
12606   format %{ "erorl    $dst, $src, $shift\t# rotate right(int ndd)" %}
12607   ins_encode %{
12608     __ erorl($dst$$Register, $src$$Register, false);
12609   %}
12610   ins_pipe(ialu_reg_reg);
12611 %}
12612 
12613 // Rotate Left by constant.
12614 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12615 %{
12616   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12617   match(Set dst (RotateLeft dst shift));
12618   effect(KILL cr);
12619   format %{ "rolq    $dst, $shift" %}
12620   ins_encode %{
12621     __ rolq($dst$$Register, $shift$$constant);
12622   %}
12623   ins_pipe(ialu_reg);
12624 %}
12625 
12626 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12627 %{
12628   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12629   match(Set dst (RotateLeft src shift));
12630   format %{ "rolxq   $dst, $src, $shift" %}
12631   ins_encode %{
12632     int shift = 64 - ($shift$$constant & 63);
12633     __ rorxq($dst$$Register, $src$$Register, shift);
12634   %}
12635   ins_pipe(ialu_reg_reg);
12636 %}
12637 
12638 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12639 %{
12640   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12641   match(Set dst (RotateLeft (LoadL src) shift));
12642   ins_cost(175);
12643   format %{ "rolxq   $dst, $src, $shift" %}
12644   ins_encode %{
12645     int shift = 64 - ($shift$$constant & 63);
12646     __ rorxq($dst$$Register, $src$$Address, shift);
12647   %}
12648   ins_pipe(ialu_reg_mem);
12649 %}
12650 
12651 // Rotate Left by variable
12652 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12653 %{
12654   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12655   match(Set dst (RotateLeft dst shift));
12656   effect(KILL cr);
12657   format %{ "rolq    $dst, $shift" %}
12658   ins_encode %{
12659     __ rolq($dst$$Register);
12660   %}
12661   ins_pipe(ialu_reg_reg);
12662 %}
12663 
12664 // Rotate Left by variable
12665 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12666 %{
12667   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12668   match(Set dst (RotateLeft src shift));
12669   effect(KILL cr);
12670 
12671   format %{ "erolq    $dst, $src, $shift\t# rotate left(long ndd)" %}
12672   ins_encode %{
12673     __ erolq($dst$$Register, $src$$Register, false);
12674   %}
12675   ins_pipe(ialu_reg_reg);
12676 %}
12677 
12678 // Rotate Right by constant.
12679 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12680 %{
12681   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12682   match(Set dst (RotateRight dst shift));
12683   effect(KILL cr);
12684   format %{ "rorq    $dst, $shift" %}
12685   ins_encode %{
12686     __ rorq($dst$$Register, $shift$$constant);
12687   %}
12688   ins_pipe(ialu_reg);
12689 %}
12690 
12691 // Rotate Right by constant
12692 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12693 %{
12694   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12695   match(Set dst (RotateRight src shift));
12696   format %{ "rorxq   $dst, $src, $shift" %}
12697   ins_encode %{
12698     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12699   %}
12700   ins_pipe(ialu_reg_reg);
12701 %}
12702 
12703 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12704 %{
12705   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12706   match(Set dst (RotateRight (LoadL src) shift));
12707   ins_cost(175);
12708   format %{ "rorxq   $dst, $src, $shift" %}
12709   ins_encode %{
12710     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12711   %}
12712   ins_pipe(ialu_reg_mem);
12713 %}
12714 
12715 // Rotate Right by variable
12716 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12717 %{
12718   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12719   match(Set dst (RotateRight dst shift));
12720   effect(KILL cr);
12721   format %{ "rorq    $dst, $shift" %}
12722   ins_encode %{
12723     __ rorq($dst$$Register);
12724   %}
12725   ins_pipe(ialu_reg_reg);
12726 %}
12727 
12728 // Rotate Right by variable
12729 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12730 %{
12731   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12732   match(Set dst (RotateRight src shift));
12733   effect(KILL cr);
12734 
12735   format %{ "erorq    $dst, $src, $shift\t# rotate right(long ndd)" %}
12736   ins_encode %{
12737     __ erorq($dst$$Register, $src$$Register, false);
12738   %}
12739   ins_pipe(ialu_reg_reg);
12740 %}
12741 
12742 //----------------------------- CompressBits/ExpandBits ------------------------
12743 
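// pext gathers the src bits selected by mask into contiguous low-order bits;
// pdep is the inverse, scattering low-order src bits to the mask positions.
// For example:
//   pext(0b10110010, 0b11110000) == 0b00001011
//   pdep(0b00001011, 0b11110000) == 0b10110000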
12744 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12745   predicate(n->bottom_type()->isa_long());
12746   match(Set dst (CompressBits src mask));
12747   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12748   ins_encode %{
12749     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12750   %}
12751   ins_pipe( pipe_slow );
12752 %}
12753 
12754 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12755   predicate(n->bottom_type()->isa_long());
12756   match(Set dst (ExpandBits src mask));
12757   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12758   ins_encode %{
12759     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12760   %}
12761   ins_pipe( pipe_slow );
12762 %}
12763 
12764 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12765   predicate(n->bottom_type()->isa_long());
12766   match(Set dst (CompressBits src (LoadL mask)));
12767   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12768   ins_encode %{
12769     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12770   %}
12771   ins_pipe( pipe_slow );
12772 %}
12773 
12774 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12775   predicate(n->bottom_type()->isa_long());
12776   match(Set dst (ExpandBits src (LoadL mask)));
12777   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12778   ins_encode %{
12779     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12780   %}
12781   ins_pipe( pipe_slow );
12782 %}
12783 
12784 
12785 // Logical Instructions
12786 
12787 // Integer Logical Instructions
12788 
12789 // And Instructions
12790 // And Register with Register
12791 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12792 %{
12793   predicate(!UseAPX);
12794   match(Set dst (AndI dst src));
12795   effect(KILL cr);
12796   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12797 
12798   format %{ "andl    $dst, $src\t# int" %}
12799   ins_encode %{
12800     __ andl($dst$$Register, $src$$Register);
12801   %}
12802   ins_pipe(ialu_reg_reg);
12803 %}
12804 
12805 // And Register with Register using New Data Destination (NDD)
12806 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12807 %{
12808   predicate(UseAPX);
12809   match(Set dst (AndI src1 src2));
12810   effect(KILL cr);
12811   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12812 
12813   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
12814   ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
12818   ins_pipe(ialu_reg_reg);
12819 %}
12820 
12821 // And Register with Immediate 255
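// (movzbl performs the masking as a zero-extension and writes no flags, so
// this rule needs no KILL cr effect)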
12822 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12823 %{
12824   match(Set dst (AndI src mask));
12825 
12826   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
12827   ins_encode %{
12828     __ movzbl($dst$$Register, $src$$Register);
12829   %}
12830   ins_pipe(ialu_reg);
12831 %}
12832 
12833 // And Register with Immediate 255 and promote to long
12834 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12835 %{
12836   match(Set dst (ConvI2L (AndI src mask)));
12837 
12838   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
12839   ins_encode %{
12840     __ movzbl($dst$$Register, $src$$Register);
12841   %}
12842   ins_pipe(ialu_reg);
12843 %}
12844 
12845 // And Register with Immediate 65535
12846 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12847 %{
12848   match(Set dst (AndI src mask));
12849 
12850   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
12851   ins_encode %{
12852     __ movzwl($dst$$Register, $src$$Register);
12853   %}
12854   ins_pipe(ialu_reg);
12855 %}
12856 
12857 // And Register with Immediate 65535 and promote to long
12858 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12859 %{
12860   match(Set dst (ConvI2L (AndI src mask)));
12861 
12862   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
12863   ins_encode %{
12864     __ movzwl($dst$$Register, $src$$Register);
12865   %}
12866   ins_pipe(ialu_reg);
12867 %}
12868 
// The int-to-long conversion can be skipped after an AND with a small (2^n - 1) bitmask
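// For example, mask == 0x3FF (2^10 - 1): tmp is loaded with 10 and bzhiq
// zeroes bits 63..10 of src, applying the mask and zero-extending to 64 bits
// in one instruction.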
12870 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
12871 %{
12872   predicate(VM_Version::supports_bmi2());
12873   ins_cost(125);
12874   effect(TEMP tmp, KILL cr);
12875   match(Set dst (ConvI2L (AndI src mask)));
12876   format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
12877   ins_encode %{
12878     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
12879     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12880   %}
12881   ins_pipe(ialu_reg_reg);
12882 %}
12883 
12884 // And Register with Immediate
12885 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
12886 %{
12887   predicate(!UseAPX);
12888   match(Set dst (AndI dst src));
12889   effect(KILL cr);
12890   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12891 
12892   format %{ "andl    $dst, $src\t# int" %}
12893   ins_encode %{
12894     __ andl($dst$$Register, $src$$constant);
12895   %}
12896   ins_pipe(ialu_reg);
12897 %}
12898 
12899 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
12900 %{
12901   predicate(UseAPX);
12902   match(Set dst (AndI src1 src2));
12903   effect(KILL cr);
12904   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12905 
12906   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
12907   ins_encode %{
12908     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
12909   %}
12910   ins_pipe(ialu_reg);
12911 %}
12912 
12913 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
12914 %{
12915   predicate(UseAPX);
12916   match(Set dst (AndI (LoadI src1) src2));
12917   effect(KILL cr);
12918   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12919 
12920   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
12921   ins_encode %{
12922     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
12923   %}
12924   ins_pipe(ialu_reg);
12925 %}
12926 
12927 // And Register with Memory
12928 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
12929 %{
12930   predicate(!UseAPX);
12931   match(Set dst (AndI dst (LoadI src)));
12932   effect(KILL cr);
12933   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12934 
12935   ins_cost(150);
12936   format %{ "andl    $dst, $src\t# int" %}
12937   ins_encode %{
12938     __ andl($dst$$Register, $src$$Address);
12939   %}
12940   ins_pipe(ialu_reg_mem);
12941 %}
12942 
12943 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
12944 %{
12945   predicate(UseAPX);
12946   match(Set dst (AndI src1 (LoadI src2)));
12947   effect(KILL cr);
12948   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12949 
12950   ins_cost(150);
12951   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
12952   ins_encode %{
12953     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
12954   %}
12955   ins_pipe(ialu_reg_mem);
12956 %}
12957 
12958 // And Memory with Register
12959 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
12960 %{
12961   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
12962   effect(KILL cr);
12963   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12964 
12965   ins_cost(150);
12966   format %{ "andb    $dst, $src\t# byte" %}
12967   ins_encode %{
12968     __ andb($dst$$Address, $src$$Register);
12969   %}
12970   ins_pipe(ialu_mem_reg);
12971 %}
12972 
12973 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
12974 %{
12975   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
12976   effect(KILL cr);
12977   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12978 
12979   ins_cost(150);
12980   format %{ "andl    $dst, $src\t# int" %}
12981   ins_encode %{
12982     __ andl($dst$$Address, $src$$Register);
12983   %}
12984   ins_pipe(ialu_mem_reg);
12985 %}
12986 
12987 // And Memory with Immediate
12988 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
12989 %{
12990   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
12991   effect(KILL cr);
12992   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12993 
12994   ins_cost(125);
12995   format %{ "andl    $dst, $src\t# int" %}
12996   ins_encode %{
12997     __ andl($dst$$Address, $src$$constant);
12998   %}
12999   ins_pipe(ialu_mem_imm);
13000 %}
13001 
13002 // BMI1 instructions
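// These match the canonical ideal-graph shapes of the BMI1 idioms:
//   andn(x, y) == ~x & y
//   blsi(x)    ==  x & -x        (isolate lowest set bit)
//   blsmsk(x)  ==  x ^ (x - 1)   (mask up through lowest set bit)
//   blsr(x)    ==  x & (x - 1)   (clear lowest set bit)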
13003 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13004   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13005   predicate(UseBMI1Instructions);
13006   effect(KILL cr);
13007   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13008 
13009   ins_cost(125);
13010   format %{ "andnl  $dst, $src1, $src2" %}
13011 
13012   ins_encode %{
13013     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13014   %}
13015   ins_pipe(ialu_reg_mem);
13016 %}
13017 
13018 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13019   match(Set dst (AndI (XorI src1 minus_1) src2));
13020   predicate(UseBMI1Instructions);
13021   effect(KILL cr);
13022   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13023 
13024   format %{ "andnl  $dst, $src1, $src2" %}
13025 
13026   ins_encode %{
13027     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13028   %}
13029   ins_pipe(ialu_reg);
13030 %}
13031 
13032 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13033   match(Set dst (AndI (SubI imm_zero src) src));
13034   predicate(UseBMI1Instructions);
13035   effect(KILL cr);
13036   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13037 
13038   format %{ "blsil  $dst, $src" %}
13039 
13040   ins_encode %{
13041     __ blsil($dst$$Register, $src$$Register);
13042   %}
13043   ins_pipe(ialu_reg);
13044 %}
13045 
13046 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13047   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13048   predicate(UseBMI1Instructions);
13049   effect(KILL cr);
13050   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13051 
13052   ins_cost(125);
13053   format %{ "blsil  $dst, $src" %}
13054 
13055   ins_encode %{
13056     __ blsil($dst$$Register, $src$$Address);
13057   %}
13058   ins_pipe(ialu_reg_mem);
13059 %}
13060 
13061 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13062 %{
13063   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13064   predicate(UseBMI1Instructions);
13065   effect(KILL cr);
13066   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13067 
13068   ins_cost(125);
13069   format %{ "blsmskl $dst, $src" %}
13070 
13071   ins_encode %{
13072     __ blsmskl($dst$$Register, $src$$Address);
13073   %}
13074   ins_pipe(ialu_reg_mem);
13075 %}
13076 
13077 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13078 %{
13079   match(Set dst (XorI (AddI src minus_1) src));
13080   predicate(UseBMI1Instructions);
13081   effect(KILL cr);
13082   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13083 
13084   format %{ "blsmskl $dst, $src" %}
13085 
13086   ins_encode %{
13087     __ blsmskl($dst$$Register, $src$$Register);
13088   %}
13089 
13090   ins_pipe(ialu_reg);
13091 %}
13092 
13093 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13094 %{
13095   match(Set dst (AndI (AddI src minus_1) src) );
13096   predicate(UseBMI1Instructions);
13097   effect(KILL cr);
13098   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13099 
13100   format %{ "blsrl  $dst, $src" %}
13101 
13102   ins_encode %{
13103     __ blsrl($dst$$Register, $src$$Register);
13104   %}
13105 
  ins_pipe(ialu_reg);
13107 %}
13108 
13109 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13110 %{
13111   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13112   predicate(UseBMI1Instructions);
13113   effect(KILL cr);
13114   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13115 
13116   ins_cost(125);
13117   format %{ "blsrl  $dst, $src" %}
13118 
13119   ins_encode %{
13120     __ blsrl($dst$$Register, $src$$Address);
13121   %}
13122 
  ins_pipe(ialu_reg_mem);
13124 %}
13125 
13126 // Or Instructions
13127 // Or Register with Register
13128 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13129 %{
13130   predicate(!UseAPX);
13131   match(Set dst (OrI dst src));
13132   effect(KILL cr);
13133   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13134 
13135   format %{ "orl     $dst, $src\t# int" %}
13136   ins_encode %{
13137     __ orl($dst$$Register, $src$$Register);
13138   %}
13139   ins_pipe(ialu_reg_reg);
13140 %}
13141 
13142 // Or Register with Register using New Data Destination (NDD)
13143 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13144 %{
13145   predicate(UseAPX);
13146   match(Set dst (OrI src1 src2));
13147   effect(KILL cr);
13148   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13149 
13150   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13151   ins_encode %{
13152     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13153   %}
13154   ins_pipe(ialu_reg_reg);
13155 %}
13156 
13157 // Or Register with Immediate
13158 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13159 %{
13160   predicate(!UseAPX);
13161   match(Set dst (OrI dst src));
13162   effect(KILL cr);
13163   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13164 
13165   format %{ "orl     $dst, $src\t# int" %}
13166   ins_encode %{
13167     __ orl($dst$$Register, $src$$constant);
13168   %}
13169   ins_pipe(ialu_reg);
13170 %}
13171 
13172 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13173 %{
13174   predicate(UseAPX);
13175   match(Set dst (OrI src1 src2));
13176   effect(KILL cr);
13177   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13178 
13179   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13180   ins_encode %{
13181     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13182   %}
13183   ins_pipe(ialu_reg);
13184 %}
13185 
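// Same as above with the immediate on the left: OrI is commutative, so the
// encoder simply swaps the operands.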
13186 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13187 %{
13188   predicate(UseAPX);
13189   match(Set dst (OrI src1 src2));
13190   effect(KILL cr);
13191   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13192 
13193   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13194   ins_encode %{
13195     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13196   %}
13197   ins_pipe(ialu_reg);
13198 %}
13199 
13200 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13201 %{
13202   predicate(UseAPX);
13203   match(Set dst (OrI (LoadI src1) src2));
13204   effect(KILL cr);
13205   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13206 
13207   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13208   ins_encode %{
13209     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13210   %}
13211   ins_pipe(ialu_reg);
13212 %}
13213 
13214 // Or Register with Memory
13215 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13216 %{
13217   predicate(!UseAPX);
13218   match(Set dst (OrI dst (LoadI src)));
13219   effect(KILL cr);
13220   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13221 
13222   ins_cost(150);
13223   format %{ "orl     $dst, $src\t# int" %}
13224   ins_encode %{
13225     __ orl($dst$$Register, $src$$Address);
13226   %}
13227   ins_pipe(ialu_reg_mem);
13228 %}
13229 
13230 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13231 %{
13232   predicate(UseAPX);
13233   match(Set dst (OrI src1 (LoadI src2)));
13234   effect(KILL cr);
13235   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13236 
13237   ins_cost(150);
13238   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13239   ins_encode %{
13240     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13241   %}
13242   ins_pipe(ialu_reg_mem);
13243 %}
13244 
13245 // Or Memory with Register
13246 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13247 %{
13248   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13249   effect(KILL cr);
13250   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13251 
13252   ins_cost(150);
13253   format %{ "orb    $dst, $src\t# byte" %}
13254   ins_encode %{
13255     __ orb($dst$$Address, $src$$Register);
13256   %}
13257   ins_pipe(ialu_mem_reg);
13258 %}
13259 
13260 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13261 %{
13262   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13263   effect(KILL cr);
13264   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13265 
13266   ins_cost(150);
13267   format %{ "orl     $dst, $src\t# int" %}
13268   ins_encode %{
13269     __ orl($dst$$Address, $src$$Register);
13270   %}
13271   ins_pipe(ialu_mem_reg);
13272 %}
13273 
13274 // Or Memory with Immediate
13275 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13276 %{
13277   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13278   effect(KILL cr);
13279   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13280 
13281   ins_cost(125);
13282   format %{ "orl     $dst, $src\t# int" %}
13283   ins_encode %{
13284     __ orl($dst$$Address, $src$$constant);
13285   %}
13286   ins_pipe(ialu_mem_imm);
13287 %}
13288 
13289 // Xor Instructions
13290 // Xor Register with Register
13291 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13292 %{
13293   predicate(!UseAPX);
13294   match(Set dst (XorI dst src));
13295   effect(KILL cr);
13296   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13297 
13298   format %{ "xorl    $dst, $src\t# int" %}
13299   ins_encode %{
13300     __ xorl($dst$$Register, $src$$Register);
13301   %}
13302   ins_pipe(ialu_reg_reg);
13303 %}
13304 
13305 // Xor Register with Register using New Data Destination (NDD)
13306 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13307 %{
13308   predicate(UseAPX);
13309   match(Set dst (XorI src1 src2));
13310   effect(KILL cr);
13311   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13312 
13313   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13314   ins_encode %{
13315     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13316   %}
13317   ins_pipe(ialu_reg_reg);
13318 %}
13319 
13320 // Xor Register with Immediate -1
13321 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13322 %{
13323   predicate(!UseAPX);
13324   match(Set dst (XorI dst imm));
13325 
13326   format %{ "notl    $dst" %}
13327   ins_encode %{
13328      __ notl($dst$$Register);
13329   %}
13330   ins_pipe(ialu_reg);
13331 %}
13332 
13333 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13334 %{
13335   match(Set dst (XorI src imm));
13336   predicate(UseAPX);
13337 
13338   format %{ "enotl    $dst, $src" %}
13339   ins_encode %{
13340      __ enotl($dst$$Register, $src$$Register);
13341   %}
13342   ins_pipe(ialu_reg);
13343 %}
13344 
13345 // Xor Register with Immediate
13346 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13347 %{
  // The strict predicate excludes -1 so that xorI_rReg_im1 is selected for
  // that case regardless of cost.
13349   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13350   match(Set dst (XorI dst src));
13351   effect(KILL cr);
13352   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13353 
13354   format %{ "xorl    $dst, $src\t# int" %}
13355   ins_encode %{
13356     __ xorl($dst$$Register, $src$$constant);
13357   %}
13358   ins_pipe(ialu_reg);
13359 %}
13360 
13361 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13362 %{
  // The strict predicate excludes -1 so that xorI_rReg_im1_ndd is selected for
  // that case regardless of cost.
13364   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13365   match(Set dst (XorI src1 src2));
13366   effect(KILL cr);
13367   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13368 
13369   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13370   ins_encode %{
13371     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13372   %}
13373   ins_pipe(ialu_reg);
13374 %}
13375 
// Xor Memory with Immediate into Register (NDD)
13377 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13378 %{
13379   predicate(UseAPX);
13380   match(Set dst (XorI (LoadI src1) src2));
13381   effect(KILL cr);
13382   ins_cost(150);
13383   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13384 
13385   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13386   ins_encode %{
13387     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13388   %}
13389   ins_pipe(ialu_reg);
13390 %}
13391 
13392 // Xor Register with Memory
13393 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13394 %{
13395   predicate(!UseAPX);
13396   match(Set dst (XorI dst (LoadI src)));
13397   effect(KILL cr);
13398   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13399 
13400   ins_cost(150);
13401   format %{ "xorl    $dst, $src\t# int" %}
13402   ins_encode %{
13403     __ xorl($dst$$Register, $src$$Address);
13404   %}
13405   ins_pipe(ialu_reg_mem);
13406 %}
13407 
13408 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13409 %{
13410   predicate(UseAPX);
13411   match(Set dst (XorI src1 (LoadI src2)));
13412   effect(KILL cr);
13413   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13414 
13415   ins_cost(150);
13416   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13417   ins_encode %{
13418     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13419   %}
13420   ins_pipe(ialu_reg_mem);
13421 %}
13422 
13423 // Xor Memory with Register
13424 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13425 %{
13426   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13427   effect(KILL cr);
13428   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13429 
13430   ins_cost(150);
13431   format %{ "xorb    $dst, $src\t# byte" %}
13432   ins_encode %{
13433     __ xorb($dst$$Address, $src$$Register);
13434   %}
13435   ins_pipe(ialu_mem_reg);
13436 %}
13437 
13438 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13439 %{
13440   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13441   effect(KILL cr);
13442   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13443 
13444   ins_cost(150);
13445   format %{ "xorl    $dst, $src\t# int" %}
13446   ins_encode %{
13447     __ xorl($dst$$Address, $src$$Register);
13448   %}
13449   ins_pipe(ialu_mem_reg);
13450 %}
13451 
13452 // Xor Memory with Immediate
13453 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13454 %{
13455   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13456   effect(KILL cr);
13457   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13458 
13459   ins_cost(125);
13460   format %{ "xorl    $dst, $src\t# int" %}
13461   ins_encode %{
13462     __ xorl($dst$$Address, $src$$constant);
13463   %}
13464   ins_pipe(ialu_mem_imm);
13465 %}
13466 
13467 
13468 // Long Logical Instructions
13469 
13470 // And Instructions
13471 // And Register with Register
13472 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13473 %{
13474   predicate(!UseAPX);
13475   match(Set dst (AndL dst src));
13476   effect(KILL cr);
13477   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13478 
13479   format %{ "andq    $dst, $src\t# long" %}
13480   ins_encode %{
13481     __ andq($dst$$Register, $src$$Register);
13482   %}
13483   ins_pipe(ialu_reg_reg);
13484 %}
13485 
13486 // And Register with Register using New Data Destination (NDD)
13487 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13488 %{
13489   predicate(UseAPX);
13490   match(Set dst (AndL src1 src2));
13491   effect(KILL cr);
13492   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13493 
13494   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13495   ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13499   ins_pipe(ialu_reg_reg);
13500 %}
13501 
13502 // And Register with Immediate 255
13503 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13504 %{
13505   match(Set dst (AndL src mask));
13506 
13507   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13508   ins_encode %{
    // movzbl zeroes out the upper 32 bits and does not need REX.W
13510     __ movzbl($dst$$Register, $src$$Register);
13511   %}
13512   ins_pipe(ialu_reg);
13513 %}
13514 
13515 // And Register with Immediate 65535
13516 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13517 %{
13518   match(Set dst (AndL src mask));
13519 
13520   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13521   ins_encode %{
    // movzwl zeroes out the upper 32 bits and does not need REX.W
13523     __ movzwl($dst$$Register, $src$$Register);
13524   %}
13525   ins_pipe(ialu_reg);
13526 %}
13527 
13528 // And Register with Immediate
13529 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13530 %{
13531   predicate(!UseAPX);
13532   match(Set dst (AndL dst src));
13533   effect(KILL cr);
13534   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13535 
13536   format %{ "andq    $dst, $src\t# long" %}
13537   ins_encode %{
13538     __ andq($dst$$Register, $src$$constant);
13539   %}
13540   ins_pipe(ialu_reg);
13541 %}
13542 
13543 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13544 %{
13545   predicate(UseAPX);
13546   match(Set dst (AndL src1 src2));
13547   effect(KILL cr);
13548   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13549 
13550   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13551   ins_encode %{
13552     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13553   %}
13554   ins_pipe(ialu_reg);
13555 %}
13556 
13557 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13558 %{
13559   predicate(UseAPX);
13560   match(Set dst (AndL (LoadL src1) src2));
13561   effect(KILL cr);
13562   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13563 
13564   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13565   ins_encode %{
13566     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13567   %}
13568   ins_pipe(ialu_reg);
13569 %}
13570 
13571 // And Register with Memory
13572 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13573 %{
13574   predicate(!UseAPX);
13575   match(Set dst (AndL dst (LoadL src)));
13576   effect(KILL cr);
13577   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13578 
13579   ins_cost(150);
13580   format %{ "andq    $dst, $src\t# long" %}
13581   ins_encode %{
13582     __ andq($dst$$Register, $src$$Address);
13583   %}
13584   ins_pipe(ialu_reg_mem);
13585 %}
13586 
13587 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13588 %{
13589   predicate(UseAPX);
13590   match(Set dst (AndL src1 (LoadL src2)));
13591   effect(KILL cr);
13592   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13593 
13594   ins_cost(150);
13595   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13596   ins_encode %{
13597     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13598   %}
13599   ins_pipe(ialu_reg_mem);
13600 %}
13601 
13602 // And Memory with Register
13603 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13604 %{
13605   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13606   effect(KILL cr);
13607   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13608 
13609   ins_cost(150);
13610   format %{ "andq    $dst, $src\t# long" %}
13611   ins_encode %{
13612     __ andq($dst$$Address, $src$$Register);
13613   %}
13614   ins_pipe(ialu_mem_reg);
13615 %}
13616 
13617 // And Memory with Immediate
13618 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13619 %{
13620   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13621   effect(KILL cr);
13622   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13623 
13624   ins_cost(125);
13625   format %{ "andq    $dst, $src\t# long" %}
13626   ins_encode %{
13627     __ andq($dst$$Address, $src$$constant);
13628   %}
13629   ins_pipe(ialu_mem_imm);
13630 %}
13631 
13632 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13633 %{
  // con must require a full 64-bit immediate: not(con) is a power of 2 whose
  // bit index is above 30, since for 8/32-bit immediates a plain AND/OR is
  // just as cheap.
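  // For example, con == ~(1L << 40): the AND clears bit 40, and btrq does so
  // directly without materializing the 64-bit mask in a register.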
13636   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13637 
13638   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13639   effect(KILL cr);
13640 
13641   ins_cost(125);
13642   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13643   ins_encode %{
13644     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13645   %}
13646   ins_pipe(ialu_mem_imm);
13647 %}
13648 
13649 // BMI1 instructions
13650 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13651   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13652   predicate(UseBMI1Instructions);
13653   effect(KILL cr);
13654   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13655 
13656   ins_cost(125);
13657   format %{ "andnq  $dst, $src1, $src2" %}
13658 
13659   ins_encode %{
13660     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13661   %}
13662   ins_pipe(ialu_reg_mem);
13663 %}
13664 
13665 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13666   match(Set dst (AndL (XorL src1 minus_1) src2));
13667   predicate(UseBMI1Instructions);
13668   effect(KILL cr);
13669   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13670 
13671   format %{ "andnq  $dst, $src1, $src2" %}
13672 
13673   ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13675   %}
  ins_pipe(ialu_reg);
13677 %}
13678 
13679 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13680   match(Set dst (AndL (SubL imm_zero src) src));
13681   predicate(UseBMI1Instructions);
13682   effect(KILL cr);
13683   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13684 
13685   format %{ "blsiq  $dst, $src" %}
13686 
13687   ins_encode %{
13688     __ blsiq($dst$$Register, $src$$Register);
13689   %}
13690   ins_pipe(ialu_reg);
13691 %}
13692 
13693 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13694   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13695   predicate(UseBMI1Instructions);
13696   effect(KILL cr);
13697   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13698 
13699   ins_cost(125);
13700   format %{ "blsiq  $dst, $src" %}
13701 
13702   ins_encode %{
13703     __ blsiq($dst$$Register, $src$$Address);
13704   %}
13705   ins_pipe(ialu_reg_mem);
13706 %}
13707 
13708 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13709 %{
13710   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13711   predicate(UseBMI1Instructions);
13712   effect(KILL cr);
13713   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13714 
13715   ins_cost(125);
13716   format %{ "blsmskq $dst, $src" %}
13717 
13718   ins_encode %{
13719     __ blsmskq($dst$$Register, $src$$Address);
13720   %}
13721   ins_pipe(ialu_reg_mem);
13722 %}
13723 
13724 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13725 %{
13726   match(Set dst (XorL (AddL src minus_1) src));
13727   predicate(UseBMI1Instructions);
13728   effect(KILL cr);
13729   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13730 
13731   format %{ "blsmskq $dst, $src" %}
13732 
13733   ins_encode %{
13734     __ blsmskq($dst$$Register, $src$$Register);
13735   %}
13736 
13737   ins_pipe(ialu_reg);
13738 %}
13739 
13740 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13741 %{
13742   match(Set dst (AndL (AddL src minus_1) src) );
13743   predicate(UseBMI1Instructions);
13744   effect(KILL cr);
13745   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13746 
13747   format %{ "blsrq  $dst, $src" %}
13748 
13749   ins_encode %{
13750     __ blsrq($dst$$Register, $src$$Register);
13751   %}
13752 
13753   ins_pipe(ialu_reg);
13754 %}
13755 
13756 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13757 %{
13758   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13759   predicate(UseBMI1Instructions);
13760   effect(KILL cr);
13761   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13762 
13763   ins_cost(125);
13764   format %{ "blsrq  $dst, $src" %}
13765 
13766   ins_encode %{
13767     __ blsrq($dst$$Register, $src$$Address);
13768   %}
13769 
  ins_pipe(ialu_reg_mem);
13771 %}
13772 
13773 // Or Instructions
13774 // Or Register with Register
13775 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13776 %{
13777   predicate(!UseAPX);
13778   match(Set dst (OrL dst src));
13779   effect(KILL cr);
13780   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13781 
13782   format %{ "orq     $dst, $src\t# long" %}
13783   ins_encode %{
13784     __ orq($dst$$Register, $src$$Register);
13785   %}
13786   ins_pipe(ialu_reg_reg);
13787 %}
13788 
13789 // Or Register with Register using New Data Destination (NDD)
13790 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13791 %{
13792   predicate(UseAPX);
13793   match(Set dst (OrL src1 src2));
13794   effect(KILL cr);
13795   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13796 
13797   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13798   ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13802   ins_pipe(ialu_reg_reg);
13803 %}
13804 
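// A sketch of the NDD (New Data Destination) benefit: the legacy form
// computes dst |= src, so C2 must first copy src1 into dst when src1 stays
// live, whereas the APX form writes a separate destination, dst = src1 | src2,
// and saves that move. The trailing false argument selects flag production
// (no_flags == false), matching the KILL cr effect above.
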
13805 // Use any_RegP to match R15 (TLS register) without spilling.
13806 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
13807   match(Set dst (OrL dst (CastP2X src)));
13808   effect(KILL cr);
13809   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13810 
13811   format %{ "orq     $dst, $src\t# long" %}
13812   ins_encode %{
13813     __ orq($dst$$Register, $src$$Register);
13814   %}
13815   ins_pipe(ialu_reg_reg);
13816 %}
13817 
13818 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
13819   match(Set dst (OrL src1 (CastP2X src2)));
13820   effect(KILL cr);
13821   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13822 
13823   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13824   ins_encode %{
13825     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13826   %}
13827   ins_pipe(ialu_reg_reg);
13828 %}
13829 
13830 // Or Register with Immediate
13831 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13832 %{
13833   predicate(!UseAPX);
13834   match(Set dst (OrL dst src));
13835   effect(KILL cr);
13836   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13837 
13838   format %{ "orq     $dst, $src\t# long" %}
13839   ins_encode %{
13840     __ orq($dst$$Register, $src$$constant);
13841   %}
13842   ins_pipe(ialu_reg);
13843 %}
13844 
13845 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13846 %{
13847   predicate(UseAPX);
13848   match(Set dst (OrL src1 src2));
13849   effect(KILL cr);
13850   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13851 
13852   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13853   ins_encode %{
13854     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13855   %}
13856   ins_pipe(ialu_reg);
13857 %}
13858 
13859 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13860 %{
13861   predicate(UseAPX);
13862   match(Set dst (OrL src1 src2));
13863   effect(KILL cr);
13864   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13865 
13866   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
13867   ins_encode %{
13868     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
13869   %}
13870   ins_pipe(ialu_reg);
13871 %}
13872 
// Or Memory with Immediate (NDD, result in register)
13874 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13875 %{
13876   predicate(UseAPX);
13877   match(Set dst (OrL (LoadL src1) src2));
13878   effect(KILL cr);
13879   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13880 
13881   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13882   ins_encode %{
13883     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
13884   %}
13885   ins_pipe(ialu_reg);
13886 %}
13887 
13888 // Or Register with Memory
13889 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13890 %{
13891   predicate(!UseAPX);
13892   match(Set dst (OrL dst (LoadL src)));
13893   effect(KILL cr);
13894   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13895 
13896   ins_cost(150);
13897   format %{ "orq     $dst, $src\t# long" %}
13898   ins_encode %{
13899     __ orq($dst$$Register, $src$$Address);
13900   %}
13901   ins_pipe(ialu_reg_mem);
13902 %}
13903 
13904 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13905 %{
13906   predicate(UseAPX);
13907   match(Set dst (OrL src1 (LoadL src2)));
13908   effect(KILL cr);
13909   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13910 
13911   ins_cost(150);
13912   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13913   ins_encode %{
13914     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
13915   %}
13916   ins_pipe(ialu_reg_mem);
13917 %}
13918 
13919 // Or Memory with Register
13920 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13921 %{
13922   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
13923   effect(KILL cr);
13924   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13925 
13926   ins_cost(150);
13927   format %{ "orq     $dst, $src\t# long" %}
13928   ins_encode %{
13929     __ orq($dst$$Address, $src$$Register);
13930   %}
13931   ins_pipe(ialu_mem_reg);
13932 %}
13933 
13934 // Or Memory with Immediate
13935 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13936 %{
13937   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
13938   effect(KILL cr);
13939   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13940 
13941   ins_cost(125);
13942   format %{ "orq     $dst, $src\t# long" %}
13943   ins_encode %{
13944     __ orq($dst$$Address, $src$$constant);
13945   %}
13946   ins_pipe(ialu_mem_imm);
13947 %}
13948 
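// OrL with a power-of-two constant beyond the imm32 range cannot use orq
// (there is no or-with-imm64 encoding), so setting a single high bit is
// emitted as btsq instead; e.g. con == (1L << 40) becomes "btsq $dst, 40".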
13949 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
13950 %{
13951   // con should be a pure 64-bit power of 2 immediate
13952   // because AND/OR works well enough for 8/32-bit values.
13953   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
13954 
13955   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
13956   effect(KILL cr);
13957 
13958   ins_cost(125);
13959   format %{ "btsq    $dst, log2($con)\t# long" %}
13960   ins_encode %{
13961     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
13962   %}
13963   ins_pipe(ialu_mem_imm);
13964 %}
13965 
13966 // Xor Instructions
13967 // Xor Register with Register
13968 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13969 %{
13970   predicate(!UseAPX);
13971   match(Set dst (XorL dst src));
13972   effect(KILL cr);
13973   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13974 
13975   format %{ "xorq    $dst, $src\t# long" %}
13976   ins_encode %{
13977     __ xorq($dst$$Register, $src$$Register);
13978   %}
13979   ins_pipe(ialu_reg_reg);
13980 %}
13981 
13982 // Xor Register with Register using New Data Destination (NDD)
13983 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13984 %{
13985   predicate(UseAPX);
13986   match(Set dst (XorL src1 src2));
13987   effect(KILL cr);
13988   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13989 
13990   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
13991   ins_encode %{
13992     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13993   %}
13994   ins_pipe(ialu_reg_reg);
13995 %}
13996 
13997 // Xor Register with Immediate -1
13998 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
13999 %{
14000   predicate(!UseAPX);
14001   match(Set dst (XorL dst imm));
14002 
14003   format %{ "notq   $dst" %}
14004   ins_encode %{
14005      __ notq($dst$$Register);
14006   %}
14007   ins_pipe(ialu_reg);
14008 %}
14009 
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14011 %{
14012   predicate(UseAPX);
14013   match(Set dst (XorL src imm));
14014 
14015   format %{ "enotq   $dst, $src" %}
14016   ins_encode %{
14017     __ enotq($dst$$Register, $src$$Register);
14018   %}
14019   ins_pipe(ialu_reg);
14020 %}
14021 
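// Since x ^ -1 == ~x, xor-with-minus-one is strength-reduced to notq
// (legacy) or enotq (APX NDD); neither instruction writes the flags, which
// is why these two rules carry no KILL cr effect.
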
14022 // Xor Register with Immediate
14023 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14024 %{
  // Strict predicate check to keep selection of xorL_rReg_im1 cost-agnostic when immL32 src is -1.
14026   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14027   match(Set dst (XorL dst src));
14028   effect(KILL cr);
14029   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14030 
14031   format %{ "xorq    $dst, $src\t# long" %}
14032   ins_encode %{
14033     __ xorq($dst$$Register, $src$$constant);
14034   %}
14035   ins_pipe(ialu_reg);
14036 %}
14037 
14038 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14039 %{
  // Strict predicate check to keep selection of xorL_rReg_im1_ndd cost-agnostic when immL32 src2 is -1.
14041   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14042   match(Set dst (XorL src1 src2));
14043   effect(KILL cr);
14044   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14045 
14046   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14047   ins_encode %{
14048     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14049   %}
14050   ins_pipe(ialu_reg);
14051 %}
14052 
// Xor Memory with Immediate (NDD, result in register)
14054 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14055 %{
14056   predicate(UseAPX);
14057   match(Set dst (XorL (LoadL src1) src2));
14058   effect(KILL cr);
14059   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14060   ins_cost(150);
14061 
14062   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14063   ins_encode %{
14064     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14065   %}
14066   ins_pipe(ialu_reg);
14067 %}
14068 
14069 // Xor Register with Memory
14070 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14071 %{
14072   predicate(!UseAPX);
14073   match(Set dst (XorL dst (LoadL src)));
14074   effect(KILL cr);
14075   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14076 
14077   ins_cost(150);
14078   format %{ "xorq    $dst, $src\t# long" %}
14079   ins_encode %{
14080     __ xorq($dst$$Register, $src$$Address);
14081   %}
14082   ins_pipe(ialu_reg_mem);
14083 %}
14084 
14085 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14086 %{
14087   predicate(UseAPX);
14088   match(Set dst (XorL src1 (LoadL src2)));
14089   effect(KILL cr);
14090   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14091 
14092   ins_cost(150);
14093   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14094   ins_encode %{
14095     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14096   %}
14097   ins_pipe(ialu_reg_mem);
14098 %}
14099 
14100 // Xor Memory with Register
14101 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14102 %{
14103   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14104   effect(KILL cr);
14105   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14106 
14107   ins_cost(150);
14108   format %{ "xorq    $dst, $src\t# long" %}
14109   ins_encode %{
14110     __ xorq($dst$$Address, $src$$Register);
14111   %}
14112   ins_pipe(ialu_mem_reg);
14113 %}
14114 
14115 // Xor Memory with Immediate
14116 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14117 %{
14118   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14119   effect(KILL cr);
14120   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14121 
14122   ins_cost(125);
14123   format %{ "xorq    $dst, $src\t# long" %}
14124   ins_encode %{
14125     __ xorq($dst$$Address, $src$$constant);
14126   %}
14127   ins_pipe(ialu_mem_imm);
14128 %}
14129 
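// CmpLTMask produces an all-ones or all-zero mask: dst = (p < q) ? -1 : 0.
// The general form materializes it with cmp/setcc/neg; the (CmpLTMask dst 0)
// special case below is just an arithmetic shift of the sign bit, dst >> 31.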
14130 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14131 %{
14132   match(Set dst (CmpLTMask p q));
14133   effect(KILL cr);
14134 
14135   ins_cost(400);
  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setcc   $dst\t# emits setlt + movzbl, or setzul for APX\n\t"
            "negl    $dst" %}
14139   ins_encode %{
14140     __ cmpl($p$$Register, $q$$Register);
14141     __ setcc(Assembler::less, $dst$$Register);
14142     __ negl($dst$$Register);
14143   %}
14144   ins_pipe(pipe_slow);
14145 %}
14146 
14147 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14148 %{
14149   match(Set dst (CmpLTMask dst zero));
14150   effect(KILL cr);
14151 
14152   ins_cost(100);
14153   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14154   ins_encode %{
14155     __ sarl($dst$$Register, 31);
14156   %}
14157   ins_pipe(ialu_reg);
14158 %}
14159 
// Better to save a register than to avoid a branch.
14161 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14162 %{
14163   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14164   effect(KILL cr);
14165   ins_cost(300);
14166   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14167             "jge     done\n\t"
14168             "addl    $p,$y\n"
14169             "done:   " %}
14170   ins_encode %{
14171     Register Rp = $p$$Register;
14172     Register Rq = $q$$Register;
14173     Register Ry = $y$$Register;
14174     Label done;
14175     __ subl(Rp, Rq);
14176     __ jccb(Assembler::greaterEqual, done);
14177     __ addl(Rp, Ry);
14178     __ bind(done);
14179   %}
14180   ins_pipe(pipe_cmplt);
14181 %}
14182 
// Better to save a register than to avoid a branch.
14184 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14185 %{
14186   match(Set y (AndI (CmpLTMask p q) y));
14187   effect(KILL cr);
14188 
14189   ins_cost(300);
14190 
14191   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14192             "jlt     done\n\t"
14193             "xorl    $y, $y\n"
14194             "done:   " %}
14195   ins_encode %{
14196     Register Rp = $p$$Register;
14197     Register Rq = $q$$Register;
14198     Register Ry = $y$$Register;
14199     Label done;
14200     __ cmpl(Rp, Rq);
14201     __ jccb(Assembler::less, done);
14202     __ xorl(Ry, Ry);
14203     __ bind(done);
14204   %}
14205   ins_pipe(pipe_cmplt);
14206 %}
14207 
14208 
14209 //---------- FP Instructions------------------------------------------------
14210 
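// ucomiss/ucomisd report an unordered (NaN) operand as ZF = PF = CF = 1.
// The rFlagsRegU rules below therefore patch the saved flags afterwards
// (the pushfq / andq [rsp] / popfq sequence from emit_cmpfp_fixup clears
// ZF and PF but keeps CF), so a NaN comparison reads as strictly below;
// the cheaper rFlagsRegUCF rules skip the fixup for consumers that can
// tolerate the unordered flag pattern directly.
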
14211 // Really expensive, avoid
14212 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14213 %{
14214   match(Set cr (CmpF src1 src2));
14215 
14216   ins_cost(500);
14217   format %{ "ucomiss $src1, $src2\n\t"
14218             "jnp,s   exit\n\t"
14219             "pushfq\t# saw NaN, set CF\n\t"
14220             "andq    [rsp], #0xffffff2b\n\t"
14221             "popfq\n"
14222     "exit:" %}
14223   ins_encode %{
14224     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14225     emit_cmpfp_fixup(masm);
14226   %}
14227   ins_pipe(pipe_slow);
14228 %}
14229 
14230 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14231   match(Set cr (CmpF src1 src2));
14232 
14233   ins_cost(100);
14234   format %{ "ucomiss $src1, $src2" %}
14235   ins_encode %{
14236     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14237   %}
14238   ins_pipe(pipe_slow);
14239 %}
14240 
14241 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14242   match(Set cr (CmpF src1 (LoadF src2)));
14243 
14244   ins_cost(100);
14245   format %{ "ucomiss $src1, $src2" %}
14246   ins_encode %{
14247     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14248   %}
14249   ins_pipe(pipe_slow);
14250 %}
14251 
14252 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14253   match(Set cr (CmpF src con));
14254   ins_cost(100);
14255   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14256   ins_encode %{
14257     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14258   %}
14259   ins_pipe(pipe_slow);
14260 %}
14261 
14262 // Really expensive, avoid
14263 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14264 %{
14265   match(Set cr (CmpD src1 src2));
14266 
14267   ins_cost(500);
14268   format %{ "ucomisd $src1, $src2\n\t"
14269             "jnp,s   exit\n\t"
14270             "pushfq\t# saw NaN, set CF\n\t"
14271             "andq    [rsp], #0xffffff2b\n\t"
14272             "popfq\n"
14273     "exit:" %}
14274   ins_encode %{
14275     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14276     emit_cmpfp_fixup(masm);
14277   %}
14278   ins_pipe(pipe_slow);
14279 %}
14280 
14281 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14282   match(Set cr (CmpD src1 src2));
14283 
14284   ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
14286   ins_encode %{
14287     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14288   %}
14289   ins_pipe(pipe_slow);
14290 %}
14291 
14292 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14293   match(Set cr (CmpD src1 (LoadD src2)));
14294 
14295   ins_cost(100);
14296   format %{ "ucomisd $src1, $src2" %}
14297   ins_encode %{
14298     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14299   %}
14300   ins_pipe(pipe_slow);
14301 %}
14302 
14303 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14304   match(Set cr (CmpD src con));
14305   ins_cost(100);
14306   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14307   ins_encode %{
14308     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14309   %}
14310   ins_pipe(pipe_slow);
14311 %}
14312 
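// The CmpF3/CmpD3 rules below produce Java's three-way compare result in a
// GPR: -1 if less or unordered, 0 if equal, +1 if greater. emit_cmpfp3
// starts from -1 and only overwrites it on the ordered, not-less paths, so
// NaN naturally yields -1, matching fcmpl/dcmpl semantics.
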
14313 // Compare into -1,0,1
14314 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14315 %{
14316   match(Set dst (CmpF3 src1 src2));
14317   effect(KILL cr);
14318 
14319   ins_cost(275);
14320   format %{ "ucomiss $src1, $src2\n\t"
14321             "movl    $dst, #-1\n\t"
14322             "jp,s    done\n\t"
14323             "jb,s    done\n\t"
14324             "setne   $dst\n\t"
14325             "movzbl  $dst, $dst\n"
14326     "done:" %}
14327   ins_encode %{
14328     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14329     emit_cmpfp3(masm, $dst$$Register);
14330   %}
14331   ins_pipe(pipe_slow);
14332 %}
14333 
14334 // Compare into -1,0,1
14335 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14336 %{
14337   match(Set dst (CmpF3 src1 (LoadF src2)));
14338   effect(KILL cr);
14339 
14340   ins_cost(275);
14341   format %{ "ucomiss $src1, $src2\n\t"
14342             "movl    $dst, #-1\n\t"
14343             "jp,s    done\n\t"
14344             "jb,s    done\n\t"
14345             "setne   $dst\n\t"
14346             "movzbl  $dst, $dst\n"
14347     "done:" %}
14348   ins_encode %{
14349     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14350     emit_cmpfp3(masm, $dst$$Register);
14351   %}
14352   ins_pipe(pipe_slow);
14353 %}
14354 
14355 // Compare into -1,0,1
14356 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14357   match(Set dst (CmpF3 src con));
14358   effect(KILL cr);
14359 
14360   ins_cost(275);
14361   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14362             "movl    $dst, #-1\n\t"
14363             "jp,s    done\n\t"
14364             "jb,s    done\n\t"
14365             "setne   $dst\n\t"
14366             "movzbl  $dst, $dst\n"
14367     "done:" %}
14368   ins_encode %{
14369     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14370     emit_cmpfp3(masm, $dst$$Register);
14371   %}
14372   ins_pipe(pipe_slow);
14373 %}
14374 
14375 // Compare into -1,0,1
14376 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14377 %{
14378   match(Set dst (CmpD3 src1 src2));
14379   effect(KILL cr);
14380 
14381   ins_cost(275);
14382   format %{ "ucomisd $src1, $src2\n\t"
14383             "movl    $dst, #-1\n\t"
14384             "jp,s    done\n\t"
14385             "jb,s    done\n\t"
14386             "setne   $dst\n\t"
14387             "movzbl  $dst, $dst\n"
14388     "done:" %}
14389   ins_encode %{
14390     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14391     emit_cmpfp3(masm, $dst$$Register);
14392   %}
14393   ins_pipe(pipe_slow);
14394 %}
14395 
14396 // Compare into -1,0,1
14397 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14398 %{
14399   match(Set dst (CmpD3 src1 (LoadD src2)));
14400   effect(KILL cr);
14401 
14402   ins_cost(275);
14403   format %{ "ucomisd $src1, $src2\n\t"
14404             "movl    $dst, #-1\n\t"
14405             "jp,s    done\n\t"
14406             "jb,s    done\n\t"
14407             "setne   $dst\n\t"
14408             "movzbl  $dst, $dst\n"
14409     "done:" %}
14410   ins_encode %{
14411     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14412     emit_cmpfp3(masm, $dst$$Register);
14413   %}
14414   ins_pipe(pipe_slow);
14415 %}
14416 
14417 // Compare into -1,0,1
14418 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14419   match(Set dst (CmpD3 src con));
14420   effect(KILL cr);
14421 
14422   ins_cost(275);
14423   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14424             "movl    $dst, #-1\n\t"
14425             "jp,s    done\n\t"
14426             "jb,s    done\n\t"
14427             "setne   $dst\n\t"
14428             "movzbl  $dst, $dst\n"
14429     "done:" %}
14430   ins_encode %{
14431     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14432     emit_cmpfp3(masm, $dst$$Register);
14433   %}
14434   ins_pipe(pipe_slow);
14435 %}
14436 
14437 //----------Arithmetic Conversion Instructions---------------------------------
14438 
14439 instruct convF2D_reg_reg(regD dst, regF src)
14440 %{
14441   match(Set dst (ConvF2D src));
14442 
14443   format %{ "cvtss2sd $dst, $src" %}
14444   ins_encode %{
14445     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14446   %}
14447   ins_pipe(pipe_slow); // XXX
14448 %}
14449 
14450 instruct convF2D_reg_mem(regD dst, memory src)
14451 %{
14452   predicate(UseAVX == 0);
14453   match(Set dst (ConvF2D (LoadF src)));
14454 
14455   format %{ "cvtss2sd $dst, $src" %}
14456   ins_encode %{
14457     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14458   %}
14459   ins_pipe(pipe_slow); // XXX
14460 %}
14461 
14462 instruct convD2F_reg_reg(regF dst, regD src)
14463 %{
14464   match(Set dst (ConvD2F src));
14465 
14466   format %{ "cvtsd2ss $dst, $src" %}
14467   ins_encode %{
14468     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14469   %}
14470   ins_pipe(pipe_slow); // XXX
14471 %}
14472 
14473 instruct convD2F_reg_mem(regF dst, memory src)
14474 %{
14475   predicate(UseAVX == 0);
14476   match(Set dst (ConvD2F (LoadD src)));
14477 
14478   format %{ "cvtsd2ss $dst, $src" %}
14479   ins_encode %{
14480     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14481   %}
14482   ins_pipe(pipe_slow); // XXX
14483 %}
14484 
14485 // XXX do mem variants
14486 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14487 %{
14488   predicate(!VM_Version::supports_avx10_2());
14489   match(Set dst (ConvF2I src));
14490   effect(KILL cr);
14491   format %{ "convert_f2i $dst, $src" %}
14492   ins_encode %{
14493     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14494   %}
14495   ins_pipe(pipe_slow);
14496 %}
14497 
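// A sketch of why two families exist: legacy cvttss2si returns the "integer
// indefinite" value (0x80000000 / 0x8000000000000000) on NaN and overflow,
// so convertF2I must branch and fix the result up to Java semantics (NaN to
// zero, saturation at MIN/MAX), killing the flags. The AVX10.2 saturating
// converts below (evcvttss2sis* and friends) produce the Java result
// directly, so they need neither the fixup nor a KILL cr effect.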
14498 instruct convF2I_reg_reg_avx10(rRegI dst, regF src)
14499 %{
14500   predicate(VM_Version::supports_avx10_2());
14501   match(Set dst (ConvF2I src));
14502   format %{ "evcvttss2sisl $dst, $src" %}
14503   ins_encode %{
14504     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14505   %}
14506   ins_pipe(pipe_slow);
14507 %}
14508 
14509 instruct convF2I_reg_mem_avx10(rRegI dst, memory src)
14510 %{
14511   predicate(VM_Version::supports_avx10_2());
14512   match(Set dst (ConvF2I (LoadF src)));
14513   format %{ "evcvttss2sisl $dst, $src" %}
14514   ins_encode %{
14515     __ evcvttss2sisl($dst$$Register, $src$$Address);
14516   %}
14517   ins_pipe(pipe_slow);
14518 %}
14519 
14520 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14521 %{
14522   predicate(!VM_Version::supports_avx10_2());
14523   match(Set dst (ConvF2L src));
14524   effect(KILL cr);
14525   format %{ "convert_f2l $dst, $src"%}
14526   ins_encode %{
14527     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14528   %}
14529   ins_pipe(pipe_slow);
14530 %}
14531 
14532 instruct convF2L_reg_reg_avx10(rRegL dst, regF src)
14533 %{
14534   predicate(VM_Version::supports_avx10_2());
14535   match(Set dst (ConvF2L src));
14536   format %{ "evcvttss2sisq $dst, $src" %}
14537   ins_encode %{
14538     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14539   %}
14540   ins_pipe(pipe_slow);
14541 %}
14542 
14543 instruct convF2L_reg_mem_avx10(rRegL dst, memory src)
14544 %{
14545   predicate(VM_Version::supports_avx10_2());
14546   match(Set dst (ConvF2L (LoadF src)));
14547   format %{ "evcvttss2sisq $dst, $src" %}
14548   ins_encode %{
14549     __ evcvttss2sisq($dst$$Register, $src$$Address);
14550   %}
14551   ins_pipe(pipe_slow);
14552 %}
14553 
14554 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14555 %{
14556   predicate(!VM_Version::supports_avx10_2());
14557   match(Set dst (ConvD2I src));
14558   effect(KILL cr);
14559   format %{ "convert_d2i $dst, $src"%}
14560   ins_encode %{
14561     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14562   %}
14563   ins_pipe(pipe_slow);
14564 %}
14565 
14566 instruct convD2I_reg_reg_avx10(rRegI dst, regD src)
14567 %{
14568   predicate(VM_Version::supports_avx10_2());
14569   match(Set dst (ConvD2I src));
14570   format %{ "evcvttsd2sisl $dst, $src" %}
14571   ins_encode %{
14572     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14573   %}
14574   ins_pipe(pipe_slow);
14575 %}
14576 
14577 instruct convD2I_reg_mem_avx10(rRegI dst, memory src)
14578 %{
14579   predicate(VM_Version::supports_avx10_2());
14580   match(Set dst (ConvD2I (LoadD src)));
14581   format %{ "evcvttsd2sisl $dst, $src" %}
14582   ins_encode %{
14583     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14584   %}
14585   ins_pipe(pipe_slow);
14586 %}
14587 
14588 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14589 %{
14590   predicate(!VM_Version::supports_avx10_2());
14591   match(Set dst (ConvD2L src));
14592   effect(KILL cr);
14593   format %{ "convert_d2l $dst, $src"%}
14594   ins_encode %{
14595     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14596   %}
14597   ins_pipe(pipe_slow);
14598 %}
14599 
14600 instruct convD2L_reg_reg_avx10(rRegL dst, regD src)
14601 %{
14602   predicate(VM_Version::supports_avx10_2());
14603   match(Set dst (ConvD2L src));
14604   format %{ "evcvttsd2sisq $dst, $src" %}
14605   ins_encode %{
14606     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14607   %}
14608   ins_pipe(pipe_slow);
14609 %}
14610 
14611 instruct convD2L_reg_mem_avx10(rRegL dst, memory src)
14612 %{
14613   predicate(VM_Version::supports_avx10_2());
14614   match(Set dst (ConvD2L (LoadD src)));
14615   format %{ "evcvttsd2sisq $dst, $src" %}
14616   ins_encode %{
14617     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14618   %}
14619   ins_pipe(pipe_slow);
14620 %}
14621 
14622 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14623 %{
14624   match(Set dst (RoundD src));
14625   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src\t# using $rtmp and $rcx as TEMP" %}
14627   ins_encode %{
14628     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14629   %}
14630   ins_pipe(pipe_slow);
14631 %}
14632 
14633 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14634 %{
14635   match(Set dst (RoundF src));
14636   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14637   format %{ "round_float $dst,$src" %}
14638   ins_encode %{
14639     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14640   %}
14641   ins_pipe(pipe_slow);
14642 %}
14643 
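// In the scalar int/long to float/double conversions below, cvtsi2ss and
// cvtsi2sd merge into the destination XMM register and so carry a false
// dependence on whatever last wrote it; the pxor issued under AVX clears
// $dst first to break that dependence. Conceptually each rule still just
// computes dst = (float)src or dst = (double)src.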
14644 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14645 %{
14646   predicate(!UseXmmI2F);
14647   match(Set dst (ConvI2F src));
14648 
14649   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14650   ins_encode %{
14651     if (UseAVX > 0) {
14652       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14653     }
14654     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14655   %}
14656   ins_pipe(pipe_slow); // XXX
14657 %}
14658 
14659 instruct convI2F_reg_mem(regF dst, memory src)
14660 %{
14661   predicate(UseAVX == 0);
14662   match(Set dst (ConvI2F (LoadI src)));
14663 
14664   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14665   ins_encode %{
14666     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14667   %}
14668   ins_pipe(pipe_slow); // XXX
14669 %}
14670 
14671 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14672 %{
14673   predicate(!UseXmmI2D);
14674   match(Set dst (ConvI2D src));
14675 
14676   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14677   ins_encode %{
14678     if (UseAVX > 0) {
14679       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14680     }
14681     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14682   %}
14683   ins_pipe(pipe_slow); // XXX
14684 %}
14685 
14686 instruct convI2D_reg_mem(regD dst, memory src)
14687 %{
14688   predicate(UseAVX == 0);
14689   match(Set dst (ConvI2D (LoadI src)));
14690 
14691   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14692   ins_encode %{
14693     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14694   %}
14695   ins_pipe(pipe_slow); // XXX
14696 %}
14697 
14698 instruct convXI2F_reg(regF dst, rRegI src)
14699 %{
14700   predicate(UseXmmI2F);
14701   match(Set dst (ConvI2F src));
14702 
14703   format %{ "movdl $dst, $src\n\t"
            "cvtdq2ps $dst, $dst\t# i2f" %}
14705   ins_encode %{
14706     __ movdl($dst$$XMMRegister, $src$$Register);
14707     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14708   %}
14709   ins_pipe(pipe_slow); // XXX
14710 %}
14711 
14712 instruct convXI2D_reg(regD dst, rRegI src)
14713 %{
14714   predicate(UseXmmI2D);
14715   match(Set dst (ConvI2D src));
14716 
14717   format %{ "movdl $dst, $src\n\t"
            "cvtdq2pd $dst, $dst\t# i2d" %}
14719   ins_encode %{
14720     __ movdl($dst$$XMMRegister, $src$$Register);
14721     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14722   %}
14723   ins_pipe(pipe_slow); // XXX
14724 %}
14725 
14726 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14727 %{
14728   match(Set dst (ConvL2F src));
14729 
14730   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14731   ins_encode %{
14732     if (UseAVX > 0) {
14733       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14734     }
14735     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14736   %}
14737   ins_pipe(pipe_slow); // XXX
14738 %}
14739 
14740 instruct convL2F_reg_mem(regF dst, memory src)
14741 %{
14742   predicate(UseAVX == 0);
14743   match(Set dst (ConvL2F (LoadL src)));
14744 
14745   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14746   ins_encode %{
14747     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14748   %}
14749   ins_pipe(pipe_slow); // XXX
14750 %}
14751 
14752 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14753 %{
14754   match(Set dst (ConvL2D src));
14755 
14756   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14757   ins_encode %{
14758     if (UseAVX > 0) {
14759       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14760     }
14761     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14762   %}
14763   ins_pipe(pipe_slow); // XXX
14764 %}
14765 
14766 instruct convL2D_reg_mem(regD dst, memory src)
14767 %{
14768   predicate(UseAVX == 0);
14769   match(Set dst (ConvL2D (LoadL src)));
14770 
14771   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14772   ins_encode %{
14773     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14774   %}
14775   ins_pipe(pipe_slow); // XXX
14776 %}
14777 
14778 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14779 %{
14780   match(Set dst (ConvI2L src));
14781 
14782   ins_cost(125);
14783   format %{ "movslq  $dst, $src\t# i2l" %}
14784   ins_encode %{
14785     __ movslq($dst$$Register, $src$$Register);
14786   %}
14787   ins_pipe(ialu_reg_reg);
14788 %}
14789 
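// On x86-64 a 32-bit mov zeroes bits 63:32 of its destination, so the
// (AndL (ConvI2L src) 0xFFFFFFFF) idiom below reduces to a plain movl, and
// the register-register form elides even that when dst == src.
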
14790 // Zero-extend convert int to long
14791 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14792 %{
14793   match(Set dst (AndL (ConvI2L src) mask));
14794 
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
14796   ins_encode %{
14797     if ($dst$$reg != $src$$reg) {
14798       __ movl($dst$$Register, $src$$Register);
14799     }
14800   %}
14801   ins_pipe(ialu_reg_reg);
14802 %}
14803 
14804 // Zero-extend convert int to long
14805 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14806 %{
14807   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14808 
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
14810   ins_encode %{
14811     __ movl($dst$$Register, $src$$Address);
14812   %}
14813   ins_pipe(ialu_reg_mem);
14814 %}
14815 
14816 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14817 %{
14818   match(Set dst (AndL src mask));
14819 
14820   format %{ "movl    $dst, $src\t# zero-extend long" %}
14821   ins_encode %{
14822     __ movl($dst$$Register, $src$$Register);
14823   %}
14824   ins_pipe(ialu_reg_reg);
14825 %}
14826 
14827 instruct convL2I_reg_reg(rRegI dst, rRegL src)
14828 %{
14829   match(Set dst (ConvL2I src));
14830 
14831   format %{ "movl    $dst, $src\t# l2i" %}
14832   ins_encode %{
14833     __ movl($dst$$Register, $src$$Register);
14834   %}
14835   ins_pipe(ialu_reg_reg);
14836 %}
14837 
14838 
14839 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
14840   match(Set dst (MoveF2I src));
14841   effect(DEF dst, USE src);
14842 
14843   ins_cost(125);
14844   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
14845   ins_encode %{
14846     __ movl($dst$$Register, Address(rsp, $src$$disp));
14847   %}
14848   ins_pipe(ialu_reg_mem);
14849 %}
14850 
14851 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
14852   match(Set dst (MoveI2F src));
14853   effect(DEF dst, USE src);
14854 
14855   ins_cost(125);
14856   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
14857   ins_encode %{
14858     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
14859   %}
14860   ins_pipe(pipe_slow);
14861 %}
14862 
14863 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
14864   match(Set dst (MoveD2L src));
14865   effect(DEF dst, USE src);
14866 
14867   ins_cost(125);
14868   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
14869   ins_encode %{
14870     __ movq($dst$$Register, Address(rsp, $src$$disp));
14871   %}
14872   ins_pipe(ialu_reg_mem);
14873 %}
14874 
14875 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
14876   predicate(!UseXmmLoadAndClearUpper);
14877   match(Set dst (MoveL2D src));
14878   effect(DEF dst, USE src);
14879 
14880   ins_cost(125);
14881   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
14882   ins_encode %{
14883     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14884   %}
14885   ins_pipe(pipe_slow);
14886 %}
14887 
14888 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
14889   predicate(UseXmmLoadAndClearUpper);
14890   match(Set dst (MoveL2D src));
14891   effect(DEF dst, USE src);
14892 
14893   ins_cost(125);
14894   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
14895   ins_encode %{
14896     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14897   %}
14898   ins_pipe(pipe_slow);
14899 %}
14900 
14901 
14902 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
14903   match(Set dst (MoveF2I src));
14904   effect(DEF dst, USE src);
14905 
14906   ins_cost(95); // XXX
14907   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
14908   ins_encode %{
14909     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
14910   %}
14911   ins_pipe(pipe_slow);
14912 %}
14913 
14914 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
14915   match(Set dst (MoveI2F src));
14916   effect(DEF dst, USE src);
14917 
14918   ins_cost(100);
14919   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
14920   ins_encode %{
14921     __ movl(Address(rsp, $dst$$disp), $src$$Register);
14922   %}
14923   ins_pipe( ialu_mem_reg );
14924 %}
14925 
14926 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
14927   match(Set dst (MoveD2L src));
14928   effect(DEF dst, USE src);
14929 
14930   ins_cost(95); // XXX
  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
14932   ins_encode %{
14933     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
14934   %}
14935   ins_pipe(pipe_slow);
14936 %}
14937 
14938 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
14939   match(Set dst (MoveL2D src));
14940   effect(DEF dst, USE src);
14941 
14942   ins_cost(100);
14943   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
14944   ins_encode %{
14945     __ movq(Address(rsp, $dst$$disp), $src$$Register);
14946   %}
14947   ins_pipe(ialu_mem_reg);
14948 %}
14949 
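// The register-register Move nodes below are raw bit-pattern transfers
// between GPRs and XMM registers via movd/movdq, i.e. what
// Float.floatToRawIntBits / Double.doubleToRawLongBits and their inverses
// compile to; no rounding or format conversion is involved.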
14950 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
14951   match(Set dst (MoveF2I src));
14952   effect(DEF dst, USE src);
14953   ins_cost(85);
14954   format %{ "movd    $dst,$src\t# MoveF2I" %}
14955   ins_encode %{
14956     __ movdl($dst$$Register, $src$$XMMRegister);
14957   %}
14958   ins_pipe( pipe_slow );
14959 %}
14960 
14961 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
14962   match(Set dst (MoveD2L src));
14963   effect(DEF dst, USE src);
14964   ins_cost(85);
14965   format %{ "movd    $dst,$src\t# MoveD2L" %}
14966   ins_encode %{
14967     __ movdq($dst$$Register, $src$$XMMRegister);
14968   %}
14969   ins_pipe( pipe_slow );
14970 %}
14971 
14972 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
14973   match(Set dst (MoveI2F src));
14974   effect(DEF dst, USE src);
14975   ins_cost(100);
14976   format %{ "movd    $dst,$src\t# MoveI2F" %}
14977   ins_encode %{
14978     __ movdl($dst$$XMMRegister, $src$$Register);
14979   %}
14980   ins_pipe( pipe_slow );
14981 %}
14982 
14983 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
14984   match(Set dst (MoveL2D src));
14985   effect(DEF dst, USE src);
14986   ins_cost(100);
14987   format %{ "movd    $dst,$src\t# MoveL2D" %}
14988   ins_encode %{
14989      __ movdq($dst$$XMMRegister, $src$$Register);
14990   %}
14991   ins_pipe( pipe_slow );
14992 %}
14993 
14994 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
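// A sketch of the strategy shared by the rep_stos* rules: short arrays take
// a simple 8-bytes-per-iteration store loop, while the large path picks
// between "rep stosb" (UseFastStosb), a 32-byte XMM/YMM store loop
// (UseXMMForObjInit), and plain "rep stosq". The EVEX variants additionally
// pass a kReg temporary through to clear_mem for the AVX-512 code path.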
14996 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
14997                   Universe dummy, rFlagsReg cr)
14998 %{
14999   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15000   match(Set dummy (ClearArray cnt base));
15001   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15002 
15003   format %{ $$template
15004     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15005     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15006     $$emit$$"jg      LARGE\n\t"
15007     $$emit$$"dec     rcx\n\t"
15008     $$emit$$"js      DONE\t# Zero length\n\t"
15009     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15010     $$emit$$"dec     rcx\n\t"
15011     $$emit$$"jge     LOOP\n\t"
15012     $$emit$$"jmp     DONE\n\t"
15013     $$emit$$"# LARGE:\n\t"
15014     if (UseFastStosb) {
15015        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15016        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15017     } else if (UseXMMForObjInit) {
15018        $$emit$$"mov     rdi,rax\n\t"
15019        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15020        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15021        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15022        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15023        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15024        $$emit$$"add     0x40,rax\n\t"
15025        $$emit$$"# L_zero_64_bytes:\n\t"
15026        $$emit$$"sub     0x8,rcx\n\t"
15027        $$emit$$"jge     L_loop\n\t"
15028        $$emit$$"add     0x4,rcx\n\t"
15029        $$emit$$"jl      L_tail\n\t"
15030        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15031        $$emit$$"add     0x20,rax\n\t"
15032        $$emit$$"sub     0x4,rcx\n\t"
15033        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15034        $$emit$$"add     0x4,rcx\n\t"
15035        $$emit$$"jle     L_end\n\t"
15036        $$emit$$"dec     rcx\n\t"
15037        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15038        $$emit$$"vmovq   xmm0,(rax)\n\t"
15039        $$emit$$"add     0x8,rax\n\t"
15040        $$emit$$"dec     rcx\n\t"
15041        $$emit$$"jge     L_sloop\n\t"
15042        $$emit$$"# L_end:\n\t"
15043     } else {
15044        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15045     }
15046     $$emit$$"# DONE"
15047   %}
15048   ins_encode %{
15049     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15050                  $tmp$$XMMRegister, false, knoreg);
15051   %}
15052   ins_pipe(pipe_slow);
15053 %}
15054 
15055 // Small non-constant length ClearArray for AVX512 targets.
15056 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15057                        Universe dummy, rFlagsReg cr)
15058 %{
15059   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15060   match(Set dummy (ClearArray cnt base));
15061   ins_cost(125);
15062   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15063 
15064   format %{ $$template
15065     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15066     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15067     $$emit$$"jg      LARGE\n\t"
15068     $$emit$$"dec     rcx\n\t"
15069     $$emit$$"js      DONE\t# Zero length\n\t"
15070     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15071     $$emit$$"dec     rcx\n\t"
15072     $$emit$$"jge     LOOP\n\t"
15073     $$emit$$"jmp     DONE\n\t"
15074     $$emit$$"# LARGE:\n\t"
15075     if (UseFastStosb) {
15076        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15077        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15078     } else if (UseXMMForObjInit) {
15079        $$emit$$"mov     rdi,rax\n\t"
15080        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15081        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15082        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15083        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15084        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15085        $$emit$$"add     0x40,rax\n\t"
15086        $$emit$$"# L_zero_64_bytes:\n\t"
15087        $$emit$$"sub     0x8,rcx\n\t"
15088        $$emit$$"jge     L_loop\n\t"
15089        $$emit$$"add     0x4,rcx\n\t"
15090        $$emit$$"jl      L_tail\n\t"
15091        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15092        $$emit$$"add     0x20,rax\n\t"
15093        $$emit$$"sub     0x4,rcx\n\t"
15094        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15095        $$emit$$"add     0x4,rcx\n\t"
15096        $$emit$$"jle     L_end\n\t"
15097        $$emit$$"dec     rcx\n\t"
15098        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15099        $$emit$$"vmovq   xmm0,(rax)\n\t"
15100        $$emit$$"add     0x8,rax\n\t"
15101        $$emit$$"dec     rcx\n\t"
15102        $$emit$$"jge     L_sloop\n\t"
15103        $$emit$$"# L_end:\n\t"
15104     } else {
15105        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15106     }
15107     $$emit$$"# DONE"
15108   %}
15109   ins_encode %{
15110     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15111                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15112   %}
15113   ins_pipe(pipe_slow);
15114 %}
15115 
15116 // Large non-constant length ClearArray for non-AVX512 targets.
15117 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15118                         Universe dummy, rFlagsReg cr)
15119 %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15121   match(Set dummy (ClearArray cnt base));
15122   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15123 
15124   format %{ $$template
15125     if (UseFastStosb) {
15126        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15127        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15128        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15129     } else if (UseXMMForObjInit) {
15130        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15131        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15132        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15133        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15134        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15135        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15136        $$emit$$"add     0x40,rax\n\t"
15137        $$emit$$"# L_zero_64_bytes:\n\t"
15138        $$emit$$"sub     0x8,rcx\n\t"
15139        $$emit$$"jge     L_loop\n\t"
15140        $$emit$$"add     0x4,rcx\n\t"
15141        $$emit$$"jl      L_tail\n\t"
15142        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15143        $$emit$$"add     0x20,rax\n\t"
15144        $$emit$$"sub     0x4,rcx\n\t"
15145        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15146        $$emit$$"add     0x4,rcx\n\t"
15147        $$emit$$"jle     L_end\n\t"
15148        $$emit$$"dec     rcx\n\t"
15149        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15150        $$emit$$"vmovq   xmm0,(rax)\n\t"
15151        $$emit$$"add     0x8,rax\n\t"
15152        $$emit$$"dec     rcx\n\t"
15153        $$emit$$"jge     L_sloop\n\t"
15154        $$emit$$"# L_end:\n\t"
15155     } else {
15156        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15157        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15158     }
15159   %}
15160   ins_encode %{
15161     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15162                  $tmp$$XMMRegister, true, knoreg);
15163   %}
15164   ins_pipe(pipe_slow);
15165 %}
15166 
15167 // Large non-constant length ClearArray for AVX512 targets.
15168 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15169                              Universe dummy, rFlagsReg cr)
15170 %{
15171   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15172   match(Set dummy (ClearArray cnt base));
15173   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15174 
15175   format %{ $$template
15176     if (UseFastStosb) {
15177        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15178        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15179        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15180     } else if (UseXMMForObjInit) {
15181        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15182        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15183        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15184        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15185        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15186        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15187        $$emit$$"add     0x40,rax\n\t"
15188        $$emit$$"# L_zero_64_bytes:\n\t"
15189        $$emit$$"sub     0x8,rcx\n\t"
15190        $$emit$$"jge     L_loop\n\t"
15191        $$emit$$"add     0x4,rcx\n\t"
15192        $$emit$$"jl      L_tail\n\t"
15193        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15194        $$emit$$"add     0x20,rax\n\t"
15195        $$emit$$"sub     0x4,rcx\n\t"
15196        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15197        $$emit$$"add     0x4,rcx\n\t"
15198        $$emit$$"jle     L_end\n\t"
15199        $$emit$$"dec     rcx\n\t"
15200        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15201        $$emit$$"vmovq   xmm0,(rax)\n\t"
15202        $$emit$$"add     0x8,rax\n\t"
15203        $$emit$$"dec     rcx\n\t"
15204        $$emit$$"jge     L_sloop\n\t"
15205        $$emit$$"# L_end:\n\t"
15206     } else {
15207        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15208        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15209     }
15210   %}
15211   ins_encode %{
15212     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15213                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15214   %}
15215   ins_pipe(pipe_slow);
15216 %}
15217 
15218 // Small constant length ClearArray for AVX512 targets.
15219 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15220 %{
15221   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15222   match(Set dummy (ClearArray cnt base));
15223   ins_cost(100);
15224   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base, $cnt" %}
15226   ins_encode %{
15227    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15228   %}
15229   ins_pipe(pipe_slow);
15230 %}
15231 
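// String compare intrinsics: one rule per encoding pair (LL, UU, LU, UL),
// each in a plain and an _evex (avx512vlbw, with a kReg temporary) flavor.
// Note the UL forms call string_compare with the operands swapped, so the
// shared stub only has to handle one mixed-encoding direction.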
15232 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15233                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15234 %{
15235   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15236   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15237   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15238 
15239   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15240   ins_encode %{
15241     __ string_compare($str1$$Register, $str2$$Register,
15242                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15243                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15244   %}
15245   ins_pipe( pipe_slow );
15246 %}
15247 
15248 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15249                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15250 %{
15251   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15252   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15253   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15254 
15255   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15256   ins_encode %{
15257     __ string_compare($str1$$Register, $str2$$Register,
15258                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15259                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15260   %}
15261   ins_pipe( pipe_slow );
15262 %}
15263 
15264 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15265                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15266 %{
15267   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15268   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15269   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15270 
15271   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15272   ins_encode %{
15273     __ string_compare($str1$$Register, $str2$$Register,
15274                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15275                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15276   %}
15277   ins_pipe( pipe_slow );
15278 %}
15279 
15280 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15281                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15282 %{
15283   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15284   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15285   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15286 
15287   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15288   ins_encode %{
15289     __ string_compare($str1$$Register, $str2$$Register,
15290                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15291                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15292   %}
15293   ins_pipe( pipe_slow );
15294 %}
15295 
15296 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15297                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15298 %{
15299   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15300   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15301   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15302 
15303   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15304   ins_encode %{
15305     __ string_compare($str1$$Register, $str2$$Register,
15306                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15307                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15308   %}
15309   ins_pipe( pipe_slow );
15310 %}
15311 
15312 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15313                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15314 %{
15315   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15316   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15317   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15318 
15319   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15320   ins_encode %{
15321     __ string_compare($str1$$Register, $str2$$Register,
15322                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15323                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15324   %}
15325   ins_pipe( pipe_slow );
15326 %}
15327 
15328 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15329                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15330 %{
15331   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15332   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15333   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15334 
15335   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15336   ins_encode %{
15337     __ string_compare($str2$$Register, $str1$$Register,
15338                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15339                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15340   %}
15341   ins_pipe( pipe_slow );
15342 %}
15343 
15344 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15345                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15346 %{
15347   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15348   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15349   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15350 
15351   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15352   ins_encode %{
15353     __ string_compare($str2$$Register, $str1$$Register,
15354                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15355                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15356   %}
15357   ins_pipe( pipe_slow );
15358 %}
15359 
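// A sketch of the size split in the constant-count IndexOf rules below:
// string_indexofC8 is used once the needle fills a whole 16-byte XMM
// register (16 LL bytes or 8 UU chars), so it never has to be staged through
// the stack; shorter needles go through string_indexof, which copies via the
// stack when a load might cross a page boundary.
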
// Fast search of substring with known size.
15361 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15362                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15363 %{
15364   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15365   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15366   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15367 
15368   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15369   ins_encode %{
15370     int icnt2 = (int)$int_cnt2$$constant;
15371     if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through the stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15380       __ string_indexof($str1$$Register, $str2$$Register,
15381                         $cnt1$$Register, $cnt2$$Register,
15382                         icnt2, $result$$Register,
15383                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15384     }
15385   %}
15386   ins_pipe( pipe_slow );
15387 %}
15388 
15389 // fast search of substring with known size.
15390 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15391                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15392 %{
15393   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15394   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15395   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15396 
15397   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15398   ins_encode %{
15399     int icnt2 = (int)$int_cnt2$$constant;
15400     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15409       __ string_indexof($str1$$Register, $str2$$Register,
15410                         $cnt1$$Register, $cnt2$$Register,
15411                         icnt2, $result$$Register,
15412                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15413     }
15414   %}
15415   ins_pipe( pipe_slow );
15416 %}
15417 
15418 // fast search of substring with known size.
15419 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15420                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15421 %{
15422   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15423   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15424   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15425 
15426   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15427   ins_encode %{
15428     int icnt2 = (int)$int_cnt2$$constant;
15429     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15438       __ string_indexof($str1$$Register, $str2$$Register,
15439                         $cnt1$$Register, $cnt2$$Register,
15440                         icnt2, $result$$Register,
15441                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15442     }
15443   %}
15444   ins_pipe( pipe_slow );
15445 %}
15446 
15447 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15448                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15449 %{
15450   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15451   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15452   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15453 
15454   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15455   ins_encode %{
15456     __ string_indexof($str1$$Register, $str2$$Register,
15457                       $cnt1$$Register, $cnt2$$Register,
15458                       (-1), $result$$Register,
15459                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15460   %}
15461   ins_pipe( pipe_slow );
15462 %}
15463 
15464 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15465                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15466 %{
15467   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15468   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15469   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15470 
15471   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15472   ins_encode %{
15473     __ string_indexof($str1$$Register, $str2$$Register,
15474                       $cnt1$$Register, $cnt2$$Register,
15475                       (-1), $result$$Register,
15476                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15477   %}
15478   ins_pipe( pipe_slow );
15479 %}
15480 
15481 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15482                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15483 %{
15484   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15485   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15486   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15487 
15488   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15489   ins_encode %{
15490     __ string_indexof($str1$$Register, $str2$$Register,
15491                       $cnt1$$Register, $cnt2$$Register,
15492                       (-1), $result$$Register,
15493                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15494   %}
15495   ins_pipe( pipe_slow );
15496 %}
15497 
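// IndexOf of a single code point.  The rule below scans 16-bit UTF-16
// units; the Latin1 rule after it scans bytes.  Both delegate to a
// vectorized scan in the macro assembler (string_indexof_char /
// stringL_indexof_char); the element width is the essential difference.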
15498 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15499                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15500 %{
15501   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15502   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15503   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15504   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15505   ins_encode %{
15506     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15507                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15508   %}
15509   ins_pipe( pipe_slow );
15510 %}
15511 
15512 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15513                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15514 %{
15515   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15516   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15517   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15518   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15519   ins_encode %{
15520     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15521                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15522   %}
15523   ins_pipe( pipe_slow );
15524 %}
15525 
15526 // fast string equals
15527 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15528                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15529 %{
15530   predicate(!VM_Version::supports_avx512vlbw());
15531   match(Set result (StrEquals (Binary str1 str2) cnt));
15532   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15533 
15534   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15535   ins_encode %{
15536     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15537                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15538                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15539   %}
15540   ins_pipe( pipe_slow );
15541 %}
15542 
15543 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15544                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15545 %{
15546   predicate(VM_Version::supports_avx512vlbw());
15547   match(Set result (StrEquals (Binary str1 str2) cnt));
15548   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15549 
15550   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15551   ins_encode %{
15552     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15553                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15554                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15555   %}
15556   ins_pipe( pipe_slow );
15557 %}
15558 
15559 // fast array equals
15560 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15561                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15562 %{
15563   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15564   match(Set result (AryEq ary1 ary2));
15565   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15566 
15567   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15568   ins_encode %{
15569     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15570                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15571                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15572   %}
15573   ins_pipe( pipe_slow );
15574 %}
15575 
15576 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15577                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15578 %{
15579   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15580   match(Set result (AryEq ary1 ary2));
15581   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15582 
15583   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15584   ins_encode %{
15585     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15586                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15587                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15588   %}
15589   ins_pipe( pipe_slow );
15590 %}
15591 
15592 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15593                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15594 %{
15595   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15596   match(Set result (AryEq ary1 ary2));
15597   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15598 
15599   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15600   ins_encode %{
15601     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15602                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15603                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15604   %}
15605   ins_pipe( pipe_slow );
15606 %}
15607 
15608 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15609                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15610 %{
15611   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15612   match(Set result (AryEq ary1 ary2));
15613   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15614 
15615   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15616   ins_encode %{
15617     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15618                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15619                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15620   %}
15621   ins_pipe( pipe_slow );
15622 %}
15623 
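// Vectorized Arrays.hashCode.  Scalar semantics, for reference:
//
//   for (e : array) h = 31*h + e;   // h starts at the incoming $result
//
// The unusually large bank of TEMP vectors holds parallel partial sums
// (and power-of-31 coefficients) that are folded together at the end --
// a sketch of the strategy, not of the exact register assignment.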
15624 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15625                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15626                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15627                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15628                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15629 %{
15630   predicate(UseAVX >= 2);
15631   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15632   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15633          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15634          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15635          USE basic_type, KILL cr);
15636 
15637   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
15638   ins_encode %{
15639     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15640                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15641                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15642                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15643                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15644                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15645                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15646   %}
15647   ins_pipe( pipe_slow );
15648 %}
15649 
15650 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15652 %{
15653   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15654   match(Set result (CountPositives ary1 len));
15655   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15656 
15657   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15658   ins_encode %{
15659     __ count_positives($ary1$$Register, $len$$Register,
15660                        $result$$Register, $tmp3$$Register,
15661                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15662   %}
15663   ins_pipe( pipe_slow );
15664 %}
15665 
15666 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15668 %{
15669   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15670   match(Set result (CountPositives ary1 len));
15671   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15672 
15673   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15674   ins_encode %{
15675     __ count_positives($ary1$$Register, $len$$Register,
15676                        $result$$Register, $tmp3$$Register,
15677                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15678   %}
15679   ins_pipe( pipe_slow );
15680 %}
15681 
15682 // fast char[] to byte[] compression
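// A char is compressible iff it fits in one byte (c <= 0xff, Latin-1).
// The $result register lets the caller detect that some char did not fit;
// see char_array_compress in the macro assembler for the exact contract.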
15683 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15684                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15685   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15686   match(Set result (StrCompressedCopy src (Binary dst len)));
15687   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15688          USE_KILL len, KILL tmp5, KILL cr);
15689 
15690   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15691   ins_encode %{
15692     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15693                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15694                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15695                            knoreg, knoreg);
15696   %}
15697   ins_pipe( pipe_slow );
15698 %}
15699 
15700 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15701                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15702   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15703   match(Set result (StrCompressedCopy src (Binary dst len)));
15704   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15705          USE_KILL len, KILL tmp5, KILL cr);
15706 
15707   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15708   ins_encode %{
15709     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15710                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15711                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15712                            $ktmp1$$KRegister, $ktmp2$$KRegister);
15713   %}
15714   ins_pipe( pipe_slow );
15715 %}

// fast byte[] to char[] inflation
15717 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15718                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15719   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15720   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15721   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15722 
15723   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15724   ins_encode %{
15725     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15726                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15727   %}
15728   ins_pipe( pipe_slow );
15729 %}
15730 
15731 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15732                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15733   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15734   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15735   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15736 
15737   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15738   ins_encode %{
15739     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15740                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15741   %}
15742   ins_pipe( pipe_slow );
15743 %}
15744 
15745 // encode char[] to byte[] in ISO_8859_1
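// This rule and the ASCII variant below share one assembler routine;
// the trailing bool selects the tighter ASCII check (chars <= 0x7f)
// instead of the ISO-8859-1 check (chars <= 0xff).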
15746 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15747                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15748                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15749   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15750   match(Set result (EncodeISOArray src (Binary dst len)));
15751   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15752 
  format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI" %}
15754   ins_encode %{
15755     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15756                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15757                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15758   %}
15759   ins_pipe( pipe_slow );
15760 %}
15761 
15762 // encode char[] to byte[] in ASCII
15763 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15764                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15765                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15766   predicate(((EncodeISOArrayNode*)n)->is_ascii());
15767   match(Set result (EncodeISOArray src (Binary dst len)));
15768   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15769 
  format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI" %}
15771   ins_encode %{
15772     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15773                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15774                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15775   %}
15776   ins_pipe( pipe_slow );
15777 %}
15778 
15779 //----------Overflow Math Instructions-----------------------------------------
15780 
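// These rules produce only a flags result: the arithmetic instruction
// itself is the overflow test, and the consumer branches on OF.  The
// usual source is the Math.*Exact intrinsics; roughly (a sketch, not
// the exact IR):
//
//   r = Math.addExact(a, b);   // addl op1, op2  +  jo <overflow path>
//
// The two-operand forms USE_KILL op1 because x86 add/imul overwrite
// their first operand; the immediate multiplies write into a TEMP instead.
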
15781 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15782 %{
15783   match(Set cr (OverflowAddI op1 op2));
15784   effect(DEF cr, USE_KILL op1, USE op2);
15785 
15786   format %{ "addl    $op1, $op2\t# overflow check int" %}
15787 
15788   ins_encode %{
15789     __ addl($op1$$Register, $op2$$Register);
15790   %}
15791   ins_pipe(ialu_reg_reg);
15792 %}
15793 
15794 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15795 %{
15796   match(Set cr (OverflowAddI op1 op2));
15797   effect(DEF cr, USE_KILL op1, USE op2);
15798 
15799   format %{ "addl    $op1, $op2\t# overflow check int" %}
15800 
15801   ins_encode %{
15802     __ addl($op1$$Register, $op2$$constant);
15803   %}
15804   ins_pipe(ialu_reg_reg);
15805 %}
15806 
15807 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15808 %{
15809   match(Set cr (OverflowAddL op1 op2));
15810   effect(DEF cr, USE_KILL op1, USE op2);
15811 
15812   format %{ "addq    $op1, $op2\t# overflow check long" %}
15813   ins_encode %{
15814     __ addq($op1$$Register, $op2$$Register);
15815   %}
15816   ins_pipe(ialu_reg_reg);
15817 %}
15818 
15819 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
15820 %{
15821   match(Set cr (OverflowAddL op1 op2));
15822   effect(DEF cr, USE_KILL op1, USE op2);
15823 
15824   format %{ "addq    $op1, $op2\t# overflow check long" %}
15825   ins_encode %{
15826     __ addq($op1$$Register, $op2$$constant);
15827   %}
15828   ins_pipe(ialu_reg_reg);
15829 %}
15830 
15831 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15832 %{
15833   match(Set cr (OverflowSubI op1 op2));
15834 
15835   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15836   ins_encode %{
15837     __ cmpl($op1$$Register, $op2$$Register);
15838   %}
15839   ins_pipe(ialu_reg_reg);
15840 %}
15841 
15842 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15843 %{
15844   match(Set cr (OverflowSubI op1 op2));
15845 
15846   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15847   ins_encode %{
15848     __ cmpl($op1$$Register, $op2$$constant);
15849   %}
15850   ins_pipe(ialu_reg_reg);
15851 %}
15852 
15853 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15854 %{
15855   match(Set cr (OverflowSubL op1 op2));
15856 
15857   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15858   ins_encode %{
15859     __ cmpq($op1$$Register, $op2$$Register);
15860   %}
15861   ins_pipe(ialu_reg_reg);
15862 %}
15863 
15864 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15865 %{
15866   match(Set cr (OverflowSubL op1 op2));
15867 
15868   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15869   ins_encode %{
15870     __ cmpq($op1$$Register, $op2$$constant);
15871   %}
15872   ins_pipe(ialu_reg_reg);
15873 %}
15874 
15875 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
15876 %{
15877   match(Set cr (OverflowSubI zero op2));
15878   effect(DEF cr, USE_KILL op2);
15879 
15880   format %{ "negl    $op2\t# overflow check int" %}
15881   ins_encode %{
15882     __ negl($op2$$Register);
15883   %}
15884   ins_pipe(ialu_reg_reg);
15885 %}
15886 
15887 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
15888 %{
15889   match(Set cr (OverflowSubL zero op2));
15890   effect(DEF cr, USE_KILL op2);
15891 
15892   format %{ "negq    $op2\t# overflow check long" %}
15893   ins_encode %{
15894     __ negq($op2$$Register);
15895   %}
15896   ins_pipe(ialu_reg_reg);
15897 %}
15898 
15899 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15900 %{
15901   match(Set cr (OverflowMulI op1 op2));
15902   effect(DEF cr, USE_KILL op1, USE op2);
15903 
15904   format %{ "imull    $op1, $op2\t# overflow check int" %}
15905   ins_encode %{
15906     __ imull($op1$$Register, $op2$$Register);
15907   %}
15908   ins_pipe(ialu_reg_reg_alu0);
15909 %}
15910 
15911 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
15912 %{
15913   match(Set cr (OverflowMulI op1 op2));
15914   effect(DEF cr, TEMP tmp, USE op1, USE op2);
15915 
15916   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
15917   ins_encode %{
15918     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
15919   %}
15920   ins_pipe(ialu_reg_reg_alu0);
15921 %}
15922 
15923 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15924 %{
15925   match(Set cr (OverflowMulL op1 op2));
15926   effect(DEF cr, USE_KILL op1, USE op2);
15927 
15928   format %{ "imulq    $op1, $op2\t# overflow check long" %}
15929   ins_encode %{
15930     __ imulq($op1$$Register, $op2$$Register);
15931   %}
15932   ins_pipe(ialu_reg_reg_alu0);
15933 %}
15934 
15935 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
15936 %{
15937   match(Set cr (OverflowMulL op1 op2));
15938   effect(DEF cr, TEMP tmp, USE op1, USE op2);
15939 
15940   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
15941   ins_encode %{
15942     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
15943   %}
15944   ins_pipe(ialu_reg_reg_alu0);
15945 %}
15946 
15947 
15948 //----------Control Flow Instructions------------------------------------------
15949 // Signed compare Instructions
15950 
15951 // XXX more variants!!
15952 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15953 %{
15954   match(Set cr (CmpI op1 op2));
15955   effect(DEF cr, USE op1, USE op2);
15956 
15957   format %{ "cmpl    $op1, $op2" %}
15958   ins_encode %{
15959     __ cmpl($op1$$Register, $op2$$Register);
15960   %}
15961   ins_pipe(ialu_cr_reg_reg);
15962 %}
15963 
15964 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15965 %{
15966   match(Set cr (CmpI op1 op2));
15967 
15968   format %{ "cmpl    $op1, $op2" %}
15969   ins_encode %{
15970     __ cmpl($op1$$Register, $op2$$constant);
15971   %}
15972   ins_pipe(ialu_cr_reg_imm);
15973 %}
15974 
15975 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
15976 %{
15977   match(Set cr (CmpI op1 (LoadI op2)));
15978 
15979   ins_cost(500); // XXX
15980   format %{ "cmpl    $op1, $op2" %}
15981   ins_encode %{
15982     __ cmpl($op1$$Register, $op2$$Address);
15983   %}
15984   ins_pipe(ialu_cr_reg_mem);
15985 %}
15986 
15987 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
15988 %{
15989   match(Set cr (CmpI src zero));
15990 
15991   format %{ "testl   $src, $src" %}
15992   ins_encode %{
15993     __ testl($src$$Register, $src$$Register);
15994   %}
15995   ins_pipe(ialu_cr_reg_imm);
15996 %}
15997 
15998 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
15999 %{
16000   match(Set cr (CmpI (AndI src con) zero));
16001 
16002   format %{ "testl   $src, $con" %}
16003   ins_encode %{
16004     __ testl($src$$Register, $con$$constant);
16005   %}
16006   ins_pipe(ialu_cr_reg_imm);
16007 %}
16008 
16009 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16010 %{
16011   match(Set cr (CmpI (AndI src1 src2) zero));
16012 
16013   format %{ "testl   $src1, $src2" %}
16014   ins_encode %{
16015     __ testl($src1$$Register, $src2$$Register);
16016   %}
16017   ins_pipe(ialu_cr_reg_imm);
16018 %}
16019 
16020 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16021 %{
16022   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16023 
16024   format %{ "testl   $src, $mem" %}
16025   ins_encode %{
16026     __ testl($src$$Register, $mem$$Address);
16027   %}
16028   ins_pipe(ialu_cr_reg_mem);
16029 %}
16030 
16031 // Unsigned compare Instructions; really, same as signed except they
16032 // produce an rFlagsRegU instead of rFlagsReg.
16033 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16034 %{
16035   match(Set cr (CmpU op1 op2));
16036 
16037   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16038   ins_encode %{
16039     __ cmpl($op1$$Register, $op2$$Register);
16040   %}
16041   ins_pipe(ialu_cr_reg_reg);
16042 %}
16043 
16044 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16045 %{
16046   match(Set cr (CmpU op1 op2));
16047 
16048   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16049   ins_encode %{
16050     __ cmpl($op1$$Register, $op2$$constant);
16051   %}
16052   ins_pipe(ialu_cr_reg_imm);
16053 %}
16054 
16055 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16056 %{
16057   match(Set cr (CmpU op1 (LoadI op2)));
16058 
16059   ins_cost(500); // XXX
16060   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16061   ins_encode %{
16062     __ cmpl($op1$$Register, $op2$$Address);
16063   %}
16064   ins_pipe(ialu_cr_reg_mem);
16065 %}
16066 
16067 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16068 %{
16069   match(Set cr (CmpU src zero));
16070 
16071   format %{ "testl   $src, $src\t# unsigned" %}
16072   ins_encode %{
16073     __ testl($src$$Register, $src$$Register);
16074   %}
16075   ins_pipe(ialu_cr_reg_imm);
16076 %}
16077 
16078 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16079 %{
16080   match(Set cr (CmpP op1 op2));
16081 
16082   format %{ "cmpq    $op1, $op2\t# ptr" %}
16083   ins_encode %{
16084     __ cmpq($op1$$Register, $op2$$Register);
16085   %}
16086   ins_pipe(ialu_cr_reg_reg);
16087 %}
16088 
16089 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16090 %{
16091   match(Set cr (CmpP op1 (LoadP op2)));
16092   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16093 
16094   ins_cost(500); // XXX
16095   format %{ "cmpq    $op1, $op2\t# ptr" %}
16096   ins_encode %{
16097     __ cmpq($op1$$Register, $op2$$Address);
16098   %}
16099   ins_pipe(ialu_cr_reg_mem);
16100 %}
16101 
16102 // XXX this is generalized by compP_rReg_mem???
16103 // Compare raw pointer (used in out-of-heap check).
16104 // Only works because non-oop pointers must be raw pointers
16105 // and raw pointers have no anti-dependencies.
16106 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16107 %{
16108   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16109             n->in(2)->as_Load()->barrier_data() == 0);
16110   match(Set cr (CmpP op1 (LoadP op2)));
16111 
16112   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16113   ins_encode %{
16114     __ cmpq($op1$$Register, $op2$$Address);
16115   %}
16116   ins_pipe(ialu_cr_reg_mem);
16117 %}
16118 
// This will generate a signed flags result, which is OK: any compare
// against zero should only be eq/neq.
16121 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16122 %{
16123   match(Set cr (CmpP src zero));
16124 
16125   format %{ "testq   $src, $src\t# ptr" %}
16126   ins_encode %{
16127     __ testq($src$$Register, $src$$Register);
16128   %}
16129   ins_pipe(ialu_cr_reg_imm);
16130 %}
16131 
// This will generate a signed flags result, which is OK: any compare
// against zero should only be eq/neq.
16134 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16135 %{
16136   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16137             n->in(1)->as_Load()->barrier_data() == 0);
16138   match(Set cr (CmpP (LoadP op) zero));
16139 
16140   ins_cost(500); // XXX
16141   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16142   ins_encode %{
16143     __ testq($op$$Address, 0xFFFFFFFF);
16144   %}
16145   ins_pipe(ialu_cr_reg_imm);
16146 %}
16147 
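// When the compressed-oops base is null, r12 (reserved as the heap-base
// register) always holds zero, so comparing it with the loaded word
// tests the pointer against null without materializing an immediate.
// The same trick reappears for compressed-oop loads in testN_mem_reg0.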
16148 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16149 %{
16150   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16151             n->in(1)->as_Load()->barrier_data() == 0);
16152   match(Set cr (CmpP (LoadP mem) zero));
16153 
16154   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16155   ins_encode %{
16156     __ cmpq(r12, $mem$$Address);
16157   %}
16158   ins_pipe(ialu_cr_reg_mem);
16159 %}
16160 
16161 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16162 %{
16163   match(Set cr (CmpN op1 op2));
16164 
16165   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16166   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16167   ins_pipe(ialu_cr_reg_reg);
16168 %}
16169 
16170 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16171 %{
16172   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16173   match(Set cr (CmpN src (LoadN mem)));
16174 
16175   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16176   ins_encode %{
16177     __ cmpl($src$$Register, $mem$$Address);
16178   %}
16179   ins_pipe(ialu_cr_reg_mem);
16180 %}
16181 
16182 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16183   match(Set cr (CmpN op1 op2));
16184 
16185   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16186   ins_encode %{
16187     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16188   %}
16189   ins_pipe(ialu_cr_reg_imm);
16190 %}
16191 
16192 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16193 %{
16194   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16195   match(Set cr (CmpN src (LoadN mem)));
16196 
16197   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16198   ins_encode %{
16199     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16200   %}
16201   ins_pipe(ialu_cr_reg_mem);
16202 %}
16203 
16204 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16205   match(Set cr (CmpN op1 op2));
16206 
16207   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16208   ins_encode %{
16209     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16210   %}
16211   ins_pipe(ialu_cr_reg_imm);
16212 %}
16213 
16214 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16215 %{
16216   predicate(!UseCompactObjectHeaders);
16217   match(Set cr (CmpN src (LoadNKlass mem)));
16218 
16219   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16220   ins_encode %{
16221     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16222   %}
16223   ins_pipe(ialu_cr_reg_mem);
16224 %}
16225 
16226 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16227   match(Set cr (CmpN src zero));
16228 
16229   format %{ "testl   $src, $src\t# compressed ptr" %}
16230   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16231   ins_pipe(ialu_cr_reg_imm);
16232 %}
16233 
16234 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16235 %{
16236   predicate(CompressedOops::base() != nullptr &&
16237             n->in(1)->as_Load()->barrier_data() == 0);
16238   match(Set cr (CmpN (LoadN mem) zero));
16239 
16240   ins_cost(500); // XXX
16241   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16242   ins_encode %{
16243     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16244   %}
16245   ins_pipe(ialu_cr_reg_mem);
16246 %}
16247 
16248 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16249 %{
16250   predicate(CompressedOops::base() == nullptr &&
16251             n->in(1)->as_Load()->barrier_data() == 0);
16252   match(Set cr (CmpN (LoadN mem) zero));
16253 
16254   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16255   ins_encode %{
16256     __ cmpl(r12, $mem$$Address);
16257   %}
16258   ins_pipe(ialu_cr_reg_mem);
16259 %}
16260 
16261 // Yanked all unsigned pointer compare operations.
16262 // Pointer compares are done with CmpP which is already unsigned.
16263 
16264 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16265 %{
16266   match(Set cr (CmpL op1 op2));
16267 
16268   format %{ "cmpq    $op1, $op2" %}
16269   ins_encode %{
16270     __ cmpq($op1$$Register, $op2$$Register);
16271   %}
16272   ins_pipe(ialu_cr_reg_reg);
16273 %}
16274 
16275 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16276 %{
16277   match(Set cr (CmpL op1 op2));
16278 
16279   format %{ "cmpq    $op1, $op2" %}
16280   ins_encode %{
16281     __ cmpq($op1$$Register, $op2$$constant);
16282   %}
16283   ins_pipe(ialu_cr_reg_imm);
16284 %}
16285 
16286 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16287 %{
16288   match(Set cr (CmpL op1 (LoadL op2)));
16289 
16290   format %{ "cmpq    $op1, $op2" %}
16291   ins_encode %{
16292     __ cmpq($op1$$Register, $op2$$Address);
16293   %}
16294   ins_pipe(ialu_cr_reg_mem);
16295 %}
16296 
16297 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16298 %{
16299   match(Set cr (CmpL src zero));
16300 
16301   format %{ "testq   $src, $src" %}
16302   ins_encode %{
16303     __ testq($src$$Register, $src$$Register);
16304   %}
16305   ins_pipe(ialu_cr_reg_imm);
16306 %}
16307 
16308 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16309 %{
16310   match(Set cr (CmpL (AndL src con) zero));
16311 
16312   format %{ "testq   $src, $con\t# long" %}
16313   ins_encode %{
16314     __ testq($src$$Register, $con$$constant);
16315   %}
16316   ins_pipe(ialu_cr_reg_imm);
16317 %}
16318 
16319 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16320 %{
16321   match(Set cr (CmpL (AndL src1 src2) zero));
16322 
16323   format %{ "testq   $src1, $src2\t# long" %}
16324   ins_encode %{
16325     __ testq($src1$$Register, $src2$$Register);
16326   %}
16327   ins_pipe(ialu_cr_reg_imm);
16328 %}
16329 
16330 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16331 %{
16332   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16333 
16334   format %{ "testq   $src, $mem" %}
16335   ins_encode %{
16336     __ testq($src$$Register, $mem$$Address);
16337   %}
16338   ins_pipe(ialu_cr_reg_mem);
16339 %}
16340 
16341 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16342 %{
16343   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16344 
16345   format %{ "testq   $src, $mem" %}
16346   ins_encode %{
16347     __ testq($src$$Register, $mem$$Address);
16348   %}
16349   ins_pipe(ialu_cr_reg_mem);
16350 %}
16351 
16352 // Manifest a CmpU result in an integer register.  Very painful.
16353 // This is the test to avoid.
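//
// In C-like terms the sequence below computes (sketch):
//
//   dst = -1;                      // movl: assume src1 <u src2
//   if (src1 <u src2) goto done;   // jb:   keep the -1
//   dst = (src1 != src2) ? 1 : 0;  // setcc(ne): 1 if above, 0 if equal
//   done: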
16354 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16355 %{
16356   match(Set dst (CmpU3 src1 src2));
16357   effect(KILL flags);
16358 
16359   ins_cost(275); // XXX
  format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
16361             "movl    $dst, -1\n\t"
16362             "jb,u    done\n\t"
16363             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16364     "done:" %}
16365   ins_encode %{
16366     Label done;
16367     __ cmpl($src1$$Register, $src2$$Register);
16368     __ movl($dst$$Register, -1);
16369     __ jccb(Assembler::below, done);
16370     __ setcc(Assembler::notZero, $dst$$Register);
16371     __ bind(done);
16372   %}
16373   ins_pipe(pipe_slow);
16374 %}
16375 
16376 // Manifest a CmpL result in an integer register.  Very painful.
16377 // This is the test to avoid.
16378 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16379 %{
16380   match(Set dst (CmpL3 src1 src2));
16381   effect(KILL flags);
16382 
16383   ins_cost(275); // XXX
16384   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16385             "movl    $dst, -1\n\t"
16386             "jl,s    done\n\t"
16387             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16388     "done:" %}
16389   ins_encode %{
16390     Label done;
16391     __ cmpq($src1$$Register, $src2$$Register);
16392     __ movl($dst$$Register, -1);
16393     __ jccb(Assembler::less, done);
16394     __ setcc(Assembler::notZero, $dst$$Register);
16395     __ bind(done);
16396   %}
16397   ins_pipe(pipe_slow);
16398 %}
16399 
16400 // Manifest a CmpUL result in an integer register.  Very painful.
16401 // This is the test to avoid.
16402 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16403 %{
16404   match(Set dst (CmpUL3 src1 src2));
16405   effect(KILL flags);
16406 
16407   ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
16409             "movl    $dst, -1\n\t"
16410             "jb,u    done\n\t"
16411             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16412     "done:" %}
16413   ins_encode %{
16414     Label done;
16415     __ cmpq($src1$$Register, $src2$$Register);
16416     __ movl($dst$$Register, -1);
16417     __ jccb(Assembler::below, done);
16418     __ setcc(Assembler::notZero, $dst$$Register);
16419     __ bind(done);
16420   %}
16421   ins_pipe(pipe_slow);
16422 %}
16423 
16424 // Unsigned long compare Instructions; really, same as signed long except they
16425 // produce an rFlagsRegU instead of rFlagsReg.
16426 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16427 %{
16428   match(Set cr (CmpUL op1 op2));
16429 
16430   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16431   ins_encode %{
16432     __ cmpq($op1$$Register, $op2$$Register);
16433   %}
16434   ins_pipe(ialu_cr_reg_reg);
16435 %}
16436 
16437 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16438 %{
16439   match(Set cr (CmpUL op1 op2));
16440 
16441   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16442   ins_encode %{
16443     __ cmpq($op1$$Register, $op2$$constant);
16444   %}
16445   ins_pipe(ialu_cr_reg_imm);
16446 %}
16447 
16448 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16449 %{
16450   match(Set cr (CmpUL op1 (LoadL op2)));
16451 
16452   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16453   ins_encode %{
16454     __ cmpq($op1$$Register, $op2$$Address);
16455   %}
16456   ins_pipe(ialu_cr_reg_mem);
16457 %}
16458 
16459 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16460 %{
16461   match(Set cr (CmpUL src zero));
16462 
16463   format %{ "testq   $src, $src\t# unsigned" %}
16464   ins_encode %{
16465     __ testq($src$$Register, $src$$Register);
16466   %}
16467   ins_pipe(ialu_cr_reg_imm);
16468 %}
16469 
16470 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16471 %{
16472   match(Set cr (CmpI (LoadB mem) imm));
16473 
16474   ins_cost(125);
16475   format %{ "cmpb    $mem, $imm" %}
16476   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16477   ins_pipe(ialu_cr_reg_mem);
16478 %}
16479 
16480 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16481 %{
16482   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16483 
16484   ins_cost(125);
16485   format %{ "testb   $mem, $imm\t# ubyte" %}
16486   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16487   ins_pipe(ialu_cr_reg_mem);
16488 %}
16489 
16490 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16491 %{
16492   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16493 
16494   ins_cost(125);
16495   format %{ "testb   $mem, $imm\t# byte" %}
16496   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16497   ins_pipe(ialu_cr_reg_mem);
16498 %}
16499 
16500 //----------Max and Min--------------------------------------------------------
16501 // Min Instructions
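//
// x86 has no scalar integer min/max instruction, so MinI/MaxI expand to
// a compare followed by a conditional move.  For min the shape is:
//
//   cmpl    dst, src
//   cmovlgt dst, src   // if dst > src then dst = src, i.e. dst = min(dst, src)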
16502 
16503 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16504 %{
16505   predicate(!UseAPX);
16506   effect(USE_DEF dst, USE src, USE cr);
16507 
16508   format %{ "cmovlgt $dst, $src\t# min" %}
16509   ins_encode %{
16510     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16511   %}
16512   ins_pipe(pipe_cmov_reg);
16513 %}
16514 
16515 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16516 %{
16517   predicate(UseAPX);
16518   effect(DEF dst, USE src1, USE src2, USE cr);
16519 
16520   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16521   ins_encode %{
16522     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16523   %}
16524   ins_pipe(pipe_cmov_reg);
16525 %}
16526 
16527 instruct minI_rReg(rRegI dst, rRegI src)
16528 %{
16529   predicate(!UseAPX);
16530   match(Set dst (MinI dst src));
16531 
16532   ins_cost(200);
16533   expand %{
16534     rFlagsReg cr;
16535     compI_rReg(cr, dst, src);
16536     cmovI_reg_g(dst, src, cr);
16537   %}
16538 %}
16539 
16540 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16541 %{
16542   predicate(UseAPX);
16543   match(Set dst (MinI src1 src2));
16544   effect(DEF dst, USE src1, USE src2);
16545 
16546   ins_cost(200);
16547   expand %{
16548     rFlagsReg cr;
16549     compI_rReg(cr, src1, src2);
16550     cmovI_reg_g_ndd(dst, src1, src2, cr);
16551   %}
16552 %}
16553 
16554 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16555 %{
16556   predicate(!UseAPX);
16557   effect(USE_DEF dst, USE src, USE cr);
16558 
16559   format %{ "cmovllt $dst, $src\t# max" %}
16560   ins_encode %{
16561     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16562   %}
16563   ins_pipe(pipe_cmov_reg);
16564 %}
16565 
16566 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16567 %{
16568   predicate(UseAPX);
16569   effect(DEF dst, USE src1, USE src2, USE cr);
16570 
16571   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16572   ins_encode %{
16573     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16574   %}
16575   ins_pipe(pipe_cmov_reg);
16576 %}
16577 
16578 instruct maxI_rReg(rRegI dst, rRegI src)
16579 %{
16580   predicate(!UseAPX);
16581   match(Set dst (MaxI dst src));
16582 
16583   ins_cost(200);
16584   expand %{
16585     rFlagsReg cr;
16586     compI_rReg(cr, dst, src);
16587     cmovI_reg_l(dst, src, cr);
16588   %}
16589 %}
16590 
16591 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16592 %{
16593   predicate(UseAPX);
16594   match(Set dst (MaxI src1 src2));
16595   effect(DEF dst, USE src1, USE src2);
16596 
16597   ins_cost(200);
16598   expand %{
16599     rFlagsReg cr;
16600     compI_rReg(cr, src1, src2);
16601     cmovI_reg_l_ndd(dst, src1, src2, cr);
16602   %}
16603 %}
16604 
16605 // ============================================================================
16606 // Branch Instructions
16607 
16608 // Jump Direct - Label defines a relative address from JMP+1
16609 instruct jmpDir(label labl)
16610 %{
16611   match(Goto);
16612   effect(USE labl);
16613 
16614   ins_cost(300);
16615   format %{ "jmp     $labl" %}
16616   size(5);
16617   ins_encode %{
16618     Label* L = $labl$$label;
16619     __ jmp(*L, false); // Always long jump
16620   %}
16621   ins_pipe(pipe_jmp);
16622 %}
16623 
16624 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16625 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16626 %{
16627   match(If cop cr);
16628   effect(USE labl);
16629 
16630   ins_cost(300);
16631   format %{ "j$cop     $labl" %}
16632   size(6);
16633   ins_encode %{
16634     Label* L = $labl$$label;
16635     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16636   %}
16637   ins_pipe(pipe_jcc);
16638 %}
16639 
16640 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16641 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16642 %{
16643   match(CountedLoopEnd cop cr);
16644   effect(USE labl);
16645 
16646   ins_cost(300);
16647   format %{ "j$cop     $labl\t# loop end" %}
16648   size(6);
16649   ins_encode %{
16650     Label* L = $labl$$label;
16651     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16652   %}
16653   ins_pipe(pipe_jcc);
16654 %}
16655 
16656 // Jump Direct Conditional - using unsigned comparison
16657 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16658   match(If cop cmp);
16659   effect(USE labl);
16660 
16661   ins_cost(300);
16662   format %{ "j$cop,u   $labl" %}
16663   size(6);
16664   ins_encode %{
16665     Label* L = $labl$$label;
16666     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16667   %}
16668   ins_pipe(pipe_jcc);
16669 %}
16670 
16671 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16672   match(If cop cmp);
16673   effect(USE labl);
16674 
16675   ins_cost(200);
16676   format %{ "j$cop,u   $labl" %}
16677   size(6);
16678   ins_encode %{
16679     Label* L = $labl$$label;
16680     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16681   %}
16682   ins_pipe(pipe_jcc);
16683 %}
16684 
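// Unordered float compares set PF when either operand is NaN.  For "!="
// an unordered result must take the branch, so both jp and jne jump to
// the target; for "==" it must not, so a short jp first skips over the
// je.  (This is the case split in the $$template format below.)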
16685 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16686   match(If cop cmp);
16687   effect(USE labl);
16688 
16689   ins_cost(200);
16690   format %{ $$template
16691     if ($cop$$cmpcode == Assembler::notEqual) {
16692       $$emit$$"jp,u    $labl\n\t"
16693       $$emit$$"j$cop,u   $labl"
16694     } else {
16695       $$emit$$"jp,u    done\n\t"
16696       $$emit$$"j$cop,u   $labl\n\t"
16697       $$emit$$"done:"
16698     }
16699   %}
16700   ins_encode %{
16701     Label* l = $labl$$label;
16702     if ($cop$$cmpcode == Assembler::notEqual) {
16703       __ jcc(Assembler::parity, *l, false);
16704       __ jcc(Assembler::notEqual, *l, false);
16705     } else if ($cop$$cmpcode == Assembler::equal) {
16706       Label done;
16707       __ jccb(Assembler::parity, done);
16708       __ jcc(Assembler::equal, *l, false);
16709       __ bind(done);
16710     } else {
16711        ShouldNotReachHere();
16712     }
16713   %}
16714   ins_pipe(pipe_jcc);
16715 %}
16716 
16717 // ============================================================================
16718 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
16719 // superklass array for an instance of the superklass.  Set a hidden
16720 // internal cache on a hit (cache is checked with exposed code in
16721 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
16722 // encoding ALSO sets flags.
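//
// In rough pseudocode (a sketch of the scan described above, not the
// exact generated code):
//
//   Klass** p = sub->secondary_supers()->data();   // rdi
//   int     n = sub->secondary_supers()->length(); // rcx
//   while (n-- > 0) {
//     if (*p++ == super) {                         // repne scasq hit
//       set_secondary_super_cache(sub, super);     // remember the hit
//       return 0;                                  // Z set, rdi zero
//     }
//   }
//   return nonzero;                                // miss: NZ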
16723 
16724 instruct partialSubtypeCheck(rdi_RegP result,
16725                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16726                              rFlagsReg cr)
16727 %{
16728   match(Set result (PartialSubtypeCheck sub super));
16729   predicate(!UseSecondarySupersTable);
16730   effect(KILL rcx, KILL cr);
16731 
16732   ins_cost(1100);  // slightly larger than the next version
16733   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16734             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16735             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16736             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16737             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
16738             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
16740     "miss:\t" %}
16741 
16742   ins_encode %{
16743     Label miss;
16744     // NB: Callers may assume that, when $result is a valid register,
16745     // check_klass_subtype_slow_path_linear sets it to a nonzero
16746     // value.
16747     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16748                                             $rcx$$Register, $result$$Register,
16749                                             nullptr, &miss,
16750                                             /*set_cond_codes:*/ true);
16751     __ xorptr($result$$Register, $result$$Register);
16752     __ bind(miss);
16753   %}
16754 
16755   ins_pipe(pipe_slow);
16756 %}
16757 
16758 // ============================================================================
16759 // Two versions of hashtable-based partialSubtypeCheck, both used when
16760 // we need to search for a super class in the secondary supers array.
16761 // The first is used when we don't know _a priori_ the class being
16762 // searched for. The second, far more common, is used when we do know:
16763 // this is used for instanceof, checkcast, and any case where C2 can
16764 // determine it by constant propagation.
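// For example, "x instanceof java.util.List" gives C2 the superklass as
// a compile-time constant, so the Const rule applies and the hash slot
// is computed while compiling; the Var rule is mostly left for checks
// against a class that is only known at run time.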
16765 
16766 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
16767                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16768                                        rFlagsReg cr)
16769 %{
16770   match(Set result (PartialSubtypeCheck sub super));
16771   predicate(UseSecondarySupersTable);
16772   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16773 
16774   ins_cost(1000);
16775   format %{ "partialSubtypeCheck $result, $sub, $super" %}
16776 
16777   ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
16780   %}
16781 
16782   ins_pipe(pipe_slow);
16783 %}
16784 
16785 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
16786                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16787                                        rFlagsReg cr)
16788 %{
16789   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
16790   predicate(UseSecondarySupersTable);
16791   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16792 
  ins_cost(700);  // smaller than the previous version
16794   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
16795 
16796   ins_encode %{
16797     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
16798     if (InlineSecondarySupersTest) {
16799       __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register, $result$$Register,
                                             super_klass_slot);
16802     } else {
16803       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
16804     }
16805   %}
16806 
16807   ins_pipe(pipe_slow);
16808 %}
16809 
16810 // ============================================================================
16811 // Branch Instructions -- short offset versions
16812 //
16813 // These instructions are used to replace jumps of a long offset (the default
16814 // match) with jumps of a shorter offset.  These instructions are all tagged
16815 // with the ins_short_branch attribute, which causes the ADLC to suppress the
16816 // match rules in general matching.  Instead, the ADLC generates a conversion
16817 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether a
// short branch can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
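//
// For example, a conditional branch whose target lies within -128..+127
// bytes of the end of the instruction can be replaced in place (encodings
// shown for jne; the other conditions are analogous):
//
//   jne    L    // long form:  0F 85 rel32  (6 bytes)
//   jne,s  L    // short form: 75 rel8      (2 bytes)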
16821 
16822 // Jump Direct - Label defines a relative address from JMP+1
16823 instruct jmpDir_short(label labl) %{
16824   match(Goto);
16825   effect(USE labl);
16826 
16827   ins_cost(300);
16828   format %{ "jmp,s   $labl" %}
16829   size(2);
16830   ins_encode %{
16831     Label* L = $labl$$label;
16832     __ jmpb(*L);
16833   %}
16834   ins_pipe(pipe_jmp);
16835   ins_short_branch(1);
16836 %}
16837 
16838 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16839 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
16840   match(If cop cr);
16841   effect(USE labl);
16842 
16843   ins_cost(300);
16844   format %{ "j$cop,s   $labl" %}
16845   size(2);
16846   ins_encode %{
16847     Label* L = $labl$$label;
16848     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16849   %}
16850   ins_pipe(pipe_jcc);
16851   ins_short_branch(1);
16852 %}
16853 
16854 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16855 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
16856   match(CountedLoopEnd cop cr);
16857   effect(USE labl);
16858 
16859   ins_cost(300);
16860   format %{ "j$cop,s   $labl\t# loop end" %}
16861   size(2);
16862   ins_encode %{
16863     Label* L = $labl$$label;
16864     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16865   %}
16866   ins_pipe(pipe_jcc);
16867   ins_short_branch(1);
16868 %}
16869 
16870 // Jump Direct Conditional - using unsigned comparison
16871 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16872   match(If cop cmp);
16873   effect(USE labl);
16874 
16875   ins_cost(300);
16876   format %{ "j$cop,us  $labl" %}
16877   size(2);
16878   ins_encode %{
16879     Label* L = $labl$$label;
16880     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16881   %}
16882   ins_pipe(pipe_jcc);
16883   ins_short_branch(1);
16884 %}
16885 
16886 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16887   match(If cop cmp);
16888   effect(USE labl);
16889 
16890   ins_cost(300);
16891   format %{ "j$cop,us  $labl" %}
16892   size(2);
16893   ins_encode %{
16894     Label* L = $labl$$label;
16895     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16896   %}
16897   ins_pipe(pipe_jcc);
16898   ins_short_branch(1);
16899 %}
16900 
16901 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16902   match(If cop cmp);
16903   effect(USE labl);
16904 
16905   ins_cost(300);
16906   format %{ $$template
16907     if ($cop$$cmpcode == Assembler::notEqual) {
16908       $$emit$$"jp,u,s  $labl\n\t"
16909       $$emit$$"j$cop,u,s  $labl"
16910     } else {
16911       $$emit$$"jp,u,s  done\n\t"
16912       $$emit$$"j$cop,u,s  $labl\n\t"
16913       $$emit$$"done:"
16914     }
16915   %}
16916   size(4);
16917   ins_encode %{
16918     Label* l = $labl$$label;
16919     if ($cop$$cmpcode == Assembler::notEqual) {
16920       __ jccb(Assembler::parity, *l);
16921       __ jccb(Assembler::notEqual, *l);
16922     } else if ($cop$$cmpcode == Assembler::equal) {
16923       Label done;
16924       __ jccb(Assembler::parity, done);
16925       __ jccb(Assembler::equal, *l);
16926       __ bind(done);
16927     } else {
16928        ShouldNotReachHere();
16929     }
16930   %}
16931   ins_pipe(pipe_jcc);
16932   ins_short_branch(1);
16933 %}
16934 
16935 // ============================================================================
16936 // inlined locking and unlocking
16937 
16938 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
16939   match(Set cr (FastLock object box));
16940   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
16941   ins_cost(300);
16942   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
16943   ins_encode %{
16944     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
16945   %}
16946   ins_pipe(pipe_slow);
16947 %}
16948 
16949 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
16950   match(Set cr (FastUnlock object rax_reg));
16951   effect(TEMP tmp, USE_KILL rax_reg);
16952   ins_cost(300);
16953   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
16954   ins_encode %{
16955     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
16956   %}
16957   ins_pipe(pipe_slow);
16958 %}
16959 
16960 
16961 // ============================================================================
16962 // Safepoint Instructions
16963 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
16964 %{
16965   match(SafePoint poll);
16966   effect(KILL cr, USE poll);
16967 
16968   format %{ "testl   rax, [$poll]\t"
16969             "# Safepoint: poll for GC" %}
16970   ins_cost(125);
16971   ins_encode %{
16972     __ relocate(relocInfo::poll_type);
16973     address pre_pc = __ pc();
16974     __ testl(rax, Address($poll$$Register, 0));
16975     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
16976   %}
16977   ins_pipe(ialu_reg_mem);
16978 %}
16979 
16980 instruct mask_all_evexL(kReg dst, rRegL src) %{
16981   match(Set dst (MaskAll src));
16982   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
16983   ins_encode %{
16984     int mask_len = Matcher::vector_length(this);
16985     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
16986   %}
16987   ins_pipe( pipe_slow );
16988 %}
16989 
16990 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
16991   predicate(Matcher::vector_length(n) > 32);
16992   match(Set dst (MaskAll src));
16993   effect(TEMP tmp);
16994   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
16995   ins_encode %{
16996     int mask_len = Matcher::vector_length(this);
16997     __ movslq($tmp$$Register, $src$$Register);
16998     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
16999   %}
17000   ins_pipe( pipe_slow );
17001 %}
17002 
17003 // ============================================================================
17004 // Procedure Call/Return Instructions
17005 // Call Java Static Instruction
17006 // Note: If this code changes, the corresponding ret_addr_offset() and
17007 //       compute_padding() functions will have to be adjusted.
17008 instruct CallStaticJavaDirect(method meth) %{
17009   match(CallStaticJava);
17010   effect(USE meth);
17011 
17012   ins_cost(300);
17013   format %{ "call,static " %}
17014   opcode(0xE8); /* E8 cd */
17015   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17016   ins_pipe(pipe_slow);
17017   ins_alignment(4);
17018 %}
17019 
17020 // Call Java Dynamic Instruction
17021 // Note: If this code changes, the corresponding ret_addr_offset() and
17022 //       compute_padding() functions will have to be adjusted.
17023 instruct CallDynamicJavaDirect(method meth)
17024 %{
17025   match(CallDynamicJava);
17026   effect(USE meth);
17027 
17028   ins_cost(300);
17029   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17030             "call,dynamic " %}
17031   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17032   ins_pipe(pipe_slow);
17033   ins_alignment(4);
17034 %}
17035 
17036 // Call Runtime Instruction
17037 instruct CallRuntimeDirect(method meth)
17038 %{
17039   match(CallRuntime);
17040   effect(USE meth);
17041 
17042   ins_cost(300);
17043   format %{ "call,runtime " %}
17044   ins_encode(clear_avx, Java_To_Runtime(meth));
17045   ins_pipe(pipe_slow);
17046 %}
17047 
17048 // Call runtime without safepoint
17049 instruct CallLeafDirect(method meth)
17050 %{
17051   match(CallLeaf);
17052   effect(USE meth);
17053 
17054   ins_cost(300);
17055   format %{ "call_leaf,runtime " %}
17056   ins_encode(clear_avx, Java_To_Runtime(meth));
17057   ins_pipe(pipe_slow);
17058 %}
17059 
17060 // Call runtime without safepoint and with vector arguments
17061 instruct CallLeafDirectVector(method meth)
17062 %{
17063   match(CallLeafVector);
17064   effect(USE meth);
17065 
17066   ins_cost(300);
17067   format %{ "call_leaf,vector " %}
17068   ins_encode(Java_To_Runtime(meth));
17069   ins_pipe(pipe_slow);
17070 %}
17071 
17072 // Call runtime without safepoint
17073 instruct CallLeafNoFPDirect(method meth)
17074 %{
17075   match(CallLeafNoFP);
17076   effect(USE meth);
17077 
17078   ins_cost(300);
17079   format %{ "call_leaf_nofp,runtime " %}
17080   ins_encode(clear_avx, Java_To_Runtime(meth));
17081   ins_pipe(pipe_slow);
17082 %}
17083 
17084 // Return Instruction
17085 // Remove the return address & jump to it.
// Note: We always emit a nop after a ret to make sure there is room
// for safepoint patching.
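// For example, the code stream around a return ends up looking like
//
//   ret
//   nop   // room reserved for safepoint patching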
17088 instruct Ret()
17089 %{
17090   match(Return);
17091 
17092   format %{ "ret" %}
17093   ins_encode %{
17094     __ ret(0);
17095   %}
17096   ins_pipe(pipe_jmp);
17097 %}
17098 
17099 // Tail Call; Jump from runtime stub to Java code.
17100 // Also known as an 'interprocedural jump'.
17101 // Target of jump will eventually return to caller.
17102 // TailJump below removes the return address.
17103 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17104 // emitted just above the TailCall which has reset rbp to the caller state.
17105 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17106 %{
17107   match(TailCall jump_target method_ptr);
17108 
17109   ins_cost(300);
17110   format %{ "jmp     $jump_target\t# rbx holds method" %}
17111   ins_encode %{
17112     __ jmp($jump_target$$Register);
17113   %}
17114   ins_pipe(pipe_jmp);
17115 %}
17116 
17117 // Tail Jump; remove the return address; jump to target.
17118 // TailCall above leaves the return address around.
17119 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17120 %{
17121   match(TailJump jump_target ex_oop);
17122 
17123   ins_cost(300);
17124   format %{ "popq    rdx\t# pop return address\n\t"
17125             "jmp     $jump_target" %}
17126   ins_encode %{
17127     __ popq(as_Register(RDX_enc));
17128     __ jmp($jump_target$$Register);
17129   %}
17130   ins_pipe(pipe_jmp);
17131 %}
17132 
17133 // Forward exception.
17134 instruct ForwardExceptionjmp()
17135 %{
17136   match(ForwardException);
17137 
17138   format %{ "jmp     forward_exception_stub" %}
17139   ins_encode %{
17140     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17141   %}
17142   ins_pipe(pipe_jmp);
17143 %}
17144 
17145 // Create exception oop: created by stack-crawling runtime code.
// The created exception is available to this handler and is set up
// just prior to jumping to this handler.  No code emitted.
17148 instruct CreateException(rax_RegP ex_oop)
17149 %{
17150   match(Set ex_oop (CreateEx));
17151 
17152   size(0);
17153   // use the following format syntax
17154   format %{ "# exception oop is in rax; no code emitted" %}
17155   ins_encode();
17156   ins_pipe(empty);
17157 %}
17158 
17159 // Rethrow exception:
17160 // The exception oop will come in the first argument position.
17161 // Then JUMP (not call) to the rethrow stub code.
17162 instruct RethrowException()
17163 %{
17164   match(Rethrow);
17165 
17166   // use the following format syntax
17167   format %{ "jmp     rethrow_stub" %}
17168   ins_encode %{
17169     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17170   %}
17171   ins_pipe(pipe_jmp);
17172 %}
17173 
17174 // ============================================================================
17175 // This name is KNOWN by the ADLC and cannot be changed.
17176 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17177 // for this guy.
17178 instruct tlsLoadP(r15_RegP dst) %{
17179   match(Set dst (ThreadLocal));
17180   effect(DEF dst);
17181 
17182   size(0);
17183   format %{ "# TLS is in R15" %}
17184   ins_encode( /*empty encoding*/ );
17185   ins_pipe(ialu_reg_reg);
17186 %}
17187 
17188 instruct addF_reg(regF dst, regF src) %{
17189   predicate(UseAVX == 0);
17190   match(Set dst (AddF dst src));
17191 
17192   format %{ "addss   $dst, $src" %}
17193   ins_cost(150);
17194   ins_encode %{
17195     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17196   %}
17197   ins_pipe(pipe_slow);
17198 %}
17199 
17200 instruct addF_mem(regF dst, memory src) %{
17201   predicate(UseAVX == 0);
17202   match(Set dst (AddF dst (LoadF src)));
17203 
17204   format %{ "addss   $dst, $src" %}
17205   ins_cost(150);
17206   ins_encode %{
17207     __ addss($dst$$XMMRegister, $src$$Address);
17208   %}
17209   ins_pipe(pipe_slow);
17210 %}
17211 
17212 instruct addF_imm(regF dst, immF con) %{
17213   predicate(UseAVX == 0);
17214   match(Set dst (AddF dst con));
17215   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17216   ins_cost(150);
17217   ins_encode %{
17218     __ addss($dst$$XMMRegister, $constantaddress($con));
17219   %}
17220   ins_pipe(pipe_slow);
17221 %}
17222 
17223 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17224   predicate(UseAVX > 0);
17225   match(Set dst (AddF src1 src2));
17226 
17227   format %{ "vaddss  $dst, $src1, $src2" %}
17228   ins_cost(150);
17229   ins_encode %{
17230     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17231   %}
17232   ins_pipe(pipe_slow);
17233 %}
17234 
17235 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17236   predicate(UseAVX > 0);
17237   match(Set dst (AddF src1 (LoadF src2)));
17238 
17239   format %{ "vaddss  $dst, $src1, $src2" %}
17240   ins_cost(150);
17241   ins_encode %{
17242     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17243   %}
17244   ins_pipe(pipe_slow);
17245 %}
17246 
17247 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17248   predicate(UseAVX > 0);
17249   match(Set dst (AddF src con));
17250 
17251   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17252   ins_cost(150);
17253   ins_encode %{
17254     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17255   %}
17256   ins_pipe(pipe_slow);
17257 %}
17258 
17259 instruct addD_reg(regD dst, regD src) %{
17260   predicate(UseAVX == 0);
17261   match(Set dst (AddD dst src));
17262 
17263   format %{ "addsd   $dst, $src" %}
17264   ins_cost(150);
17265   ins_encode %{
17266     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17267   %}
17268   ins_pipe(pipe_slow);
17269 %}
17270 
17271 instruct addD_mem(regD dst, memory src) %{
17272   predicate(UseAVX == 0);
17273   match(Set dst (AddD dst (LoadD src)));
17274 
17275   format %{ "addsd   $dst, $src" %}
17276   ins_cost(150);
17277   ins_encode %{
17278     __ addsd($dst$$XMMRegister, $src$$Address);
17279   %}
17280   ins_pipe(pipe_slow);
17281 %}
17282 
17283 instruct addD_imm(regD dst, immD con) %{
17284   predicate(UseAVX == 0);
17285   match(Set dst (AddD dst con));
17286   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17287   ins_cost(150);
17288   ins_encode %{
17289     __ addsd($dst$$XMMRegister, $constantaddress($con));
17290   %}
17291   ins_pipe(pipe_slow);
17292 %}
17293 
17294 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17295   predicate(UseAVX > 0);
17296   match(Set dst (AddD src1 src2));
17297 
17298   format %{ "vaddsd  $dst, $src1, $src2" %}
17299   ins_cost(150);
17300   ins_encode %{
17301     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17302   %}
17303   ins_pipe(pipe_slow);
17304 %}
17305 
17306 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17307   predicate(UseAVX > 0);
17308   match(Set dst (AddD src1 (LoadD src2)));
17309 
17310   format %{ "vaddsd  $dst, $src1, $src2" %}
17311   ins_cost(150);
17312   ins_encode %{
17313     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17314   %}
17315   ins_pipe(pipe_slow);
17316 %}
17317 
17318 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17319   predicate(UseAVX > 0);
17320   match(Set dst (AddD src con));
17321 
17322   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17323   ins_cost(150);
17324   ins_encode %{
17325     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17326   %}
17327   ins_pipe(pipe_slow);
17328 %}
17329 
17330 instruct subF_reg(regF dst, regF src) %{
17331   predicate(UseAVX == 0);
17332   match(Set dst (SubF dst src));
17333 
17334   format %{ "subss   $dst, $src" %}
17335   ins_cost(150);
17336   ins_encode %{
17337     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17338   %}
17339   ins_pipe(pipe_slow);
17340 %}
17341 
17342 instruct subF_mem(regF dst, memory src) %{
17343   predicate(UseAVX == 0);
17344   match(Set dst (SubF dst (LoadF src)));
17345 
17346   format %{ "subss   $dst, $src" %}
17347   ins_cost(150);
17348   ins_encode %{
17349     __ subss($dst$$XMMRegister, $src$$Address);
17350   %}
17351   ins_pipe(pipe_slow);
17352 %}
17353 
17354 instruct subF_imm(regF dst, immF con) %{
17355   predicate(UseAVX == 0);
17356   match(Set dst (SubF dst con));
17357   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17358   ins_cost(150);
17359   ins_encode %{
17360     __ subss($dst$$XMMRegister, $constantaddress($con));
17361   %}
17362   ins_pipe(pipe_slow);
17363 %}
17364 
17365 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17366   predicate(UseAVX > 0);
17367   match(Set dst (SubF src1 src2));
17368 
17369   format %{ "vsubss  $dst, $src1, $src2" %}
17370   ins_cost(150);
17371   ins_encode %{
17372     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17373   %}
17374   ins_pipe(pipe_slow);
17375 %}
17376 
17377 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17378   predicate(UseAVX > 0);
17379   match(Set dst (SubF src1 (LoadF src2)));
17380 
17381   format %{ "vsubss  $dst, $src1, $src2" %}
17382   ins_cost(150);
17383   ins_encode %{
17384     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17385   %}
17386   ins_pipe(pipe_slow);
17387 %}
17388 
17389 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17390   predicate(UseAVX > 0);
17391   match(Set dst (SubF src con));
17392 
17393   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17394   ins_cost(150);
17395   ins_encode %{
17396     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17397   %}
17398   ins_pipe(pipe_slow);
17399 %}
17400 
17401 instruct subD_reg(regD dst, regD src) %{
17402   predicate(UseAVX == 0);
17403   match(Set dst (SubD dst src));
17404 
17405   format %{ "subsd   $dst, $src" %}
17406   ins_cost(150);
17407   ins_encode %{
17408     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17409   %}
17410   ins_pipe(pipe_slow);
17411 %}
17412 
17413 instruct subD_mem(regD dst, memory src) %{
17414   predicate(UseAVX == 0);
17415   match(Set dst (SubD dst (LoadD src)));
17416 
17417   format %{ "subsd   $dst, $src" %}
17418   ins_cost(150);
17419   ins_encode %{
17420     __ subsd($dst$$XMMRegister, $src$$Address);
17421   %}
17422   ins_pipe(pipe_slow);
17423 %}
17424 
17425 instruct subD_imm(regD dst, immD con) %{
17426   predicate(UseAVX == 0);
17427   match(Set dst (SubD dst con));
17428   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17429   ins_cost(150);
17430   ins_encode %{
17431     __ subsd($dst$$XMMRegister, $constantaddress($con));
17432   %}
17433   ins_pipe(pipe_slow);
17434 %}
17435 
17436 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17437   predicate(UseAVX > 0);
17438   match(Set dst (SubD src1 src2));
17439 
17440   format %{ "vsubsd  $dst, $src1, $src2" %}
17441   ins_cost(150);
17442   ins_encode %{
17443     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17444   %}
17445   ins_pipe(pipe_slow);
17446 %}
17447 
17448 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17449   predicate(UseAVX > 0);
17450   match(Set dst (SubD src1 (LoadD src2)));
17451 
17452   format %{ "vsubsd  $dst, $src1, $src2" %}
17453   ins_cost(150);
17454   ins_encode %{
17455     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17456   %}
17457   ins_pipe(pipe_slow);
17458 %}
17459 
17460 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17461   predicate(UseAVX > 0);
17462   match(Set dst (SubD src con));
17463 
17464   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17465   ins_cost(150);
17466   ins_encode %{
17467     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17468   %}
17469   ins_pipe(pipe_slow);
17470 %}
17471 
17472 instruct mulF_reg(regF dst, regF src) %{
17473   predicate(UseAVX == 0);
17474   match(Set dst (MulF dst src));
17475 
17476   format %{ "mulss   $dst, $src" %}
17477   ins_cost(150);
17478   ins_encode %{
17479     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17480   %}
17481   ins_pipe(pipe_slow);
17482 %}
17483 
17484 instruct mulF_mem(regF dst, memory src) %{
17485   predicate(UseAVX == 0);
17486   match(Set dst (MulF dst (LoadF src)));
17487 
17488   format %{ "mulss   $dst, $src" %}
17489   ins_cost(150);
17490   ins_encode %{
17491     __ mulss($dst$$XMMRegister, $src$$Address);
17492   %}
17493   ins_pipe(pipe_slow);
17494 %}
17495 
17496 instruct mulF_imm(regF dst, immF con) %{
17497   predicate(UseAVX == 0);
17498   match(Set dst (MulF dst con));
17499   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17500   ins_cost(150);
17501   ins_encode %{
17502     __ mulss($dst$$XMMRegister, $constantaddress($con));
17503   %}
17504   ins_pipe(pipe_slow);
17505 %}
17506 
17507 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17508   predicate(UseAVX > 0);
17509   match(Set dst (MulF src1 src2));
17510 
17511   format %{ "vmulss  $dst, $src1, $src2" %}
17512   ins_cost(150);
17513   ins_encode %{
17514     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17515   %}
17516   ins_pipe(pipe_slow);
17517 %}
17518 
17519 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17520   predicate(UseAVX > 0);
17521   match(Set dst (MulF src1 (LoadF src2)));
17522 
17523   format %{ "vmulss  $dst, $src1, $src2" %}
17524   ins_cost(150);
17525   ins_encode %{
17526     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17527   %}
17528   ins_pipe(pipe_slow);
17529 %}
17530 
17531 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17532   predicate(UseAVX > 0);
17533   match(Set dst (MulF src con));
17534 
17535   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17536   ins_cost(150);
17537   ins_encode %{
17538     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17539   %}
17540   ins_pipe(pipe_slow);
17541 %}
17542 
17543 instruct mulD_reg(regD dst, regD src) %{
17544   predicate(UseAVX == 0);
17545   match(Set dst (MulD dst src));
17546 
17547   format %{ "mulsd   $dst, $src" %}
17548   ins_cost(150);
17549   ins_encode %{
17550     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17551   %}
17552   ins_pipe(pipe_slow);
17553 %}
17554 
17555 instruct mulD_mem(regD dst, memory src) %{
17556   predicate(UseAVX == 0);
17557   match(Set dst (MulD dst (LoadD src)));
17558 
17559   format %{ "mulsd   $dst, $src" %}
17560   ins_cost(150);
17561   ins_encode %{
17562     __ mulsd($dst$$XMMRegister, $src$$Address);
17563   %}
17564   ins_pipe(pipe_slow);
17565 %}
17566 
17567 instruct mulD_imm(regD dst, immD con) %{
17568   predicate(UseAVX == 0);
17569   match(Set dst (MulD dst con));
17570   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17571   ins_cost(150);
17572   ins_encode %{
17573     __ mulsd($dst$$XMMRegister, $constantaddress($con));
17574   %}
17575   ins_pipe(pipe_slow);
17576 %}
17577 
17578 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17579   predicate(UseAVX > 0);
17580   match(Set dst (MulD src1 src2));
17581 
17582   format %{ "vmulsd  $dst, $src1, $src2" %}
17583   ins_cost(150);
17584   ins_encode %{
17585     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17586   %}
17587   ins_pipe(pipe_slow);
17588 %}
17589 
17590 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17591   predicate(UseAVX > 0);
17592   match(Set dst (MulD src1 (LoadD src2)));
17593 
17594   format %{ "vmulsd  $dst, $src1, $src2" %}
17595   ins_cost(150);
17596   ins_encode %{
17597     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17598   %}
17599   ins_pipe(pipe_slow);
17600 %}
17601 
17602 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17603   predicate(UseAVX > 0);
17604   match(Set dst (MulD src con));
17605 
17606   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17607   ins_cost(150);
17608   ins_encode %{
17609     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17610   %}
17611   ins_pipe(pipe_slow);
17612 %}
17613 
17614 instruct divF_reg(regF dst, regF src) %{
17615   predicate(UseAVX == 0);
17616   match(Set dst (DivF dst src));
17617 
17618   format %{ "divss   $dst, $src" %}
17619   ins_cost(150);
17620   ins_encode %{
17621     __ divss($dst$$XMMRegister, $src$$XMMRegister);
17622   %}
17623   ins_pipe(pipe_slow);
17624 %}
17625 
17626 instruct divF_mem(regF dst, memory src) %{
17627   predicate(UseAVX == 0);
17628   match(Set dst (DivF dst (LoadF src)));
17629 
17630   format %{ "divss   $dst, $src" %}
17631   ins_cost(150);
17632   ins_encode %{
17633     __ divss($dst$$XMMRegister, $src$$Address);
17634   %}
17635   ins_pipe(pipe_slow);
17636 %}
17637 
17638 instruct divF_imm(regF dst, immF con) %{
17639   predicate(UseAVX == 0);
17640   match(Set dst (DivF dst con));
17641   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17642   ins_cost(150);
17643   ins_encode %{
17644     __ divss($dst$$XMMRegister, $constantaddress($con));
17645   %}
17646   ins_pipe(pipe_slow);
17647 %}
17648 
17649 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17650   predicate(UseAVX > 0);
17651   match(Set dst (DivF src1 src2));
17652 
17653   format %{ "vdivss  $dst, $src1, $src2" %}
17654   ins_cost(150);
17655   ins_encode %{
17656     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17657   %}
17658   ins_pipe(pipe_slow);
17659 %}
17660 
17661 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17662   predicate(UseAVX > 0);
17663   match(Set dst (DivF src1 (LoadF src2)));
17664 
17665   format %{ "vdivss  $dst, $src1, $src2" %}
17666   ins_cost(150);
17667   ins_encode %{
17668     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17669   %}
17670   ins_pipe(pipe_slow);
17671 %}
17672 
17673 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17674   predicate(UseAVX > 0);
17675   match(Set dst (DivF src con));
17676 
17677   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17678   ins_cost(150);
17679   ins_encode %{
17680     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17681   %}
17682   ins_pipe(pipe_slow);
17683 %}
17684 
17685 instruct divD_reg(regD dst, regD src) %{
17686   predicate(UseAVX == 0);
17687   match(Set dst (DivD dst src));
17688 
17689   format %{ "divsd   $dst, $src" %}
17690   ins_cost(150);
17691   ins_encode %{
17692     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17693   %}
17694   ins_pipe(pipe_slow);
17695 %}
17696 
17697 instruct divD_mem(regD dst, memory src) %{
17698   predicate(UseAVX == 0);
17699   match(Set dst (DivD dst (LoadD src)));
17700 
17701   format %{ "divsd   $dst, $src" %}
17702   ins_cost(150);
17703   ins_encode %{
17704     __ divsd($dst$$XMMRegister, $src$$Address);
17705   %}
17706   ins_pipe(pipe_slow);
17707 %}
17708 
17709 instruct divD_imm(regD dst, immD con) %{
17710   predicate(UseAVX == 0);
17711   match(Set dst (DivD dst con));
17712   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17713   ins_cost(150);
17714   ins_encode %{
17715     __ divsd($dst$$XMMRegister, $constantaddress($con));
17716   %}
17717   ins_pipe(pipe_slow);
17718 %}
17719 
17720 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17721   predicate(UseAVX > 0);
17722   match(Set dst (DivD src1 src2));
17723 
17724   format %{ "vdivsd  $dst, $src1, $src2" %}
17725   ins_cost(150);
17726   ins_encode %{
17727     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17728   %}
17729   ins_pipe(pipe_slow);
17730 %}
17731 
17732 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17733   predicate(UseAVX > 0);
17734   match(Set dst (DivD src1 (LoadD src2)));
17735 
17736   format %{ "vdivsd  $dst, $src1, $src2" %}
17737   ins_cost(150);
17738   ins_encode %{
17739     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17740   %}
17741   ins_pipe(pipe_slow);
17742 %}
17743 
17744 instruct divD_reg_imm(regD dst, regD src, immD con) %{
17745   predicate(UseAVX > 0);
17746   match(Set dst (DivD src con));
17747 
17748   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17749   ins_cost(150);
17750   ins_encode %{
17751     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17752   %}
17753   ins_pipe(pipe_slow);
17754 %}
17755 
17756 instruct absF_reg(regF dst) %{
17757   predicate(UseAVX == 0);
17758   match(Set dst (AbsF dst));
17759   ins_cost(150);
17760   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
17761   ins_encode %{
17762     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
17763   %}
17764   ins_pipe(pipe_slow);
17765 %}
17766 
17767 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
17768   predicate(UseAVX > 0);
17769   match(Set dst (AbsF src));
17770   ins_cost(150);
17771   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
17772   ins_encode %{
17773     int vlen_enc = Assembler::AVX_128bit;
17774     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
17775               ExternalAddress(float_signmask()), vlen_enc);
17776   %}
17777   ins_pipe(pipe_slow);
17778 %}
17779 
17780 instruct absD_reg(regD dst) %{
17781   predicate(UseAVX == 0);
17782   match(Set dst (AbsD dst));
17783   ins_cost(150);
17784   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
17785             "# abs double by sign masking" %}
17786   ins_encode %{
17787     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
17788   %}
17789   ins_pipe(pipe_slow);
17790 %}
17791 
17792 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
17793   predicate(UseAVX > 0);
17794   match(Set dst (AbsD src));
17795   ins_cost(150);
17796   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
17797             "# abs double by sign masking" %}
17798   ins_encode %{
17799     int vlen_enc = Assembler::AVX_128bit;
17800     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
17801               ExternalAddress(double_signmask()), vlen_enc);
17802   %}
17803   ins_pipe(pipe_slow);
17804 %}
17805 
17806 instruct negF_reg(regF dst) %{
17807   predicate(UseAVX == 0);
17808   match(Set dst (NegF dst));
17809   ins_cost(150);
17810   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
17811   ins_encode %{
17812     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
17813   %}
17814   ins_pipe(pipe_slow);
17815 %}
17816 
17817 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
17818   predicate(UseAVX > 0);
17819   match(Set dst (NegF src));
17820   ins_cost(150);
17821   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
17822   ins_encode %{
17823     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
17824                  ExternalAddress(float_signflip()));
17825   %}
17826   ins_pipe(pipe_slow);
17827 %}
17828 
17829 instruct negD_reg(regD dst) %{
17830   predicate(UseAVX == 0);
17831   match(Set dst (NegD dst));
17832   ins_cost(150);
17833   format %{ "xorpd   $dst, [0x8000000000000000]\t"
17834             "# neg double by sign flipping" %}
17835   ins_encode %{
17836     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
17837   %}
17838   ins_pipe(pipe_slow);
17839 %}
17840 
17841 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
17842   predicate(UseAVX > 0);
17843   match(Set dst (NegD src));
17844   ins_cost(150);
17845   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
17846             "# neg double by sign flipping" %}
17847   ins_encode %{
17848     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
17849                  ExternalAddress(double_signflip()));
17850   %}
17851   ins_pipe(pipe_slow);
17852 %}
17853 
// The sqrtss instruction needs its destination register to be pre-initialized
// for best performance.  Therefore only the instruct rule where the input is
// pre-loaded into the dst register is defined below.
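// A sketch of the hazard being avoided (assuming the usual partial-update
// reasoning): sqrtss writes only the low 32 bits of dst, so
//
//   sqrtss xmm1, xmm0   // upper bits of xmm1 keep their old value,
//                       // creating a false dependency on prior xmm1
//   sqrtss xmm0, xmm0   // dst == src: no stale upper half to wait on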
17856 instruct sqrtF_reg(regF dst) %{
17857   match(Set dst (SqrtF dst));
17858   format %{ "sqrtss  $dst, $dst" %}
17859   ins_encode %{
17860     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
17861   %}
17862   ins_pipe(pipe_slow);
17863 %}
17864 
// The sqrtsd instruction needs its destination register to be pre-initialized
// for best performance.  Therefore only the instruct rule where the input is
// pre-loaded into the dst register is defined below.
17867 instruct sqrtD_reg(regD dst) %{
17868   match(Set dst (SqrtD dst));
17869   format %{ "sqrtsd  $dst, $dst" %}
17870   ins_encode %{
17871     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
17872   %}
17873   ins_pipe(pipe_slow);
17874 %}
17875 
17876 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
17877   effect(TEMP tmp);
17878   match(Set dst (ConvF2HF src));
17879   ins_cost(125);
17880   format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
17881   ins_encode %{
17882     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
17883   %}
17884   ins_pipe( pipe_slow );
17885 %}
17886 
17887 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
17888   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
17889   effect(TEMP ktmp, TEMP rtmp);
17890   match(Set mem (StoreC mem (ConvF2HF src)));
17891   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
17892   ins_encode %{
17893     __ movl($rtmp$$Register, 0x1);
17894     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
17895     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
17896   %}
17897   ins_pipe( pipe_slow );
17898 %}
17899 
17900 instruct vconvF2HF(vec dst, vec src) %{
17901   match(Set dst (VectorCastF2HF src));
17902   format %{ "vector_conv_F2HF $dst $src" %}
17903   ins_encode %{
17904     int vlen_enc = vector_length_encoding(this, $src);
17905     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
17906   %}
17907   ins_pipe( pipe_slow );
17908 %}
17909 
17910 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
17911   predicate(n->as_StoreVector()->memory_size() >= 16);
17912   match(Set mem (StoreVector mem (VectorCastF2HF src)));
17913   format %{ "vcvtps2ph $mem,$src" %}
17914   ins_encode %{
17915     int vlen_enc = vector_length_encoding(this, $src);
17916     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
17917   %}
17918   ins_pipe( pipe_slow );
17919 %}
17920 
17921 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
17922   match(Set dst (ConvHF2F src));
17923   format %{ "vcvtph2ps $dst,$src" %}
17924   ins_encode %{
17925     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
17926   %}
17927   ins_pipe( pipe_slow );
17928 %}
17929 
17930 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
17931   match(Set dst (VectorCastHF2F (LoadVector mem)));
17932   format %{ "vcvtph2ps $dst,$mem" %}
17933   ins_encode %{
17934     int vlen_enc = vector_length_encoding(this);
17935     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
17936   %}
17937   ins_pipe( pipe_slow );
17938 %}
17939 
17940 instruct vconvHF2F(vec dst, vec src) %{
17941   match(Set dst (VectorCastHF2F src));
17942   ins_cost(125);
17943   format %{ "vector_conv_HF2F $dst,$src" %}
17944   ins_encode %{
17945     int vlen_enc = vector_length_encoding(this);
17946     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
17947   %}
17948   ins_pipe( pipe_slow );
17949 %}
17950 
17951 // ---------------------------------------- VectorReinterpret ------------------------------------
17952 instruct reinterpret_mask(kReg dst) %{
17953   predicate(n->bottom_type()->isa_vectmask() &&
17954             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
17955   match(Set dst (VectorReinterpret dst));
17956   ins_cost(125);
17957   format %{ "vector_reinterpret $dst\t!" %}
17958   ins_encode %{
17959     // empty
17960   %}
17961   ins_pipe( pipe_slow );
17962 %}
17963 
17964 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
17965   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
17966             n->bottom_type()->isa_vectmask() &&
17967             n->in(1)->bottom_type()->isa_vectmask() &&
17968             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE);
17970   match(Set dst (VectorReinterpret src));
17971   effect(TEMP xtmp);
17972   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
17973   ins_encode %{
17974      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
17975      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
17980   %}
17981   ins_pipe( pipe_slow );
17982 %}
17983 
17984 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
17985   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
17986             n->bottom_type()->isa_vectmask() &&
17987             n->in(1)->bottom_type()->isa_vectmask() &&
17988             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
17989              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE);
17991   match(Set dst (VectorReinterpret src));
17992   effect(TEMP xtmp);
17993   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
17994   ins_encode %{
17995      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
17996      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18001   %}
18002   ins_pipe( pipe_slow );
18003 %}
18004 
18005 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18006   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18007             n->bottom_type()->isa_vectmask() &&
18008             n->in(1)->bottom_type()->isa_vectmask() &&
18009             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18010              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE);
18012   match(Set dst (VectorReinterpret src));
18013   effect(TEMP xtmp);
18014   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18015   ins_encode %{
18016      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18017      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18022   %}
18023   ins_pipe( pipe_slow );
18024 %}
18025 
18026 instruct reinterpret(vec dst) %{
18027   predicate(!n->bottom_type()->isa_vectmask() &&
18028             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18029   match(Set dst (VectorReinterpret dst));
18030   ins_cost(125);
18031   format %{ "vector_reinterpret $dst\t!" %}
18032   ins_encode %{
18033     // empty
18034   %}
18035   ins_pipe( pipe_slow );
18036 %}
18037 
18038 instruct reinterpret_expand(vec dst, vec src) %{
18039   predicate(UseAVX == 0 &&
18040             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18041   match(Set dst (VectorReinterpret src));
18042   ins_cost(125);
18043   effect(TEMP dst);
18044   format %{ "vector_reinterpret_expand $dst,$src" %}
18045   ins_encode %{
18046     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18047     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18048 
18049     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18050     if (src_vlen_in_bytes == 4) {
18051       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18052     } else {
18053       assert(src_vlen_in_bytes == 8, "");
18054       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18055     }
18056     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18057   %}
18058   ins_pipe( pipe_slow );
18059 %}
18060 
18061 instruct vreinterpret_expand4(legVec dst, vec src) %{
18062   predicate(UseAVX > 0 &&
18063             !n->bottom_type()->isa_vectmask() &&
18064             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18065             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18066   match(Set dst (VectorReinterpret src));
18067   ins_cost(125);
18068   format %{ "vector_reinterpret_expand $dst,$src" %}
18069   ins_encode %{
18070     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18071   %}
18072   ins_pipe( pipe_slow );
18073 %}
18074 
18075 
18076 instruct vreinterpret_expand(legVec dst, vec src) %{
18077   predicate(UseAVX > 0 &&
18078             !n->bottom_type()->isa_vectmask() &&
18079             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18080             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18081   match(Set dst (VectorReinterpret src));
18082   ins_cost(125);
18083   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18084   ins_encode %{
18085     switch (Matcher::vector_length_in_bytes(this, $src)) {
18086       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18087       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18088       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18089       default: ShouldNotReachHere();
18090     }
18091   %}
18092   ins_pipe( pipe_slow );
18093 %}
18094 
18095 instruct reinterpret_shrink(vec dst, legVec src) %{
18096   predicate(!n->bottom_type()->isa_vectmask() &&
18097             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18098   match(Set dst (VectorReinterpret src));
18099   ins_cost(125);
18100   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18101   ins_encode %{
18102     switch (Matcher::vector_length_in_bytes(this)) {
18103       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18104       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18105       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18106       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18107       default: ShouldNotReachHere();
18108     }
18109   %}
18110   ins_pipe( pipe_slow );
18111 %}
18112 
18113 // ----------------------------------------------------------------------------------------------------
18114 
18115 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18116   match(Set dst (RoundDoubleMode src rmode));
18117   format %{ "roundsd $dst,$src" %}
18118   ins_cost(150);
18119   ins_encode %{
18120     assert(UseSSE >= 4, "required");
18121     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18122       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18123     }
18124     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18125   %}
18126   ins_pipe(pipe_slow);
18127 %}
18128 
18129 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18130   match(Set dst (RoundDoubleMode con rmode));
18131   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18132   ins_cost(150);
18133   ins_encode %{
18134     assert(UseSSE >= 4, "required");
18135     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18136   %}
18137   ins_pipe(pipe_slow);
18138 %}
18139 
18140 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18141   predicate(Matcher::vector_length(n) < 8);
18142   match(Set dst (RoundDoubleModeV src rmode));
18143   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18144   ins_encode %{
18145     assert(UseAVX > 0, "required");
18146     int vlen_enc = vector_length_encoding(this);
18147     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18148   %}
18149   ins_pipe( pipe_slow );
18150 %}
18151 
18152 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18153   predicate(Matcher::vector_length(n) == 8);
18154   match(Set dst (RoundDoubleModeV src rmode));
18155   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18156   ins_encode %{
18157     assert(UseAVX > 2, "required");
18158     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18159   %}
18160   ins_pipe( pipe_slow );
18161 %}
18162 
18163 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18164   predicate(Matcher::vector_length(n) < 8);
18165   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18166   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18167   ins_encode %{
18168     assert(UseAVX > 0, "required");
18169     int vlen_enc = vector_length_encoding(this);
18170     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18171   %}
18172   ins_pipe( pipe_slow );
18173 %}
18174 
18175 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18176   predicate(Matcher::vector_length(n) == 8);
18177   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18178   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18179   ins_encode %{
18180     assert(UseAVX > 2, "required");
18181     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18182   %}
18183   ins_pipe( pipe_slow );
18184 %}
18185 
18186 instruct onspinwait() %{
18187   match(OnSpinWait);
18188   ins_cost(200);
18189 
18190   format %{
18191     $$template
18192     $$emit$$"pause\t! membar_onspinwait"
18193   %}
18194   ins_encode %{
18195     __ pause();
18196   %}
18197   ins_pipe(pipe_slow);
18198 %}
18199 
18200 // a * b + c
18201 instruct fmaD_reg(regD a, regD b, regD c) %{
18202   match(Set c (FmaD  c (Binary a b)));
18203   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18204   ins_cost(150);
18205   ins_encode %{
18206     assert(UseFMA, "Needs FMA instructions support.");
18207     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18208   %}
18209   ins_pipe( pipe_slow );
18210 %}
18211 
18212 // a * b + c
18213 instruct fmaF_reg(regF a, regF b, regF c) %{
18214   match(Set c (FmaF  c (Binary a b)));
18215   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18216   ins_cost(150);
18217   ins_encode %{
18218     assert(UseFMA, "Needs FMA instructions support.");
18219     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18220   %}
18221   ins_pipe( pipe_slow );
18222 %}
18223 
18224 // ====================VECTOR INSTRUCTIONS=====================================
18225 
18226 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18227 instruct MoveVec2Leg(legVec dst, vec src) %{
18228   match(Set dst src);
18229   format %{ "" %}
18230   ins_encode %{
18231     ShouldNotReachHere();
18232   %}
18233   ins_pipe( fpu_reg_reg );
18234 %}
18235 
18236 instruct MoveLeg2Vec(vec dst, legVec src) %{
18237   match(Set dst src);
18238   format %{ "" %}
18239   ins_encode %{
18240     ShouldNotReachHere();
18241   %}
18242   ins_pipe( fpu_reg_reg );
18243 %}
18244 
18245 // ============================================================================
18246 
18247 // Load vectors generic operand pattern
18248 instruct loadV(vec dst, memory mem) %{
18249   match(Set dst (LoadVector mem));
18250   ins_cost(125);
18251   format %{ "load_vector $dst,$mem" %}
18252   ins_encode %{
18253     BasicType bt = Matcher::vector_element_basic_type(this);
18254     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18255   %}
18256   ins_pipe( pipe_slow );
18257 %}
18258 
18259 // Store vectors generic operand pattern.
18260 instruct storeV(memory mem, vec src) %{
18261   match(Set mem (StoreVector mem src));
18262   ins_cost(145);
18263   format %{ "store_vector $mem,$src\n\t" %}
18264   ins_encode %{
18265     switch (Matcher::vector_length_in_bytes(this, $src)) {
18266       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18267       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18268       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18269       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18270       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18271       default: ShouldNotReachHere();
18272     }
18273   %}
18274   ins_pipe( pipe_slow );
18275 %}
18276 
18277 // ---------------------------------------- Gather ------------------------------------
18278 
18279 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18280 
18281 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18282   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18283             Matcher::vector_length_in_bytes(n) <= 32);
18284   match(Set dst (LoadVectorGather mem idx));
18285   effect(TEMP dst, TEMP tmp, TEMP mask);
18286   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18287   ins_encode %{
18288     int vlen_enc = vector_length_encoding(this);
18289     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18290     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18291     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18292     __ lea($tmp$$Register, $mem$$Address);
18293     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18294   %}
18295   ins_pipe( pipe_slow );
18296 %}
18297 
18298 
18299 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18300   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18301             !is_subword_type(Matcher::vector_element_basic_type(n)));
18302   match(Set dst (LoadVectorGather mem idx));
18303   effect(TEMP dst, TEMP tmp, TEMP ktmp);
18304   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
18305   ins_encode %{
18306     int vlen_enc = vector_length_encoding(this);
18307     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18308     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18309     __ lea($tmp$$Register, $mem$$Address);
18310     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18311   %}
18312   ins_pipe( pipe_slow );
18313 %}
18314 
18315 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18316   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18317             !is_subword_type(Matcher::vector_element_basic_type(n)));
18318   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18319   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
18320   format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
18321   ins_encode %{
18322     assert(UseAVX > 2, "sanity");
18323     int vlen_enc = vector_length_encoding(this);
18324     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18325     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register
    // used for predication, the mask operand is moved to a temporary first.
18328     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18329     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18330     __ lea($tmp$$Register, $mem$$Address);
18331     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18332   %}
18333   ins_pipe( pipe_slow );
18334 %}
18335 
18336 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18337   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18338   match(Set dst (LoadVectorGather mem idx_base));
18339   effect(TEMP tmp, TEMP rtmp);
18340   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18341   ins_encode %{
18342     int vlen_enc = vector_length_encoding(this);
18343     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18344     __ lea($tmp$$Register, $mem$$Address);
18345     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18346   %}
18347   ins_pipe( pipe_slow );
18348 %}
18349 
18350 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18351                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18352   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18353   match(Set dst (LoadVectorGather mem idx_base));
18354   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18355   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18356   ins_encode %{
18357     int vlen_enc = vector_length_encoding(this);
18358     int vector_len = Matcher::vector_length(this);
18359     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18360     __ lea($tmp$$Register, $mem$$Address);
18361     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18362     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18363                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18364   %}
18365   ins_pipe( pipe_slow );
18366 %}
18367 
18368 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18369   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18370   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18371   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18372   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18373   ins_encode %{
18374     int vlen_enc = vector_length_encoding(this);
18375     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18376     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18377     __ lea($tmp$$Register, $mem$$Address);
18378     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18379     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18380   %}
18381   ins_pipe( pipe_slow );
18382 %}
18383 
18384 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18385                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18386   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18387   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18388   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18389   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18390   ins_encode %{
18391     int vlen_enc = vector_length_encoding(this);
18392     int vector_len = Matcher::vector_length(this);
18393     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18394     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18395     __ lea($tmp$$Register, $mem$$Address);
18396     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18397     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18398     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18399                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18400   %}
18401   ins_pipe( pipe_slow );
18402 %}
18403 
18404 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18405   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18406   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18407   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18408   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18409   ins_encode %{
18410     int vlen_enc = vector_length_encoding(this);
18411     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18412     __ lea($tmp$$Register, $mem$$Address);
18413     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
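    // vpmovmskb produces one mask bit per byte; for T_SHORT lanes keep every
    // other bit (pext with 0x55555555) so one bit per short lane remains.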
18414     if (elem_bt == T_SHORT) {
18415       __ movl($mask_idx$$Register, 0x55555555);
18416       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18417     }
18418     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18419     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18420   %}
18421   ins_pipe( pipe_slow );
18422 %}
18423 
18424 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18425                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18426   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18427   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18428   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18429   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18430   ins_encode %{
18431     int vlen_enc = vector_length_encoding(this);
18432     int vector_len = Matcher::vector_length(this);
18433     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18434     __ lea($tmp$$Register, $mem$$Address);
18435     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18436     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
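    // vpmovmskb produces one mask bit per byte; for T_SHORT lanes keep every
    // other bit (pext with 0x55555555) so one bit per short lane remains.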
18437     if (elem_bt == T_SHORT) {
18438       __ movl($mask_idx$$Register, 0x55555555);
18439       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18440     }
18441     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18442     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18443                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18444   %}
18445   ins_pipe( pipe_slow );
18446 %}
18447 
18448 // ====================Scatter=======================================
18449 
18450 // Scatter INT, LONG, FLOAT, DOUBLE
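
// For illustration (assumed Java Vector API usage, not part of these rules):
// a store through an index map, roughly
//   IntVector v = IntVector.fromArray(IntVector.SPECIES_256, src, 0);
//   v.intoArray(dst, 0, indexMap, 0);
// reaches the matcher as a StoreVectorScatter node.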
18451 
18452 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18453   predicate(UseAVX > 2);
18454   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18455   effect(TEMP tmp, TEMP ktmp);
18456   format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
18457   ins_encode %{
18458     int vlen_enc = vector_length_encoding(this, $src);
18459     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18460 
18461     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18462     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18463 
18464     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18465     __ lea($tmp$$Register, $mem$$Address);
18466     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18467   %}
18468   ins_pipe( pipe_slow );
18469 %}
18470 
18471 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18472   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18473   effect(TEMP tmp, TEMP ktmp);
18474   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18475   ins_encode %{
18476     int vlen_enc = vector_length_encoding(this, $src);
18477     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18478     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18479     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: the scatter instruction partially updates the opmask register used
    // for predication (mask bits are cleared as lanes complete), hence the
    // mask operand is first copied to a temporary.
18482     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18483     __ lea($tmp$$Register, $mem$$Address);
18484     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18485   %}
18486   ins_pipe( pipe_slow );
18487 %}
18488 
18489 // ====================REPLICATE=======================================
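
// For illustration (assumed Java Vector API usage): a broadcast such as
//   ByteVector.broadcast(ByteVector.SPECIES_256, (byte) 42)
// (or the broadcast of a scalar operand by the auto-vectorizer) arrives here
// as a Replicate node; the rules below choose among EVEX/AVX2 broadcasts and
// SSE shuffle sequences based on element type, vector length and ISA support.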
18490 
18491 // Replicate byte scalar to be vector
18492 instruct vReplB_reg(vec dst, rRegI src) %{
18493   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18494   match(Set dst (Replicate src));
18495   format %{ "replicateB $dst,$src" %}
18496   ins_encode %{
18497     uint vlen = Matcher::vector_length(this);
18498     if (UseAVX >= 2) {
18499       int vlen_enc = vector_length_encoding(this);
18500       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18501         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18502         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18503       } else {
18504         __ movdl($dst$$XMMRegister, $src$$Register);
18505         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18506       }
18507     } else {
      assert(UseAVX < 2, "");
18509       __ movdl($dst$$XMMRegister, $src$$Register);
18510       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18511       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18512       if (vlen >= 16) {
18513         assert(vlen == 16, "");
18514         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18515       }
18516     }
18517   %}
18518   ins_pipe( pipe_slow );
18519 %}
18520 
18521 instruct ReplB_mem(vec dst, memory mem) %{
18522   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18523   match(Set dst (Replicate (LoadB mem)));
18524   format %{ "replicateB $dst,$mem" %}
18525   ins_encode %{
18526     int vlen_enc = vector_length_encoding(this);
18527     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18528   %}
18529   ins_pipe( pipe_slow );
18530 %}
18531 
18532 // ====================ReplicateS=======================================
18533 
18534 instruct vReplS_reg(vec dst, rRegI src) %{
18535   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18536   match(Set dst (Replicate src));
18537   format %{ "replicateS $dst,$src" %}
18538   ins_encode %{
18539     uint vlen = Matcher::vector_length(this);
18540     int vlen_enc = vector_length_encoding(this);
18541     if (UseAVX >= 2) {
18542       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18543         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18544         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18545       } else {
18546         __ movdl($dst$$XMMRegister, $src$$Register);
18547         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18548       }
18549     } else {
18550       assert(UseAVX < 2, "");
18551       __ movdl($dst$$XMMRegister, $src$$Register);
18552       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18553       if (vlen >= 8) {
18554         assert(vlen == 8, "");
18555         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18556       }
18557     }
18558   %}
18559   ins_pipe( pipe_slow );
18560 %}
18561 
18562 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18563   match(Set dst (Replicate con));
18564   effect(TEMP rtmp);
18565   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18566   ins_encode %{
18567     int vlen_enc = vector_length_encoding(this);
18568     BasicType bt = Matcher::vector_element_basic_type(this);
18569     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18570     __ movl($rtmp$$Register, $con$$constant);
18571     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18572   %}
18573   ins_pipe( pipe_slow );
18574 %}
18575 
18576 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18577   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18578   match(Set dst (Replicate src));
18579   effect(TEMP rtmp);
18580   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18581   ins_encode %{
18582     int vlen_enc = vector_length_encoding(this);
18583     __ vmovw($rtmp$$Register, $src$$XMMRegister);
18584     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18585   %}
18586   ins_pipe( pipe_slow );
18587 %}
18588 
18589 instruct ReplS_mem(vec dst, memory mem) %{
18590   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18591   match(Set dst (Replicate (LoadS mem)));
18592   format %{ "replicateS $dst,$mem" %}
18593   ins_encode %{
18594     int vlen_enc = vector_length_encoding(this);
18595     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18596   %}
18597   ins_pipe( pipe_slow );
18598 %}
18599 
18600 // ====================ReplicateI=======================================
18601 
18602 instruct ReplI_reg(vec dst, rRegI src) %{
18603   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18604   match(Set dst (Replicate src));
18605   format %{ "replicateI $dst,$src" %}
18606   ins_encode %{
18607     uint vlen = Matcher::vector_length(this);
18608     int vlen_enc = vector_length_encoding(this);
18609     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18610       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18611     } else if (VM_Version::supports_avx2()) {
18612       __ movdl($dst$$XMMRegister, $src$$Register);
18613       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18614     } else {
18615       __ movdl($dst$$XMMRegister, $src$$Register);
18616       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18617     }
18618   %}
18619   ins_pipe( pipe_slow );
18620 %}
18621 
18622 instruct ReplI_mem(vec dst, memory mem) %{
18623   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18624   match(Set dst (Replicate (LoadI mem)));
18625   format %{ "replicateI $dst,$mem" %}
18626   ins_encode %{
18627     int vlen_enc = vector_length_encoding(this);
18628     if (VM_Version::supports_avx2()) {
18629       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18630     } else if (VM_Version::supports_avx()) {
18631       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18632     } else {
18633       __ movdl($dst$$XMMRegister, $mem$$Address);
18634       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18635     }
18636   %}
18637   ins_pipe( pipe_slow );
18638 %}
18639 
18640 instruct ReplI_imm(vec dst, immI con) %{
18641   predicate(Matcher::is_non_long_integral_vector(n));
18642   match(Set dst (Replicate con));
18643   format %{ "replicateI $dst,$con" %}
18644   ins_encode %{
18645     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18646                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18647                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
18648     BasicType bt = Matcher::vector_element_basic_type(this);
18649     int vlen = Matcher::vector_length_in_bytes(this);
18650     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18651   %}
18652   ins_pipe( pipe_slow );
18653 %}
18654 
18655 // Replicate scalar zero to be vector
18656 instruct ReplI_zero(vec dst, immI_0 zero) %{
18657   predicate(Matcher::is_non_long_integral_vector(n));
18658   match(Set dst (Replicate zero));
18659   format %{ "replicateI $dst,$zero" %}
18660   ins_encode %{
18661     int vlen_enc = vector_length_encoding(this);
18662     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18663       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18664     } else {
18665       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18666     }
18667   %}
18668   ins_pipe( fpu_reg_reg );
18669 %}
18670 
18671 instruct ReplI_M1(vec dst, immI_M1 con) %{
18672   predicate(Matcher::is_non_long_integral_vector(n));
18673   match(Set dst (Replicate con));
18674   format %{ "vallones $dst" %}
18675   ins_encode %{
18676     int vector_len = vector_length_encoding(this);
18677     __ vallones($dst$$XMMRegister, vector_len);
18678   %}
18679   ins_pipe( pipe_slow );
18680 %}
18681 
18682 // ====================ReplicateL=======================================
18683 
18684 // Replicate long (8 byte) scalar to be vector
18685 instruct ReplL_reg(vec dst, rRegL src) %{
18686   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18687   match(Set dst (Replicate src));
18688   format %{ "replicateL $dst,$src" %}
18689   ins_encode %{
18690     int vlen = Matcher::vector_length(this);
18691     int vlen_enc = vector_length_encoding(this);
18692     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18693       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18694     } else if (VM_Version::supports_avx2()) {
18695       __ movdq($dst$$XMMRegister, $src$$Register);
18696       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18697     } else {
18698       __ movdq($dst$$XMMRegister, $src$$Register);
18699       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18700     }
18701   %}
18702   ins_pipe( pipe_slow );
18703 %}
18704 
18705 instruct ReplL_mem(vec dst, memory mem) %{
18706   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18707   match(Set dst (Replicate (LoadL mem)));
18708   format %{ "replicateL $dst,$mem" %}
18709   ins_encode %{
18710     int vlen_enc = vector_length_encoding(this);
18711     if (VM_Version::supports_avx2()) {
18712       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18713     } else if (VM_Version::supports_sse3()) {
18714       __ movddup($dst$$XMMRegister, $mem$$Address);
18715     } else {
18716       __ movq($dst$$XMMRegister, $mem$$Address);
18717       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18718     }
18719   %}
18720   ins_pipe( pipe_slow );
18721 %}
18722 
18723 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
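// Note: the last argument of vreplicate_imm is the replication count for the
// constant table entry; with SSE3 one copy is enough (it can be broadcast on
// load), without SSE3 two copies fill out a full 16-byte entry.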
18724 instruct ReplL_imm(vec dst, immL con) %{
18725   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18726   match(Set dst (Replicate con));
18727   format %{ "replicateL $dst,$con" %}
18728   ins_encode %{
18729     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18730     int vlen = Matcher::vector_length_in_bytes(this);
18731     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18732   %}
18733   ins_pipe( pipe_slow );
18734 %}
18735 
18736 instruct ReplL_zero(vec dst, immL0 zero) %{
18737   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18738   match(Set dst (Replicate zero));
18739   format %{ "replicateL $dst,$zero" %}
18740   ins_encode %{
18741     int vlen_enc = vector_length_encoding(this);
18742     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18743       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18744     } else {
18745       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18746     }
18747   %}
18748   ins_pipe( fpu_reg_reg );
18749 %}
18750 
18751 instruct ReplL_M1(vec dst, immL_M1 con) %{
18752   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18753   match(Set dst (Replicate con));
18754   format %{ "vallones $dst" %}
18755   ins_encode %{
18756     int vector_len = vector_length_encoding(this);
18757     __ vallones($dst$$XMMRegister, vector_len);
18758   %}
18759   ins_pipe( pipe_slow );
18760 %}
18761 
18762 // ====================ReplicateF=======================================
18763 
18764 instruct vReplF_reg(vec dst, vlRegF src) %{
18765   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18766   match(Set dst (Replicate src));
18767   format %{ "replicateF $dst,$src" %}
18768   ins_encode %{
18769     uint vlen = Matcher::vector_length(this);
18770     int vlen_enc = vector_length_encoding(this);
18771     if (vlen <= 4) {
18772       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18773     } else if (VM_Version::supports_avx2()) {
18774       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18775     } else {
18776       assert(vlen == 8, "sanity");
18777       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18778       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18779     }
18780   %}
18781   ins_pipe( pipe_slow );
18782 %}
18783 
18784 instruct ReplF_reg(vec dst, vlRegF src) %{
18785   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18786   match(Set dst (Replicate src));
18787   format %{ "replicateF $dst,$src" %}
18788   ins_encode %{
18789     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
18790   %}
18791   ins_pipe( pipe_slow );
18792 %}
18793 
18794 instruct ReplF_mem(vec dst, memory mem) %{
18795   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18796   match(Set dst (Replicate (LoadF mem)));
18797   format %{ "replicateF $dst,$mem" %}
18798   ins_encode %{
18799     int vlen_enc = vector_length_encoding(this);
18800     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18801   %}
18802   ins_pipe( pipe_slow );
18803 %}
18804 
18805 // Replicate float scalar immediate to be vector by loading from const table.
18806 instruct ReplF_imm(vec dst, immF con) %{
18807   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18808   match(Set dst (Replicate con));
18809   format %{ "replicateF $dst,$con" %}
18810   ins_encode %{
18811     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
18812                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
18813     int vlen = Matcher::vector_length_in_bytes(this);
18814     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
18815   %}
18816   ins_pipe( pipe_slow );
18817 %}
18818 
18819 instruct ReplF_zero(vec dst, immF0 zero) %{
18820   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18821   match(Set dst (Replicate zero));
18822   format %{ "replicateF $dst,$zero" %}
18823   ins_encode %{
18824     int vlen_enc = vector_length_encoding(this);
18825     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18826       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18827     } else {
18828       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18829     }
18830   %}
18831   ins_pipe( fpu_reg_reg );
18832 %}
18833 
18834 // ====================ReplicateD=======================================
18835 
18836 // Replicate double (8 bytes) scalar to be vector
18837 instruct vReplD_reg(vec dst, vlRegD src) %{
18838   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18839   match(Set dst (Replicate src));
18840   format %{ "replicateD $dst,$src" %}
18841   ins_encode %{
18842     uint vlen = Matcher::vector_length(this);
18843     int vlen_enc = vector_length_encoding(this);
18844     if (vlen <= 2) {
18845       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18846     } else if (VM_Version::supports_avx2()) {
18847       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18848     } else {
18849       assert(vlen == 4, "sanity");
18850       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18851       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18852     }
18853   %}
18854   ins_pipe( pipe_slow );
18855 %}
18856 
18857 instruct ReplD_reg(vec dst, vlRegD src) %{
18858   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18859   match(Set dst (Replicate src));
18860   format %{ "replicateD $dst,$src" %}
18861   ins_encode %{
18862     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
18863   %}
18864   ins_pipe( pipe_slow );
18865 %}
18866 
18867 instruct ReplD_mem(vec dst, memory mem) %{
18868   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18869   match(Set dst (Replicate (LoadD mem)));
18870   format %{ "replicateD $dst,$mem" %}
18871   ins_encode %{
18872     if (Matcher::vector_length(this) >= 4) {
18873       int vlen_enc = vector_length_encoding(this);
18874       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18875     } else {
18876       __ movddup($dst$$XMMRegister, $mem$$Address);
18877     }
18878   %}
18879   ins_pipe( pipe_slow );
18880 %}
18881 
18882 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
18883 instruct ReplD_imm(vec dst, immD con) %{
18884   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18885   match(Set dst (Replicate con));
18886   format %{ "replicateD $dst,$con" %}
18887   ins_encode %{
18888     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18889     int vlen = Matcher::vector_length_in_bytes(this);
18890     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
18891   %}
18892   ins_pipe( pipe_slow );
18893 %}
18894 
18895 instruct ReplD_zero(vec dst, immD0 zero) %{
18896   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18897   match(Set dst (Replicate zero));
18898   format %{ "replicateD $dst,$zero" %}
18899   ins_encode %{
18900     int vlen_enc = vector_length_encoding(this);
18901     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18902       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18903     } else {
18904       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18905     }
18906   %}
18907   ins_pipe( fpu_reg_reg );
18908 %}
18909 
18910 // ====================VECTOR INSERT=======================================
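
// For illustration (assumed Java Vector API usage): a lane update such as
//   IntVector w = v.withLane(3, 42);
// becomes a VectorInsert node. For vectors wider than 128 bits, the rules
// below extract the affected 128-bit lane, update it, and re-insert it.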
18911 
18912 instruct insert(vec dst, rRegI val, immU8 idx) %{
18913   predicate(Matcher::vector_length_in_bytes(n) < 32);
18914   match(Set dst (VectorInsert (Binary dst val) idx));
18915   format %{ "vector_insert $dst,$val,$idx" %}
18916   ins_encode %{
18917     assert(UseSSE >= 4, "required");
18918     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
18919 
18920     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18921 
18922     assert(is_integral_type(elem_bt), "");
18923     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18924 
18925     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
18926   %}
18927   ins_pipe( pipe_slow );
18928 %}
18929 
18930 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
18931   predicate(Matcher::vector_length_in_bytes(n) == 32);
18932   match(Set dst (VectorInsert (Binary src val) idx));
18933   effect(TEMP vtmp);
18934   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18935   ins_encode %{
18936     int vlen_enc = Assembler::AVX_256bit;
18937     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18938     int elem_per_lane = 16/type2aelembytes(elem_bt);
18939     int log2epr = log2(elem_per_lane);
18940 
18941     assert(is_integral_type(elem_bt), "sanity");
18942     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18943 
18944     uint x_idx = $idx$$constant & right_n_bits(log2epr);
18945     uint y_idx = ($idx$$constant >> log2epr) & 1;
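    // e.g. a 256-bit vector of ints has 4 elements per 128-bit lane, so
    // idx == 5 yields x_idx == 1 within the upper lane (y_idx == 1)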
18946     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18947     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18948     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18949   %}
18950   ins_pipe( pipe_slow );
18951 %}
18952 
18953 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
18954   predicate(Matcher::vector_length_in_bytes(n) == 64);
18955   match(Set dst (VectorInsert (Binary src val) idx));
18956   effect(TEMP vtmp);
18957   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18958   ins_encode %{
18959     assert(UseAVX > 2, "sanity");
18960 
18961     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18962     int elem_per_lane = 16/type2aelembytes(elem_bt);
18963     int log2epr = log2(elem_per_lane);
18964 
18965     assert(is_integral_type(elem_bt), "");
18966     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18967 
18968     uint x_idx = $idx$$constant & right_n_bits(log2epr);
18969     uint y_idx = ($idx$$constant >> log2epr) & 3;
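    // e.g. a 512-bit vector of longs has 2 elements per 128-bit lane, so
    // idx == 5 yields x_idx == 1 within 128-bit lane 2 (y_idx == 2)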
18970     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18971     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18972     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18973   %}
18974   ins_pipe( pipe_slow );
18975 %}
18976 
18977 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
18978   predicate(Matcher::vector_length(n) == 2);
18979   match(Set dst (VectorInsert (Binary dst val) idx));
18980   format %{ "vector_insert $dst,$val,$idx" %}
18981   ins_encode %{
18982     assert(UseSSE >= 4, "required");
18983     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
18984     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18985 
18986     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
18987   %}
18988   ins_pipe( pipe_slow );
18989 %}
18990 
18991 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
18992   predicate(Matcher::vector_length(n) == 4);
18993   match(Set dst (VectorInsert (Binary src val) idx));
18994   effect(TEMP vtmp);
18995   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18996   ins_encode %{
18997     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
18998     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18999 
19000     uint x_idx = $idx$$constant & right_n_bits(1);
19001     uint y_idx = ($idx$$constant >> 1) & 1;
19002     int vlen_enc = Assembler::AVX_256bit;
19003     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19004     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19005     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19006   %}
19007   ins_pipe( pipe_slow );
19008 %}
19009 
19010 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19011   predicate(Matcher::vector_length(n) == 8);
19012   match(Set dst (VectorInsert (Binary src val) idx));
19013   effect(TEMP vtmp);
19014   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19015   ins_encode %{
19016     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19017     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19018 
19019     uint x_idx = $idx$$constant & right_n_bits(1);
19020     uint y_idx = ($idx$$constant >> 1) & 3;
19021     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19022     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19023     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19024   %}
19025   ins_pipe( pipe_slow );
19026 %}
19027 
19028 instruct insertF(vec dst, regF val, immU8 idx) %{
19029   predicate(Matcher::vector_length(n) < 8);
19030   match(Set dst (VectorInsert (Binary dst val) idx));
19031   format %{ "vector_insert $dst,$val,$idx" %}
19032   ins_encode %{
19033     assert(UseSSE >= 4, "sanity");
19034 
19035     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19036     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19037 
19038     uint x_idx = $idx$$constant & right_n_bits(2);
19039     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19040   %}
19041   ins_pipe( pipe_slow );
19042 %}
19043 
19044 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19045   predicate(Matcher::vector_length(n) >= 8);
19046   match(Set dst (VectorInsert (Binary src val) idx));
19047   effect(TEMP vtmp);
19048   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19049   ins_encode %{
19050     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19051     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19052 
19053     int vlen = Matcher::vector_length(this);
19054     uint x_idx = $idx$$constant & right_n_bits(2);
19055     if (vlen == 8) {
19056       uint y_idx = ($idx$$constant >> 2) & 1;
19057       int vlen_enc = Assembler::AVX_256bit;
19058       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19059       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19060       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19061     } else {
19062       assert(vlen == 16, "sanity");
19063       uint y_idx = ($idx$$constant >> 2) & 3;
19064       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19065       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19066       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19067     }
19068   %}
19069   ins_pipe( pipe_slow );
19070 %}
19071 
19072 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19073   predicate(Matcher::vector_length(n) == 2);
19074   match(Set dst (VectorInsert (Binary dst val) idx));
19075   effect(TEMP tmp);
19076   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19077   ins_encode %{
19078     assert(UseSSE >= 4, "sanity");
19079     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19080     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19081 
19082     __ movq($tmp$$Register, $val$$XMMRegister);
19083     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19084   %}
19085   ins_pipe( pipe_slow );
19086 %}
19087 
19088 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19089   predicate(Matcher::vector_length(n) == 4);
19090   match(Set dst (VectorInsert (Binary src val) idx));
19091   effect(TEMP vtmp, TEMP tmp);
19092   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19093   ins_encode %{
19094     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19095     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19096 
19097     uint x_idx = $idx$$constant & right_n_bits(1);
19098     uint y_idx = ($idx$$constant >> 1) & 1;
19099     int vlen_enc = Assembler::AVX_256bit;
19100     __ movq($tmp$$Register, $val$$XMMRegister);
19101     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19102     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19103     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19104   %}
19105   ins_pipe( pipe_slow );
19106 %}
19107 
19108 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19109   predicate(Matcher::vector_length(n) == 8);
19110   match(Set dst (VectorInsert (Binary src val) idx));
19111   effect(TEMP tmp, TEMP vtmp);
19112   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19113   ins_encode %{
19114     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19115     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19116 
19117     uint x_idx = $idx$$constant & right_n_bits(1);
19118     uint y_idx = ($idx$$constant >> 1) & 3;
19119     __ movq($tmp$$Register, $val$$XMMRegister);
19120     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19121     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19122     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19123   %}
19124   ins_pipe( pipe_slow );
19125 %}
19126 
19127 // ====================REDUCTION ARITHMETIC=======================================
19128 
19129 // =======================Int Reduction==========================================
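
// For illustration (assumed Java Vector API usage): a cross-lane reduction
//   int sum = v.reduceLanes(VectorOperators.ADD);
// is presented to the matcher as an AddReductionVI node (likewise MUL, AND,
// OR, XOR, MIN, MAX), with the scalar accumulator arriving in src1.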
19130 
19131 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19132   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19133   match(Set dst (AddReductionVI src1 src2));
19134   match(Set dst (MulReductionVI src1 src2));
19135   match(Set dst (AndReductionV  src1 src2));
19136   match(Set dst ( OrReductionV  src1 src2));
19137   match(Set dst (XorReductionV  src1 src2));
19138   match(Set dst (MinReductionV  src1 src2));
19139   match(Set dst (MaxReductionV  src1 src2));
19140   effect(TEMP vtmp1, TEMP vtmp2);
19141   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19142   ins_encode %{
19143     int opcode = this->ideal_Opcode();
19144     int vlen = Matcher::vector_length(this, $src2);
19145     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19146   %}
19147   ins_pipe( pipe_slow );
19148 %}
19149 
19150 // =======================Long Reduction==========================================
19151 
19152 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19153   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19154   match(Set dst (AddReductionVL src1 src2));
19155   match(Set dst (MulReductionVL src1 src2));
19156   match(Set dst (AndReductionV  src1 src2));
19157   match(Set dst ( OrReductionV  src1 src2));
19158   match(Set dst (XorReductionV  src1 src2));
19159   match(Set dst (MinReductionV  src1 src2));
19160   match(Set dst (MaxReductionV  src1 src2));
19161   effect(TEMP vtmp1, TEMP vtmp2);
19162   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19163   ins_encode %{
19164     int opcode = this->ideal_Opcode();
19165     int vlen = Matcher::vector_length(this, $src2);
19166     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19167   %}
19168   ins_pipe( pipe_slow );
19169 %}
19170 
19171 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19172   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19173   match(Set dst (AddReductionVL src1 src2));
19174   match(Set dst (MulReductionVL src1 src2));
19175   match(Set dst (AndReductionV  src1 src2));
19176   match(Set dst ( OrReductionV  src1 src2));
19177   match(Set dst (XorReductionV  src1 src2));
19178   match(Set dst (MinReductionV  src1 src2));
19179   match(Set dst (MaxReductionV  src1 src2));
19180   effect(TEMP vtmp1, TEMP vtmp2);
19181   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19182   ins_encode %{
19183     int opcode = this->ideal_Opcode();
19184     int vlen = Matcher::vector_length(this, $src2);
19185     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19186   %}
19187   ins_pipe( pipe_slow );
19188 %}
19189 
19190 // =======================Float Reduction==========================================
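
// The strictly ordered rules below keep the running value in $dst and must
// reduce the lanes in order, preserving sequential FP semantics (FP add/mul
// is not associative); the non-strict rules further down serve the Vector API.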
19191 
19192 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19193   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19194   match(Set dst (AddReductionVF dst src));
19195   match(Set dst (MulReductionVF dst src));
19196   effect(TEMP dst, TEMP vtmp);
19197   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19198   ins_encode %{
19199     int opcode = this->ideal_Opcode();
19200     int vlen = Matcher::vector_length(this, $src);
19201     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19202   %}
19203   ins_pipe( pipe_slow );
19204 %}
19205 
19206 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19207   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19208   match(Set dst (AddReductionVF dst src));
19209   match(Set dst (MulReductionVF dst src));
19210   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19211   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19212   ins_encode %{
19213     int opcode = this->ideal_Opcode();
19214     int vlen = Matcher::vector_length(this, $src);
19215     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19216   %}
19217   ins_pipe( pipe_slow );
19218 %}
19219 
19220 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19221   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19222   match(Set dst (AddReductionVF dst src));
19223   match(Set dst (MulReductionVF dst src));
19224   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19225   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19226   ins_encode %{
19227     int opcode = this->ideal_Opcode();
19228     int vlen = Matcher::vector_length(this, $src);
19229     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19230   %}
19231   ins_pipe( pipe_slow );
19232 %}
19233 
19234 
19235 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19236   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19237   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19238   // src1 contains reduction identity
19239   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19240   match(Set dst (AddReductionVF src1 src2));
19241   match(Set dst (MulReductionVF src1 src2));
19242   effect(TEMP dst);
19243   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19244   ins_encode %{
19245     int opcode = this->ideal_Opcode();
19246     int vlen = Matcher::vector_length(this, $src2);
19247     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19248   %}
19249   ins_pipe( pipe_slow );
19250 %}
19251 
19252 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19253   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19254   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19255   // src1 contains reduction identity
19256   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19257   match(Set dst (AddReductionVF src1 src2));
19258   match(Set dst (MulReductionVF src1 src2));
19259   effect(TEMP dst, TEMP vtmp);
19260   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19261   ins_encode %{
19262     int opcode = this->ideal_Opcode();
19263     int vlen = Matcher::vector_length(this, $src2);
19264     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19265   %}
19266   ins_pipe( pipe_slow );
19267 %}
19268 
19269 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19270   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19271   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19272   // src1 contains reduction identity
19273   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19274   match(Set dst (AddReductionVF src1 src2));
19275   match(Set dst (MulReductionVF src1 src2));
19276   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19277   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19278   ins_encode %{
19279     int opcode = this->ideal_Opcode();
19280     int vlen = Matcher::vector_length(this, $src2);
19281     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19282   %}
19283   ins_pipe( pipe_slow );
19284 %}
19285 
19286 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19287   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19288   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19289   // src1 contains reduction identity
19290   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19291   match(Set dst (AddReductionVF src1 src2));
19292   match(Set dst (MulReductionVF src1 src2));
19293   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19294   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19295   ins_encode %{
19296     int opcode = this->ideal_Opcode();
19297     int vlen = Matcher::vector_length(this, $src2);
19298     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19299   %}
19300   ins_pipe( pipe_slow );
19301 %}
19302 
19303 // =======================Double Reduction==========================================
19304 
19305 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19306   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19307   match(Set dst (AddReductionVD dst src));
19308   match(Set dst (MulReductionVD dst src));
19309   effect(TEMP dst, TEMP vtmp);
19310   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19311   ins_encode %{
19312     int opcode = this->ideal_Opcode();
19313     int vlen = Matcher::vector_length(this, $src);
19314     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19315 %}
19316   ins_pipe( pipe_slow );
19317 %}
19318 
19319 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19320   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19321   match(Set dst (AddReductionVD dst src));
19322   match(Set dst (MulReductionVD dst src));
19323   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19324   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19325   ins_encode %{
19326     int opcode = this->ideal_Opcode();
19327     int vlen = Matcher::vector_length(this, $src);
19328     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19329   %}
19330   ins_pipe( pipe_slow );
19331 %}
19332 
19333 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19334   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19335   match(Set dst (AddReductionVD dst src));
19336   match(Set dst (MulReductionVD dst src));
19337   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19338   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19339   ins_encode %{
19340     int opcode = this->ideal_Opcode();
19341     int vlen = Matcher::vector_length(this, $src);
19342     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19343   %}
19344   ins_pipe( pipe_slow );
19345 %}
19346 
19347 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19348   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19349   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19350   // src1 contains reduction identity
19351   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19352   match(Set dst (AddReductionVD src1 src2));
19353   match(Set dst (MulReductionVD src1 src2));
19354   effect(TEMP dst);
19355   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19356   ins_encode %{
19357     int opcode = this->ideal_Opcode();
19358     int vlen = Matcher::vector_length(this, $src2);
19359     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19360 %}
19361   ins_pipe( pipe_slow );
19362 %}
19363 
19364 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19365   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19366   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19367   // src1 contains reduction identity
19368   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19369   match(Set dst (AddReductionVD src1 src2));
19370   match(Set dst (MulReductionVD src1 src2));
19371   effect(TEMP dst, TEMP vtmp);
19372   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19373   ins_encode %{
19374     int opcode = this->ideal_Opcode();
19375     int vlen = Matcher::vector_length(this, $src2);
19376     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19377   %}
19378   ins_pipe( pipe_slow );
19379 %}
19380 
19381 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19382   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19383   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19384   // src1 contains reduction identity
19385   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19386   match(Set dst (AddReductionVD src1 src2));
19387   match(Set dst (MulReductionVD src1 src2));
19388   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19389   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19390   ins_encode %{
19391     int opcode = this->ideal_Opcode();
19392     int vlen = Matcher::vector_length(this, $src2);
19393     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19394   %}
19395   ins_pipe( pipe_slow );
19396 %}
19397 
19398 // =======================Byte Reduction==========================================
19399 
19400 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19401   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19402   match(Set dst (AddReductionVI src1 src2));
19403   match(Set dst (AndReductionV  src1 src2));
19404   match(Set dst ( OrReductionV  src1 src2));
19405   match(Set dst (XorReductionV  src1 src2));
19406   match(Set dst (MinReductionV  src1 src2));
19407   match(Set dst (MaxReductionV  src1 src2));
19408   effect(TEMP vtmp1, TEMP vtmp2);
19409   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19410   ins_encode %{
19411     int opcode = this->ideal_Opcode();
19412     int vlen = Matcher::vector_length(this, $src2);
19413     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19414   %}
19415   ins_pipe( pipe_slow );
19416 %}
19417 
19418 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19419   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19420   match(Set dst (AddReductionVI src1 src2));
19421   match(Set dst (AndReductionV  src1 src2));
19422   match(Set dst ( OrReductionV  src1 src2));
19423   match(Set dst (XorReductionV  src1 src2));
19424   match(Set dst (MinReductionV  src1 src2));
19425   match(Set dst (MaxReductionV  src1 src2));
19426   effect(TEMP vtmp1, TEMP vtmp2);
19427   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19428   ins_encode %{
19429     int opcode = this->ideal_Opcode();
19430     int vlen = Matcher::vector_length(this, $src2);
19431     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19432   %}
19433   ins_pipe( pipe_slow );
19434 %}
19435 
19436 // =======================Short Reduction==========================================
19437 
19438 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19439   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19440   match(Set dst (AddReductionVI src1 src2));
19441   match(Set dst (MulReductionVI src1 src2));
19442   match(Set dst (AndReductionV  src1 src2));
19443   match(Set dst ( OrReductionV  src1 src2));
19444   match(Set dst (XorReductionV  src1 src2));
19445   match(Set dst (MinReductionV  src1 src2));
19446   match(Set dst (MaxReductionV  src1 src2));
19447   effect(TEMP vtmp1, TEMP vtmp2);
19448   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19449   ins_encode %{
19450     int opcode = this->ideal_Opcode();
19451     int vlen = Matcher::vector_length(this, $src2);
19452     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19453   %}
19454   ins_pipe( pipe_slow );
19455 %}
19456 
19457 // =======================Mul Reduction==========================================
19458 
19459 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19460   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19461             Matcher::vector_length(n->in(2)) <= 32); // src2
19462   match(Set dst (MulReductionVI src1 src2));
19463   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19464   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19465   ins_encode %{
19466     int opcode = this->ideal_Opcode();
19467     int vlen = Matcher::vector_length(this, $src2);
19468     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19469   %}
19470   ins_pipe( pipe_slow );
19471 %}
19472 
19473 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19474   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19475             Matcher::vector_length(n->in(2)) == 64); // src2
19476   match(Set dst (MulReductionVI src1 src2));
19477   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19478   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19479   ins_encode %{
19480     int opcode = this->ideal_Opcode();
19481     int vlen = Matcher::vector_length(this, $src2);
19482     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19483   %}
19484   ins_pipe( pipe_slow );
19485 %}
19486 
19487 //--------------------Min/Max Float Reduction --------------------
19488 // Float Min Reduction
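// The scalar input (src1) must carry the reduction identity, +Infinity for
// MIN and -Infinity for MAX, which the predicates verify via
// TypeF::POS_INF / TypeF::NEG_INF.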
19489 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19490                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19491   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19492             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19493              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19494             Matcher::vector_length(n->in(2)) == 2);
19495   match(Set dst (MinReductionV src1 src2));
19496   match(Set dst (MaxReductionV src1 src2));
19497   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19498   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19499   ins_encode %{
19500     assert(UseAVX > 0, "sanity");
19501 
19502     int opcode = this->ideal_Opcode();
19503     int vlen = Matcher::vector_length(this, $src2);
19504     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19505                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19506   %}
19507   ins_pipe( pipe_slow );
19508 %}
19509 
19510 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19511                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19512   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19513             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19514              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19515             Matcher::vector_length(n->in(2)) >= 4);
19516   match(Set dst (MinReductionV src1 src2));
19517   match(Set dst (MaxReductionV src1 src2));
19518   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19519   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19520   ins_encode %{
19521     assert(UseAVX > 0, "sanity");
19522 
19523     int opcode = this->ideal_Opcode();
19524     int vlen = Matcher::vector_length(this, $src2);
19525     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19526                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19527   %}
19528   ins_pipe( pipe_slow );
19529 %}
19530 
19531 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19532                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19533   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19534             Matcher::vector_length(n->in(2)) == 2);
19535   match(Set dst (MinReductionV dst src));
19536   match(Set dst (MaxReductionV dst src));
19537   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19538   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19539   ins_encode %{
19540     assert(UseAVX > 0, "sanity");
19541 
19542     int opcode = this->ideal_Opcode();
19543     int vlen = Matcher::vector_length(this, $src);
19544     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19545                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19546   %}
19547   ins_pipe( pipe_slow );
19548 %}
19549 
19550 
19551 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19552                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19553   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19554             Matcher::vector_length(n->in(2)) >= 4);
19555   match(Set dst (MinReductionV dst src));
19556   match(Set dst (MaxReductionV dst src));
19557   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19558   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19559   ins_encode %{
19560     assert(UseAVX > 0, "sanity");
19561 
19562     int opcode = this->ideal_Opcode();
19563     int vlen = Matcher::vector_length(this, $src);
19564     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19565                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19566   %}
19567   ins_pipe( pipe_slow );
19568 %}
19569 
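// AVX10.2 provides vminmaxps/vminmaxpd with IEEE 754-2019 minimum/maximum
// semantics that already match Java's, so the variants below need only one or
// two scratch registers, pass xnoreg for the unused blend temps, and do not
// kill the flags.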
19570 instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{
19571   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19572             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19573              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19574             Matcher::vector_length(n->in(2)) == 2);
19575   match(Set dst (MinReductionV src1 src2));
19576   match(Set dst (MaxReductionV src1 src2));
19577   effect(TEMP dst, TEMP xtmp1);
19578   format %{ "vector_minmax2F_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19579   ins_encode %{
19580     int opcode = this->ideal_Opcode();
19581     int vlen = Matcher::vector_length(this, $src2);
19582     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19583                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19584   %}
19585   ins_pipe( pipe_slow );
19586 %}
19587 
19588 instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19589   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19590             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19591              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19592             Matcher::vector_length(n->in(2)) >= 4);
19593   match(Set dst (MinReductionV src1 src2));
19594   match(Set dst (MaxReductionV src1 src2));
19595   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19596   format %{ "vector_minmaxF_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19597   ins_encode %{
19598     int opcode = this->ideal_Opcode();
19599     int vlen = Matcher::vector_length(this, $src2);
19600     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19601                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19602   %}
19603   ins_pipe( pipe_slow );
19604 %}
19605 
19606 instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{
19607   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19608             Matcher::vector_length(n->in(2)) == 2);
19609   match(Set dst (MinReductionV dst src));
19610   match(Set dst (MaxReductionV dst src));
19611   effect(TEMP dst, TEMP xtmp1);
19612   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19613   ins_encode %{
19614     int opcode = this->ideal_Opcode();
19615     int vlen = Matcher::vector_length(this, $src);
19616     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19617                          $xtmp1$$XMMRegister);
19618   %}
19619   ins_pipe( pipe_slow );
19620 %}
19621 
19622 instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19623   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19624             Matcher::vector_length(n->in(2)) >= 4);
19625   match(Set dst (MinReductionV dst src));
19626   match(Set dst (MaxReductionV dst src));
19627   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19628   format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19629   ins_encode %{
19630     int opcode = this->ideal_Opcode();
19631     int vlen = Matcher::vector_length(this, $src);
19632     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19633                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19634   %}
19635   ins_pipe( pipe_slow );
19636 %}
19637 
19638 //--------------------Min/Max Double Reduction --------------------
19639 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19640                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19641   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19642             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19643              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19644             Matcher::vector_length(n->in(2)) == 2);
19645   match(Set dst (MinReductionV src1 src2));
19646   match(Set dst (MaxReductionV src1 src2));
19647   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19648   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19649   ins_encode %{
19650     assert(UseAVX > 0, "sanity");
19651 
19652     int opcode = this->ideal_Opcode();
19653     int vlen = Matcher::vector_length(this, $src2);
19654     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19655                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19656   %}
19657   ins_pipe( pipe_slow );
19658 %}
19659 
19660 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19661                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19662   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19663             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19664              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19665             Matcher::vector_length(n->in(2)) >= 4);
19666   match(Set dst (MinReductionV src1 src2));
19667   match(Set dst (MaxReductionV src1 src2));
19668   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19669   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19670   ins_encode %{
19671     assert(UseAVX > 0, "sanity");
19672 
19673     int opcode = this->ideal_Opcode();
19674     int vlen = Matcher::vector_length(this, $src2);
19675     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19676                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19677   %}
19678   ins_pipe( pipe_slow );
19679 %}
19680 
19681 
19682 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19683                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19684   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19685             Matcher::vector_length(n->in(2)) == 2);
19686   match(Set dst (MinReductionV dst src));
19687   match(Set dst (MaxReductionV dst src));
19688   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19689   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19690   ins_encode %{
19691     assert(UseAVX > 0, "sanity");
19692 
19693     int opcode = this->ideal_Opcode();
19694     int vlen = Matcher::vector_length(this, $src);
19695     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19696                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19697   %}
19698   ins_pipe( pipe_slow );
19699 %}
19700 
19701 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19702                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19703   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19704             Matcher::vector_length(n->in(2)) >= 4);
19705   match(Set dst (MinReductionV dst src));
19706   match(Set dst (MaxReductionV dst src));
19707   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19708   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19709   ins_encode %{
19710     assert(UseAVX > 0, "sanity");
19711 
19712     int opcode = this->ideal_Opcode();
19713     int vlen = Matcher::vector_length(this, $src);
19714     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19715                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19716   %}
19717   ins_pipe( pipe_slow );
19718 %}
19719 
19720 instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{
19721   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19722             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19723              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19724             Matcher::vector_length(n->in(2)) == 2);
19725   match(Set dst (MinReductionV src1 src2));
19726   match(Set dst (MaxReductionV src1 src2));
19727   effect(TEMP dst, TEMP xtmp1);
19728   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
19729   ins_encode %{
19730     int opcode = this->ideal_Opcode();
19731     int vlen = Matcher::vector_length(this, $src2);
19732     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
19733                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
19734   %}
19735   ins_pipe( pipe_slow );
19736 %}
19737 
19738 instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
19739   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19740             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19741              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19742             Matcher::vector_length(n->in(2)) >= 4);
19743   match(Set dst (MinReductionV src1 src2));
19744   match(Set dst (MaxReductionV src1 src2));
19745   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19746   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
19747   ins_encode %{
19748     int opcode = this->ideal_Opcode();
19749     int vlen = Matcher::vector_length(this, $src2);
19750     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19751                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19752   %}
19753   ins_pipe( pipe_slow );
19754 %}
19755 
19756 
19757 instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{
19758   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19759             Matcher::vector_length(n->in(2)) == 2);
19760   match(Set dst (MinReductionV dst src));
19761   match(Set dst (MaxReductionV dst src));
19762   effect(TEMP dst, TEMP xtmp1);
19763   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
19764   ins_encode %{
19765     int opcode = this->ideal_Opcode();
19766     int vlen = Matcher::vector_length(this, $src);
19767     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19768                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19769   %}
19770   ins_pipe( pipe_slow );
19771 %}
19772 
19773 instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{
19774   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19775             Matcher::vector_length(n->in(2)) >= 4);
19776   match(Set dst (MinReductionV dst src));
19777   match(Set dst (MaxReductionV dst src));
19778   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19779   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
19780   ins_encode %{
19781     int opcode = this->ideal_Opcode();
19782     int vlen = Matcher::vector_length(this, $src);
19783     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19784                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19785   %}
19786   ins_pipe( pipe_slow );
19787 %}
19788 
19789 // ====================VECTOR ARITHMETIC=======================================
19790 
19791 // --------------------------------- ADD --------------------------------------
19792 
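// Most binary operations below come in three shapes: an SSE two-operand form
// (dst op= src), an AVX three-operand register form, and an AVX form with a
// folded memory operand. The memory forms are restricted to vectors wider
// than 8 bytes because the instruction always reads its full vector width
// from memory, which could touch bytes past the end of a 4- or 8-byte vector.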
19793 // Bytes vector add
19794 instruct vaddB(vec dst, vec src) %{
19795   predicate(UseAVX == 0);
19796   match(Set dst (AddVB dst src));
19797   format %{ "paddb   $dst,$src\t! add packedB" %}
19798   ins_encode %{
19799     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
19800   %}
19801   ins_pipe( pipe_slow );
19802 %}
19803 
19804 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
19805   predicate(UseAVX > 0);
19806   match(Set dst (AddVB src1 src2));
19807   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
19808   ins_encode %{
19809     int vlen_enc = vector_length_encoding(this);
19810     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19811   %}
19812   ins_pipe( pipe_slow );
19813 %}
19814 
19815 instruct vaddB_mem(vec dst, vec src, memory mem) %{
19816   predicate((UseAVX > 0) &&
19817             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19818   match(Set dst (AddVB src (LoadVector mem)));
19819   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
19820   ins_encode %{
19821     int vlen_enc = vector_length_encoding(this);
19822     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19823   %}
19824   ins_pipe( pipe_slow );
19825 %}
19826 
19827 // Shorts/Chars vector add
19828 instruct vaddS(vec dst, vec src) %{
19829   predicate(UseAVX == 0);
19830   match(Set dst (AddVS dst src));
19831   format %{ "paddw   $dst,$src\t! add packedS" %}
19832   ins_encode %{
19833     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
19834   %}
19835   ins_pipe( pipe_slow );
19836 %}
19837 
19838 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
19839   predicate(UseAVX > 0);
19840   match(Set dst (AddVS src1 src2));
19841   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
19842   ins_encode %{
19843     int vlen_enc = vector_length_encoding(this);
19844     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19845   %}
19846   ins_pipe( pipe_slow );
19847 %}
19848 
19849 instruct vaddS_mem(vec dst, vec src, memory mem) %{
19850   predicate((UseAVX > 0) &&
19851             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19852   match(Set dst (AddVS src (LoadVector mem)));
19853   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
19854   ins_encode %{
19855     int vlen_enc = vector_length_encoding(this);
19856     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19857   %}
19858   ins_pipe( pipe_slow );
19859 %}
19860 
19861 // Integers vector add
19862 instruct vaddI(vec dst, vec src) %{
19863   predicate(UseAVX == 0);
19864   match(Set dst (AddVI dst src));
19865   format %{ "paddd   $dst,$src\t! add packedI" %}
19866   ins_encode %{
19867     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
19868   %}
19869   ins_pipe( pipe_slow );
19870 %}
19871 
19872 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
19873   predicate(UseAVX > 0);
19874   match(Set dst (AddVI src1 src2));
19875   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
19876   ins_encode %{
19877     int vlen_enc = vector_length_encoding(this);
19878     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19879   %}
19880   ins_pipe( pipe_slow );
19881 %}
19882 
19883 
19884 instruct vaddI_mem(vec dst, vec src, memory mem) %{
19885   predicate((UseAVX > 0) &&
19886             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19887   match(Set dst (AddVI src (LoadVector mem)));
19888   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
19889   ins_encode %{
19890     int vlen_enc = vector_length_encoding(this);
19891     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19892   %}
19893   ins_pipe( pipe_slow );
19894 %}
19895 
19896 // Longs vector add
19897 instruct vaddL(vec dst, vec src) %{
19898   predicate(UseAVX == 0);
19899   match(Set dst (AddVL dst src));
19900   format %{ "paddq   $dst,$src\t! add packedL" %}
19901   ins_encode %{
19902     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
19903   %}
19904   ins_pipe( pipe_slow );
19905 %}
19906 
19907 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
19908   predicate(UseAVX > 0);
19909   match(Set dst (AddVL src1 src2));
19910   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
19911   ins_encode %{
19912     int vlen_enc = vector_length_encoding(this);
19913     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19914   %}
19915   ins_pipe( pipe_slow );
19916 %}
19917 
19918 instruct vaddL_mem(vec dst, vec src, memory mem) %{
19919   predicate((UseAVX > 0) &&
19920             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19921   match(Set dst (AddVL src (LoadVector mem)));
19922   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
19923   ins_encode %{
19924     int vlen_enc = vector_length_encoding(this);
19925     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19926   %}
19927   ins_pipe( pipe_slow );
19928 %}
19929 
19930 // Floats vector add
19931 instruct vaddF(vec dst, vec src) %{
19932   predicate(UseAVX == 0);
19933   match(Set dst (AddVF dst src));
19934   format %{ "addps   $dst,$src\t! add packedF" %}
19935   ins_encode %{
19936     __ addps($dst$$XMMRegister, $src$$XMMRegister);
19937   %}
19938   ins_pipe( pipe_slow );
19939 %}
19940 
19941 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
19942   predicate(UseAVX > 0);
19943   match(Set dst (AddVF src1 src2));
19944   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
19945   ins_encode %{
19946     int vlen_enc = vector_length_encoding(this);
19947     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19948   %}
19949   ins_pipe( pipe_slow );
19950 %}
19951 
19952 instruct vaddF_mem(vec dst, vec src, memory mem) %{
19953   predicate((UseAVX > 0) &&
19954             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19955   match(Set dst (AddVF src (LoadVector mem)));
19956   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
19957   ins_encode %{
19958     int vlen_enc = vector_length_encoding(this);
19959     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19960   %}
19961   ins_pipe( pipe_slow );
19962 %}
19963 
19964 // Doubles vector add
19965 instruct vaddD(vec dst, vec src) %{
19966   predicate(UseAVX == 0);
19967   match(Set dst (AddVD dst src));
19968   format %{ "addpd   $dst,$src\t! add packedD" %}
19969   ins_encode %{
19970     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
19971   %}
19972   ins_pipe( pipe_slow );
19973 %}
19974 
19975 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
19976   predicate(UseAVX > 0);
19977   match(Set dst (AddVD src1 src2));
19978   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
19979   ins_encode %{
19980     int vlen_enc = vector_length_encoding(this);
19981     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19982   %}
19983   ins_pipe( pipe_slow );
19984 %}
19985 
19986 instruct vaddD_mem(vec dst, vec src, memory mem) %{
19987   predicate((UseAVX > 0) &&
19988             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19989   match(Set dst (AddVD src (LoadVector mem)));
19990   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
19991   ins_encode %{
19992     int vlen_enc = vector_length_encoding(this);
19993     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19994   %}
19995   ins_pipe( pipe_slow );
19996 %}
19997 
19998 // --------------------------------- SUB --------------------------------------
19999 
20000 // Bytes vector sub
20001 instruct vsubB(vec dst, vec src) %{
20002   predicate(UseAVX == 0);
20003   match(Set dst (SubVB dst src));
20004   format %{ "psubb   $dst,$src\t! sub packedB" %}
20005   ins_encode %{
20006     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20007   %}
20008   ins_pipe( pipe_slow );
20009 %}
20010 
20011 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20012   predicate(UseAVX > 0);
20013   match(Set dst (SubVB src1 src2));
20014   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20015   ins_encode %{
20016     int vlen_enc = vector_length_encoding(this);
20017     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20018   %}
20019   ins_pipe( pipe_slow );
20020 %}
20021 
20022 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20023   predicate((UseAVX > 0) &&
20024             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20025   match(Set dst (SubVB src (LoadVector mem)));
20026   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20027   ins_encode %{
20028     int vlen_enc = vector_length_encoding(this);
20029     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20030   %}
20031   ins_pipe( pipe_slow );
20032 %}
20033 
20034 // Shorts/Chars vector sub
20035 instruct vsubS(vec dst, vec src) %{
20036   predicate(UseAVX == 0);
20037   match(Set dst (SubVS dst src));
20038   format %{ "psubw   $dst,$src\t! sub packedS" %}
20039   ins_encode %{
20040     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20041   %}
20042   ins_pipe( pipe_slow );
20043 %}
20044 
20045 
20046 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20047   predicate(UseAVX > 0);
20048   match(Set dst (SubVS src1 src2));
20049   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20050   ins_encode %{
20051     int vlen_enc = vector_length_encoding(this);
20052     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20053   %}
20054   ins_pipe( pipe_slow );
20055 %}
20056 
20057 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20058   predicate((UseAVX > 0) &&
20059             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20060   match(Set dst (SubVS src (LoadVector mem)));
20061   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20062   ins_encode %{
20063     int vlen_enc = vector_length_encoding(this);
20064     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20065   %}
20066   ins_pipe( pipe_slow );
20067 %}
20068 
20069 // Integers vector sub
20070 instruct vsubI(vec dst, vec src) %{
20071   predicate(UseAVX == 0);
20072   match(Set dst (SubVI dst src));
20073   format %{ "psubd   $dst,$src\t! sub packedI" %}
20074   ins_encode %{
20075     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20076   %}
20077   ins_pipe( pipe_slow );
20078 %}
20079 
20080 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20081   predicate(UseAVX > 0);
20082   match(Set dst (SubVI src1 src2));
20083   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20084   ins_encode %{
20085     int vlen_enc = vector_length_encoding(this);
20086     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20087   %}
20088   ins_pipe( pipe_slow );
20089 %}
20090 
20091 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20092   predicate((UseAVX > 0) &&
20093             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20094   match(Set dst (SubVI src (LoadVector mem)));
20095   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20096   ins_encode %{
20097     int vlen_enc = vector_length_encoding(this);
20098     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20099   %}
20100   ins_pipe( pipe_slow );
20101 %}
20102 
20103 // Longs vector sub
20104 instruct vsubL(vec dst, vec src) %{
20105   predicate(UseAVX == 0);
20106   match(Set dst (SubVL dst src));
20107   format %{ "psubq   $dst,$src\t! sub packedL" %}
20108   ins_encode %{
20109     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20110   %}
20111   ins_pipe( pipe_slow );
20112 %}
20113 
20114 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20115   predicate(UseAVX > 0);
20116   match(Set dst (SubVL src1 src2));
20117   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20118   ins_encode %{
20119     int vlen_enc = vector_length_encoding(this);
20120     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20121   %}
20122   ins_pipe( pipe_slow );
20123 %}
20124 
20125 
20126 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20127   predicate((UseAVX > 0) &&
20128             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20129   match(Set dst (SubVL src (LoadVector mem)));
20130   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20131   ins_encode %{
20132     int vlen_enc = vector_length_encoding(this);
20133     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20134   %}
20135   ins_pipe( pipe_slow );
20136 %}
20137 
20138 // Floats vector sub
20139 instruct vsubF(vec dst, vec src) %{
20140   predicate(UseAVX == 0);
20141   match(Set dst (SubVF dst src));
20142   format %{ "subps   $dst,$src\t! sub packedF" %}
20143   ins_encode %{
20144     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20145   %}
20146   ins_pipe( pipe_slow );
20147 %}
20148 
20149 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20150   predicate(UseAVX > 0);
20151   match(Set dst (SubVF src1 src2));
20152   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20153   ins_encode %{
20154     int vlen_enc = vector_length_encoding(this);
20155     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20156   %}
20157   ins_pipe( pipe_slow );
20158 %}
20159 
20160 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20161   predicate((UseAVX > 0) &&
20162             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20163   match(Set dst (SubVF src (LoadVector mem)));
20164   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20165   ins_encode %{
20166     int vlen_enc = vector_length_encoding(this);
20167     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20168   %}
20169   ins_pipe( pipe_slow );
20170 %}
20171 
20172 // Doubles vector sub
20173 instruct vsubD(vec dst, vec src) %{
20174   predicate(UseAVX == 0);
20175   match(Set dst (SubVD dst src));
20176   format %{ "subpd   $dst,$src\t! sub packedD" %}
20177   ins_encode %{
20178     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20179   %}
20180   ins_pipe( pipe_slow );
20181 %}
20182 
20183 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20184   predicate(UseAVX > 0);
20185   match(Set dst (SubVD src1 src2));
20186   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20187   ins_encode %{
20188     int vlen_enc = vector_length_encoding(this);
20189     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20190   %}
20191   ins_pipe( pipe_slow );
20192 %}
20193 
20194 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20195   predicate((UseAVX > 0) &&
20196             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20197   match(Set dst (SubVD src (LoadVector mem)));
20198   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20199   ins_encode %{
20200     int vlen_enc = vector_length_encoding(this);
20201     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20202   %}
20203   ins_pipe( pipe_slow );
20204 %}
20205 
20206 // --------------------------------- MUL --------------------------------------
20207 
20208 // Byte vector mul
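// There is no packed byte multiply on x86. The 8-byte case sign-extends both
// inputs to words (pmovsxbw), multiplies, masks each word back down to its
// low byte, and re-packs. The wider cases below multiply the odd and even
// byte lanes separately inside word elements and OR the two halves together.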
20209 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20210   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20211   match(Set dst (MulVB src1 src2));
20212   effect(TEMP dst, TEMP xtmp);
20213   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20214   ins_encode %{
20215     assert(UseSSE > 3, "required");
20216     __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20217     __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20218     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20219     __ psllw($dst$$XMMRegister, 8);
20220     __ psrlw($dst$$XMMRegister, 8);
20221     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20222   %}
20223   ins_pipe( pipe_slow );
20224 %}
20225 
20226 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20227   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20228   match(Set dst (MulVB src1 src2));
20229   effect(TEMP dst, TEMP xtmp);
20230   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20231   ins_encode %{
20232     assert(UseSSE > 3, "required");
20233     // Odd-index elements
20234     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20235     __ psrlw($dst$$XMMRegister, 8);
20236     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20237     __ psrlw($xtmp$$XMMRegister, 8);
20238     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20239     __ psllw($dst$$XMMRegister, 8);
20240     // Even-index elements
20241     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20242     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20243     __ psllw($xtmp$$XMMRegister, 8);
20244     __ psrlw($xtmp$$XMMRegister, 8);
20245     // Combine
20246     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20247   %}
20248   ins_pipe( pipe_slow );
20249 %}
20250 
20251 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20252   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20253   match(Set dst (MulVB src1 src2));
20254   effect(TEMP xtmp1, TEMP xtmp2);
20255   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20256   ins_encode %{
20257     int vlen_enc = vector_length_encoding(this);
20258     // Odd-index elements
20259     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20260     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20261     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20262     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20263     // Even-index elements
20264     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20265     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20266     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20267     // Combine
20268     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20269   %}
20270   ins_pipe( pipe_slow );
20271 %}
20272 
20273 // Shorts/Chars vector mul
20274 instruct vmulS(vec dst, vec src) %{
20275   predicate(UseAVX == 0);
20276   match(Set dst (MulVS dst src));
20277   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20278   ins_encode %{
20279     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20280   %}
20281   ins_pipe( pipe_slow );
20282 %}
20283 
20284 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20285   predicate(UseAVX > 0);
20286   match(Set dst (MulVS src1 src2));
20287   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20288   ins_encode %{
20289     int vlen_enc = vector_length_encoding(this);
20290     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20291   %}
20292   ins_pipe( pipe_slow );
20293 %}
20294 
20295 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20296   predicate((UseAVX > 0) &&
20297             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20298   match(Set dst (MulVS src (LoadVector mem)));
20299   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20300   ins_encode %{
20301     int vlen_enc = vector_length_encoding(this);
20302     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20303   %}
20304   ins_pipe( pipe_slow );
20305 %}
20306 
20307 // Integers vector mul
20308 instruct vmulI(vec dst, vec src) %{
20309   predicate(UseAVX == 0);
20310   match(Set dst (MulVI dst src));
20311   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20312   ins_encode %{
20313     assert(UseSSE > 3, "required");
20314     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20315   %}
20316   ins_pipe( pipe_slow );
20317 %}
20318 
20319 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20320   predicate(UseAVX > 0);
20321   match(Set dst (MulVI src1 src2));
20322   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20323   ins_encode %{
20324     int vlen_enc = vector_length_encoding(this);
20325     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20326   %}
20327   ins_pipe( pipe_slow );
20328 %}
20329 
20330 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20331   predicate((UseAVX > 0) &&
20332             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20333   match(Set dst (MulVI src (LoadVector mem)));
20334   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20335   ins_encode %{
20336     int vlen_enc = vector_length_encoding(this);
20337     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20338   %}
20339   ins_pipe( pipe_slow );
20340 %}
20341 
20342 // Longs vector mul
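// Splitting each 64-bit operand into 32-bit halves, a = a_hi:a_lo and
// b = b_hi:b_lo, gives
//   (a * b) mod 2^64 = ((a_hi*b_lo + a_lo*b_hi) << 32) + a_lo*b_lo
// AVX512DQ supplies vpmullq directly; otherwise vmulL/vmulL_reg below
// evaluate this identity with 32-bit multiplies, shifts and adds.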
20343 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20344   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20345              VM_Version::supports_avx512dq()) ||
20346             VM_Version::supports_avx512vldq());
20347   match(Set dst (MulVL src1 src2));
20348   ins_cost(500);
20349   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20350   ins_encode %{
20351     assert(UseAVX > 2, "required");
20352     int vlen_enc = vector_length_encoding(this);
20353     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20354   %}
20355   ins_pipe( pipe_slow );
20356 %}
20357 
20358 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20359   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20360              VM_Version::supports_avx512dq()) ||
20361             (Matcher::vector_length_in_bytes(n) > 8 &&
20362              VM_Version::supports_avx512vldq()));
20363   match(Set dst (MulVL src (LoadVector mem)));
20364   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20365   ins_cost(500);
20366   ins_encode %{
20367     assert(UseAVX > 2, "required");
20368     int vlen_enc = vector_length_encoding(this);
20369     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20370   %}
20371   ins_pipe( pipe_slow );
20372 %}
20373 
20374 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20375   predicate(UseAVX == 0);
20376   match(Set dst (MulVL src1 src2));
20377   ins_cost(500);
20378   effect(TEMP dst, TEMP xtmp);
20379   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20380   ins_encode %{
20381     assert(VM_Version::supports_sse4_1(), "required");
20382     // Get the lo-hi cross products; only the low 32 bits of each product matter
20383     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20384     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20385     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20386     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20387     __ psllq($dst$$XMMRegister, 32);
20388     // Get the lo-lo products
20389     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20390     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20391     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20392   %}
20393   ins_pipe( pipe_slow );
20394 %}
20395 
20396 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20397   predicate(UseAVX > 0 &&
20398             ((Matcher::vector_length_in_bytes(n) == 64 &&
20399               !VM_Version::supports_avx512dq()) ||
20400              (Matcher::vector_length_in_bytes(n) < 64 &&
20401               !VM_Version::supports_avx512vldq())));
20402   match(Set dst (MulVL src1 src2));
20403   effect(TEMP xtmp1, TEMP xtmp2);
20404   ins_cost(500);
20405   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20406   ins_encode %{
20407     int vlen_enc = vector_length_encoding(this);
20408     // Get the lo-hi cross products; only the low 32 bits of each product matter
20409     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20410     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20411     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20412     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20413     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20414     // Get the lo-lo products
20415     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20416     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20417   %}
20418   ins_pipe( pipe_slow );
20419 %}
20420 
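// When C2 can prove that both MulVL inputs are zero-extended 32-bit values
// (has_uint_inputs) or sign-extended 32-bit values (has_int_inputs), a single
// vpmuludq/vpmuldq already yields the full 64-bit products, hence the much
// lower cost than the general decomposition above.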
20421 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20422   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20423   match(Set dst (MulVL src1 src2));
20424   ins_cost(100);
20425   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20426   ins_encode %{
20427     int vlen_enc = vector_length_encoding(this);
20428     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20429   %}
20430   ins_pipe( pipe_slow );
20431 %}
20432 
20433 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20434   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20435   match(Set dst (MulVL src1 src2));
20436   ins_cost(100);
20437   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20438   ins_encode %{
20439     int vlen_enc = vector_length_encoding(this);
20440     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20441   %}
20442   ins_pipe( pipe_slow );
20443 %}
20444 
20445 // Floats vector mul
20446 instruct vmulF(vec dst, vec src) %{
20447   predicate(UseAVX == 0);
20448   match(Set dst (MulVF dst src));
20449   format %{ "mulps   $dst,$src\t! mul packedF" %}
20450   ins_encode %{
20451     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20452   %}
20453   ins_pipe( pipe_slow );
20454 %}
20455 
20456 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20457   predicate(UseAVX > 0);
20458   match(Set dst (MulVF src1 src2));
20459   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
20460   ins_encode %{
20461     int vlen_enc = vector_length_encoding(this);
20462     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20463   %}
20464   ins_pipe( pipe_slow );
20465 %}
20466 
20467 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20468   predicate((UseAVX > 0) &&
20469             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20470   match(Set dst (MulVF src (LoadVector mem)));
20471   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
20472   ins_encode %{
20473     int vlen_enc = vector_length_encoding(this);
20474     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20475   %}
20476   ins_pipe( pipe_slow );
20477 %}
20478 
20479 // Doubles vector mul
20480 instruct vmulD(vec dst, vec src) %{
20481   predicate(UseAVX == 0);
20482   match(Set dst (MulVD dst src));
20483   format %{ "mulpd   $dst,$src\t! mul packedD" %}
20484   ins_encode %{
20485     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20486   %}
20487   ins_pipe( pipe_slow );
20488 %}
20489 
20490 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20491   predicate(UseAVX > 0);
20492   match(Set dst (MulVD src1 src2));
20493   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
20494   ins_encode %{
20495     int vlen_enc = vector_length_encoding(this);
20496     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20497   %}
20498   ins_pipe( pipe_slow );
20499 %}
20500 
20501 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20502   predicate((UseAVX > 0) &&
20503             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20504   match(Set dst (MulVD src (LoadVector mem)));
20505   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
20506   ins_encode %{
20507     int vlen_enc = vector_length_encoding(this);
20508     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20509   %}
20510   ins_pipe( pipe_slow );
20511 %}
20512 
20513 // --------------------------------- DIV --------------------------------------
20514 
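// x86 has no packed integer divide instruction, so only the floating-point
// forms appear in this section.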
20515 // Floats vector div
20516 instruct vdivF(vec dst, vec src) %{
20517   predicate(UseAVX == 0);
20518   match(Set dst (DivVF dst src));
20519   format %{ "divps   $dst,$src\t! div packedF" %}
20520   ins_encode %{
20521     __ divps($dst$$XMMRegister, $src$$XMMRegister);
20522   %}
20523   ins_pipe( pipe_slow );
20524 %}
20525 
20526 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20527   predicate(UseAVX > 0);
20528   match(Set dst (DivVF src1 src2));
20529   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
20530   ins_encode %{
20531     int vlen_enc = vector_length_encoding(this);
20532     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20533   %}
20534   ins_pipe( pipe_slow );
20535 %}
20536 
20537 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20538   predicate((UseAVX > 0) &&
20539             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20540   match(Set dst (DivVF src (LoadVector mem)));
20541   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
20542   ins_encode %{
20543     int vlen_enc = vector_length_encoding(this);
20544     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20545   %}
20546   ins_pipe( pipe_slow );
20547 %}
20548 
20549 // Doubles vector div
20550 instruct vdivD(vec dst, vec src) %{
20551   predicate(UseAVX == 0);
20552   match(Set dst (DivVD dst src));
20553   format %{ "divpd   $dst,$src\t! div packedD" %}
20554   ins_encode %{
20555     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20556   %}
20557   ins_pipe( pipe_slow );
20558 %}
20559 
20560 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20561   predicate(UseAVX > 0);
20562   match(Set dst (DivVD src1 src2));
20563   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
20564   ins_encode %{
20565     int vlen_enc = vector_length_encoding(this);
20566     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20567   %}
20568   ins_pipe( pipe_slow );
20569 %}
20570 
20571 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20572   predicate((UseAVX > 0) &&
20573             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20574   match(Set dst (DivVD src (LoadVector mem)));
20575   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
20576   ins_encode %{
20577     int vlen_enc = vector_length_encoding(this);
20578     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20579   %}
20580   ins_pipe( pipe_slow );
20581 %}
20582 
20583 // ------------------------------ MinMax ---------------------------------------
20584 
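// Byte/short/int elements have direct pmins*/pmaxs* instructions (SSE4.1 and
// up), reached through the pminmax()/vpminmax() helpers. Long needs separate
// patterns below because vpminsq/vpmaxsq only exist with AVX-512.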
20585 // Byte, Short, Int vector Min/Max
20586 instruct minmax_reg_sse(vec dst, vec src) %{
20587   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20588             UseAVX == 0);
20589   match(Set dst (MinV dst src));
20590   match(Set dst (MaxV dst src));
20591   format %{ "vector_minmax  $dst,$src\t!  " %}
20592   ins_encode %{
20593     assert(UseSSE >= 4, "required");
20594 
20595     int opcode = this->ideal_Opcode();
20596     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20597     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20598   %}
20599   ins_pipe( pipe_slow );
20600 %}
20601 
20602 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20603   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20604             UseAVX > 0);
20605   match(Set dst (MinV src1 src2));
20606   match(Set dst (MaxV src1 src2));
20607   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
20608   ins_encode %{
20609     int opcode = this->ideal_Opcode();
20610     int vlen_enc = vector_length_encoding(this);
20611     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20612 
20613     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20614   %}
20615   ins_pipe( pipe_slow );
20616 %}
20617 
20618 // Long vector Min/Max
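// The SSE fallback emulates 64-bit min/max with a compare-and-blend sequence;
// the fixed rxmm0 temp exists because the SSE4.1 variable blends
// (blendvpd/pblendvb) take xmm0 as their implicit mask register.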
20619 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20620   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20621             UseAVX == 0);
20622   match(Set dst (MinV dst src));
20623   match(Set dst (MaxV src dst));
20624   effect(TEMP dst, TEMP tmp);
20625   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
20626   ins_encode %{
20627     assert(UseSSE >= 4, "required");
20628 
20629     int opcode = this->ideal_Opcode();
20630     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20631     assert(elem_bt == T_LONG, "sanity");
20632 
20633     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20634   %}
20635   ins_pipe( pipe_slow );
20636 %}
20637 
20638 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20639   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20640             UseAVX > 0 && !VM_Version::supports_avx512vl());
20641   match(Set dst (MinV src1 src2));
20642   match(Set dst (MaxV src1 src2));
20643   effect(TEMP dst);
20644   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20645   ins_encode %{
20646     int vlen_enc = vector_length_encoding(this);
20647     int opcode = this->ideal_Opcode();
20648     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20649     assert(elem_bt == T_LONG, "sanity");
20650 
20651     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20652   %}
20653   ins_pipe( pipe_slow );
20654 %}
20655 
20656 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20657   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20658             Matcher::vector_element_basic_type(n) == T_LONG);
20659   match(Set dst (MinV src1 src2));
20660   match(Set dst (MaxV src1 src2));
20661   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20662   ins_encode %{
20663     assert(UseAVX > 2, "required");
20664 
20665     int vlen_enc = vector_length_encoding(this);
20666     int opcode = this->ideal_Opcode();
20667     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20668     assert(elem_bt == T_LONG, "sanity");
20669 
20670     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20671   %}
20672   ins_pipe( pipe_slow );
20673 %}
20674 
20675 // Float/Double vector Min/Max
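// Same Java semantics issue as the float reductions above: NaN must propagate
// and -0.0 must order below +0.0. Three paths: AVX10.2 vminmax is correct on
// its own; vectors up to 32 bytes use a blend-based sequence with three
// vector temps; 64-byte vectors use an AVX-512 opmask-based sequence (ktmp)
// instead.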
20676 instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{
20677   predicate(VM_Version::supports_avx10_2() &&
20678             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20679   match(Set dst (MinV a b));
20680   match(Set dst (MaxV a b));
20681   format %{ "vector_minmaxFP  $dst, $a, $b" %}
20682   ins_encode %{
20683     int vlen_enc = vector_length_encoding(this);
20684     int opcode = this->ideal_Opcode();
20685     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20686     __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20687   %}
20688   ins_pipe( pipe_slow );
20689 %}
20690 
20691 // Float/Double vector Min/Max
20692 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20693   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20694             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20695             UseAVX > 0);
20696   match(Set dst (MinV a b));
20697   match(Set dst (MaxV a b));
20698   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20699   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20700   ins_encode %{
20701     assert(UseAVX > 0, "required");
20702 
20703     int opcode = this->ideal_Opcode();
20704     int vlen_enc = vector_length_encoding(this);
20705     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20706 
20707     __ vminmax_fp(opcode, elem_bt,
20708                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20709                   $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20710   %}
20711   ins_pipe( pipe_slow );
20712 %}
20713 
20714 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20715   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20716             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20717   match(Set dst (MinV a b));
20718   match(Set dst (MaxV a b));
20719   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20720   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20721   ins_encode %{
20722     assert(UseAVX > 2, "required");
20723 
20724     int opcode = this->ideal_Opcode();
20725     int vlen_enc = vector_length_encoding(this);
20726     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20727 
20728     __ evminmax_fp(opcode, elem_bt,
20729                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20730                    $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20731   %}
20732   ins_pipe( pipe_slow );
20733 %}
20734 
20735 // ------------------------------ Unsigned vector Min/Max ----------------------
20736 
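// vpminu*/vpmaxu* cover the unsigned element sizes directly, but the 64-bit
// forms used here need AVX512VL. Without it, vpuminmaxq() emulates unsigned
// 64-bit min/max with two XMM temps (presumably by adjusting the sign bits so
// a signed compare yields the unsigned order; the exact sequence lives in the
// macro assembler).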
20737 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20738   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20739   match(Set dst (UMinV a b));
20740   match(Set dst (UMaxV a b));
20741   format %{ "vector_uminmax $dst,$a,$b\t!" %}
20742   ins_encode %{
20743     int opcode = this->ideal_Opcode();
20744     int vlen_enc = vector_length_encoding(this);
20745     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20746     assert(is_integral_type(elem_bt), "");
20747     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20748   %}
20749   ins_pipe( pipe_slow );
20750 %}
20751 
20752 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20753   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20754   match(Set dst (UMinV a (LoadVector b)));
20755   match(Set dst (UMaxV a (LoadVector b)));
20756   format %{ "vector_uminmax $dst,$a,$b\t!" %}
20757   ins_encode %{
20758     int opcode = this->ideal_Opcode();
20759     int vlen_enc = vector_length_encoding(this);
20760     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20761     assert(is_integral_type(elem_bt), "");
20762     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
20763   %}
20764   ins_pipe( pipe_slow );
20765 %}
20766 
20767 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
20768   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
20769   match(Set dst (UMinV a b));
20770   match(Set dst (UMaxV a b));
20771   effect(TEMP xtmp1, TEMP xtmp2);
20772   format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1, $xtmp2 as TEMP" %}
20773   ins_encode %{
20774     int opcode = this->ideal_Opcode();
20775     int vlen_enc = vector_length_encoding(this);
20776     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20777   %}
20778   ins_pipe( pipe_slow );
20779 %}
20780 
20781 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
20782   match(Set dst (UMinV (Binary dst src2) mask));
20783   match(Set dst (UMaxV (Binary dst src2) mask));
20784   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20785   ins_encode %{
20786     int vlen_enc = vector_length_encoding(this);
20787     BasicType bt = Matcher::vector_element_basic_type(this);
20788     int opc = this->ideal_Opcode();
20789     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20790                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
20791   %}
20792   ins_pipe( pipe_slow );
20793 %}
20794 
20795 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
20796   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
20797   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
20798   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20799   ins_encode %{
20800     int vlen_enc = vector_length_encoding(this);
20801     BasicType bt = Matcher::vector_element_basic_type(this);
20802     int opc = this->ideal_Opcode();
20803     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20804                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
20805   %}
20806   ins_pipe( pipe_slow );
20807 %}
20808 
20809 // --------------------------------- Signum/CopySign ---------------------------
20810 
20811 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
20812   match(Set dst (SignumF dst (Binary zero one)));
20813   effect(KILL cr);
20814   format %{ "signumF $dst, $dst" %}
20815   ins_encode %{
20816     int opcode = this->ideal_Opcode();
20817     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20818   %}
20819   ins_pipe( pipe_slow );
20820 %}
20821 
20822 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
20823   match(Set dst (SignumD dst (Binary zero one)));
20824   effect(KILL cr);
20825   format %{ "signumD $dst, $dst" %}
20826   ins_encode %{
20827     int opcode = this->ideal_Opcode();
20828     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20829   %}
20830   ins_pipe( pipe_slow );
20831 %}
20832 
20833 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
20834   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
20835   match(Set dst (SignumVF src (Binary zero one)));
20836   match(Set dst (SignumVD src (Binary zero one)));
20837   effect(TEMP dst, TEMP xtmp1);
20838   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
20839   ins_encode %{
20840     int opcode = this->ideal_Opcode();
20841     int vec_enc = vector_length_encoding(this);
20842     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20843                          $xtmp1$$XMMRegister, vec_enc);
20844   %}
20845   ins_pipe( pipe_slow );
20846 %}
20847 
20848 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
20849   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
20850   match(Set dst (SignumVF src (Binary zero one)));
20851   match(Set dst (SignumVD src (Binary zero one)));
20852   effect(TEMP dst, TEMP ktmp1);
20853   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
20854   ins_encode %{
20855     int opcode = this->ideal_Opcode();
20856     int vec_enc = vector_length_encoding(this);
20857     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20858                           $ktmp1$$KRegister, vec_enc);
20859   %}
20860   ins_pipe( pipe_slow );
20861 %}
20862 
20863 // ---------------------------------------
20864 // For copySign use 0xE4 as writemask for vpternlog
20865 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20866 // C (xmm2) is set to 0x7FFFFFFF
20867 // Wherever xmm2 is 0, we want to pick from B (sign)
20868 // Wherever xmm2 is 1, we want to pick from A (src)
20869 //
20870 // A B C Result
20871 // 0 0 0 0
20872 // 0 0 1 0
20873 // 0 1 0 1
20874 // 0 1 1 0
20875 // 1 0 0 0
20876 // 1 0 1 1
20877 // 1 1 0 1
20878 // 1 1 1 1
20879 //
20880 // Result going from high bit to low bit is 0b11100100 = 0xE4
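//
// A minimal sketch (Java, illustration only; ternlogImm is a hypothetical
// name, not part of this file) of how the imm8 falls out of the C ? A : B
// selection above:
//
//   static int ternlogImm() {
//     int imm = 0;
//     for (int bits = 7; bits >= 0; bits--) {   // ABC = 111 down to 000
//       int a = (bits >> 2) & 1, b = (bits >> 1) & 1, c = bits & 1;
//       imm = (imm << 1) | (c == 1 ? a : b);    // pick src (A) where mask (C) is 1
//     }
//     return imm;                               // == 0xE4
//   }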
20881 // ---------------------------------------
20882 
20883 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
20884   match(Set dst (CopySignF dst src));
20885   effect(TEMP tmp1, TEMP tmp2);
20886   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20887   ins_encode %{
20888     __ movl($tmp2$$Register, 0x7FFFFFFF);
20889     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
20890     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20891   %}
20892   ins_pipe( pipe_slow );
20893 %}
20894 
20895 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
20896   match(Set dst (CopySignD dst (Binary src zero)));
20897   ins_cost(100);
20898   effect(TEMP tmp1, TEMP tmp2);
20899   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20900   ins_encode %{
20901     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
20902     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
20903     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20904   %}
20905   ins_pipe( pipe_slow );
20906 %}
20907 
20908 //----------------------------- CompressBits/ExpandBits ------------------------
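//
// These instructs match the CompressBits/ExpandBits ideal nodes, which come
// from the Integer/Long.compress and Integer/Long.expand intrinsics (JDK 19+),
// and lower them to BMI2 PEXT/PDEP. A sketch of the scalar semantics, with
// values chosen only for illustration:
//
//   Integer.compress(0b1011_0010, 0b1111_0000) == 0b1011;       // PEXT: gather
//       // the bits of src selected by mask into the low end
//   Integer.expand(0b1011, 0b1111_0000) == 0b1011_0000;         // PDEP: scatter
//       // the low bits of src to the positions selected by mask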
20909 
20910 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20911   predicate(n->bottom_type()->isa_int());
20912   match(Set dst (CompressBits src mask));
20913   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
20914   ins_encode %{
20915     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
20916   %}
20917   ins_pipe( pipe_slow );
20918 %}
20919 
20920 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20921   predicate(n->bottom_type()->isa_int());
20922   match(Set dst (ExpandBits src mask));
20923   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
20924   ins_encode %{
20925     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
20926   %}
20927   ins_pipe( pipe_slow );
20928 %}
20929 
20930 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20931   predicate(n->bottom_type()->isa_int());
20932   match(Set dst (CompressBits src (LoadI mask)));
20933   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
20934   ins_encode %{
20935     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
20936   %}
20937   ins_pipe( pipe_slow );
20938 %}
20939 
20940 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20941   predicate(n->bottom_type()->isa_int());
20942   match(Set dst (ExpandBits src (LoadI mask)));
20943   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
20944   ins_encode %{
20945     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
20946   %}
20947   ins_pipe( pipe_slow );
20948 %}
20949 
20950 // --------------------------------- Sqrt --------------------------------------
20951 
20952 instruct vsqrtF_reg(vec dst, vec src) %{
20953   match(Set dst (SqrtVF src));
20954   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
20955   ins_encode %{
20956     assert(UseAVX > 0, "required");
20957     int vlen_enc = vector_length_encoding(this);
20958     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
20959   %}
20960   ins_pipe( pipe_slow );
20961 %}
20962 
20963 instruct vsqrtF_mem(vec dst, memory mem) %{
20964   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
20965   match(Set dst (SqrtVF (LoadVector mem)));
20966   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
20967   ins_encode %{
20968     assert(UseAVX > 0, "required");
20969     int vlen_enc = vector_length_encoding(this);
20970     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
20971   %}
20972   ins_pipe( pipe_slow );
20973 %}
20974 
20975 // Floating point vector sqrt
20976 instruct vsqrtD_reg(vec dst, vec src) %{
20977   match(Set dst (SqrtVD src));
20978   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
20979   ins_encode %{
20980     assert(UseAVX > 0, "required");
20981     int vlen_enc = vector_length_encoding(this);
20982     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
20983   %}
20984   ins_pipe( pipe_slow );
20985 %}
20986 
20987 instruct vsqrtD_mem(vec dst, memory mem) %{
20988   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
20989   match(Set dst (SqrtVD (LoadVector mem)));
20990   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
20991   ins_encode %{
20992     assert(UseAVX > 0, "required");
20993     int vlen_enc = vector_length_encoding(this);
20994     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
20995   %}
20996   ins_pipe( pipe_slow );
20997 %}
20998 
20999 // ------------------------------ Shift ---------------------------------------
21000 
21001 // Left and right shift count vectors are the same on x86 (the shift
21002 // instructions take the count from the low 64 bits of the xmm operand).
21003 instruct vshiftcnt(vec dst, rRegI cnt) %{
21004   match(Set dst (LShiftCntV cnt));
21005   match(Set dst (RShiftCntV cnt));
21006   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21007   ins_encode %{
21008     __ movdl($dst$$XMMRegister, $cnt$$Register);
21009   %}
21010   ins_pipe( pipe_slow );
21011 %}
21012 
21013 // Byte vector shift
21014 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21015   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21016   match(Set dst ( LShiftVB src shift));
21017   match(Set dst ( RShiftVB src shift));
21018   match(Set dst (URShiftVB src shift));
21019   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21020   format %{ "vector_byte_shift $dst,$src,$shift" %}
21021   ins_encode %{
21022     assert(UseSSE > 3, "required");
21023     int opcode = this->ideal_Opcode();
21024     bool sign = (opcode != Op_URShiftVB);
21025     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21026     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21027     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21028     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21029     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21030   %}
21031   ins_pipe( pipe_slow );
21032 %}
21033 
21034 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21035   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21036             UseAVX <= 1);
21037   match(Set dst ( LShiftVB src shift));
21038   match(Set dst ( RShiftVB src shift));
21039   match(Set dst (URShiftVB src shift));
21040   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21041   format %{ "vector_byte_shift $dst,$src,$shift" %}
21042   ins_encode %{
21043     assert(UseSSE > 3, "required");
21044     int opcode = this->ideal_Opcode();
21045     bool sign = (opcode != Op_URShiftVB);
21046     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21047     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21048     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21049     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21050     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21051     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21052     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21053     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21054     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21055   %}
21056   ins_pipe( pipe_slow );
21057 %}
21058 
21059 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21060   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21061             UseAVX > 1);
21062   match(Set dst ( LShiftVB src shift));
21063   match(Set dst ( RShiftVB src shift));
21064   match(Set dst (URShiftVB src shift));
21065   effect(TEMP dst, TEMP tmp);
21066   format %{ "vector_byte_shift $dst,$src,$shift" %}
21067   ins_encode %{
21068     int opcode = this->ideal_Opcode();
21069     bool sign = (opcode != Op_URShiftVB);
21070     int vlen_enc = Assembler::AVX_256bit;
21071     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21072     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21073     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21074     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21075     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21076   %}
21077   ins_pipe( pipe_slow );
21078 %}
21079 
21080 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21081   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21082   match(Set dst ( LShiftVB src shift));
21083   match(Set dst ( RShiftVB src shift));
21084   match(Set dst (URShiftVB src shift));
21085   effect(TEMP dst, TEMP tmp);
21086   format %{ "vector_byte_shift $dst,$src,$shift" %}
21087   ins_encode %{
21088     assert(UseAVX > 1, "required");
21089     int opcode = this->ideal_Opcode();
21090     bool sign = (opcode != Op_URShiftVB);
21091     int vlen_enc = Assembler::AVX_256bit;
21092     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21093     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21094     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21095     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21096     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21097     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21098     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21099     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21100     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21101   %}
21102   ins_pipe( pipe_slow );
21103 %}
21104 
21105 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21106   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21107   match(Set dst ( LShiftVB src shift));
21108   match(Set dst ( RShiftVB src shift));
21109   match(Set dst (URShiftVB src shift));
21110   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21111   format %{ "vector_byte_shift $dst,$src,$shift" %}
21112   ins_encode %{
21113     assert(UseAVX > 2, "required");
21114     int opcode = this->ideal_Opcode();
21115     bool sign = (opcode != Op_URShiftVB);
21116     int vlen_enc = Assembler::AVX_512bit;
21117     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21118     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21119     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21120     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21121     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21122     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21123     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21124     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21125     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21126     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21127     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21128     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21129   %}
21130   ins_pipe( pipe_slow );
21131 %}
21132 
21133 // A shorts vector logical right shift produces an incorrect Java result
21134 // for negative data, because Java code converts a short value into an int
21135 // with sign extension before the shift. Char vectors are fine, since chars
21136 // are unsigned values.
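//
// For example (Java scalar semantics, illustration only):
//   short s = -1;       // 0xFFFF
//   int r = s >>> 4;    // s widens to 0xFFFFFFFF first, so r == 0x0FFFFFFF
//                       // and (short) r == (short) 0xFFFF, whereas a 16-bit
//                       // psrlw on 0xFFFF would produce 0x0FFF
//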
21137 // Shorts/Chars vector left shift
21138 instruct vshiftS(vec dst, vec src, vec shift) %{
21139   predicate(!n->as_ShiftV()->is_var_shift());
21140   match(Set dst ( LShiftVS src shift));
21141   match(Set dst ( RShiftVS src shift));
21142   match(Set dst (URShiftVS src shift));
21143   effect(TEMP dst, USE src, USE shift);
21144   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21145   ins_encode %{
21146     int opcode = this->ideal_Opcode();
21147     if (UseAVX > 0) {
21148       int vlen_enc = vector_length_encoding(this);
21149       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21150     } else {
21151       int vlen = Matcher::vector_length(this);
21152       if (vlen == 2) {
21153         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21154         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21155       } else if (vlen == 4) {
21156         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21157         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21158       } else {
21159         assert(vlen == 8, "sanity");
21160         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21161         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21162       }
21163     }
21164   %}
21165   ins_pipe( pipe_slow );
21166 %}
21167 
21168 // Integers vector left shift
21169 instruct vshiftI(vec dst, vec src, vec shift) %{
21170   predicate(!n->as_ShiftV()->is_var_shift());
21171   match(Set dst ( LShiftVI src shift));
21172   match(Set dst ( RShiftVI src shift));
21173   match(Set dst (URShiftVI src shift));
21174   effect(TEMP dst, USE src, USE shift);
21175   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21176   ins_encode %{
21177     int opcode = this->ideal_Opcode();
21178     if (UseAVX > 0) {
21179       int vlen_enc = vector_length_encoding(this);
21180       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21181     } else {
21182       int vlen = Matcher::vector_length(this);
21183       if (vlen == 2) {
21184         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21185         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21186       } else {
21187         assert(vlen == 4, "sanity");
21188         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21189         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21190       }
21191     }
21192   %}
21193   ins_pipe( pipe_slow );
21194 %}
21195 
21196 // Integers vector left constant shift
21197 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21198   match(Set dst (LShiftVI src (LShiftCntV shift)));
21199   match(Set dst (RShiftVI src (RShiftCntV shift)));
21200   match(Set dst (URShiftVI src (RShiftCntV shift)));
21201   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21202   ins_encode %{
21203     int opcode = this->ideal_Opcode();
21204     if (UseAVX > 0) {
21205       int vector_len = vector_length_encoding(this);
21206       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21207     } else {
21208       int vlen = Matcher::vector_length(this);
21209       if (vlen == 2) {
21210         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21211         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21212       } else {
21213         assert(vlen == 4, "sanity");
21214         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21215         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21216       }
21217     }
21218   %}
21219   ins_pipe( pipe_slow );
21220 %}
21221 
21222 // Longs vector shift
21223 instruct vshiftL(vec dst, vec src, vec shift) %{
21224   predicate(!n->as_ShiftV()->is_var_shift());
21225   match(Set dst ( LShiftVL src shift));
21226   match(Set dst (URShiftVL src shift));
21227   effect(TEMP dst, USE src, USE shift);
21228   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21229   ins_encode %{
21230     int opcode = this->ideal_Opcode();
21231     if (UseAVX > 0) {
21232       int vlen_enc = vector_length_encoding(this);
21233       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21234     } else {
21235       assert(Matcher::vector_length(this) == 2, "");
21236       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21237       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21238     }
21239   %}
21240   ins_pipe( pipe_slow );
21241 %}
21242 
21243 // Longs vector constant shift
21244 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21245   match(Set dst (LShiftVL src (LShiftCntV shift)));
21246   match(Set dst (URShiftVL src (RShiftCntV shift)));
21247   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21248   ins_encode %{
21249     int opcode = this->ideal_Opcode();
21250     if (UseAVX > 0) {
21251       int vector_len = vector_length_encoding(this);
21252       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21253     } else {
21254       assert(Matcher::vector_length(this) == 2, "");
21255       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21256       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21257     }
21258   %}
21259   ins_pipe( pipe_slow );
21260 %}
21261 
21262 // -------------------ArithmeticRightShift -----------------------------------
21263 // Long vector arithmetic right shift
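// AVX2 and below have no packed 64-bit arithmetic right shift, so
// vshiftL_arith_reg emulates it with a logical shift plus a sign fix-up:
// with m = sign_mask >>> shift, ((x >>> shift) ^ m) - m re-extends the
// original sign bits. AVX-512 (vshiftL_arith_reg_evex) uses evpsraq directly.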
21264 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21265   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21266   match(Set dst (RShiftVL src shift));
21267   effect(TEMP dst, TEMP tmp);
21268   format %{ "vshiftq $dst,$src,$shift" %}
21269   ins_encode %{
21270     uint vlen = Matcher::vector_length(this);
21271     if (vlen == 2) {
21272       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21273       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21274       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21275       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21276       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21277       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21278     } else {
21279       assert(vlen == 4, "sanity");
21280       assert(UseAVX > 1, "required");
21281       int vlen_enc = Assembler::AVX_256bit;
21282       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21283       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21284       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21285       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21286       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21287     }
21288   %}
21289   ins_pipe( pipe_slow );
21290 %}
21291 
21292 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21293   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21294   match(Set dst (RShiftVL src shift));
21295   format %{ "vshiftq $dst,$src,$shift" %}
21296   ins_encode %{
21297     int vlen_enc = vector_length_encoding(this);
21298     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21299   %}
21300   ins_pipe( pipe_slow );
21301 %}
21302 
21303 // ------------------- Variable Shift -----------------------------
21304 // Byte variable shift
21305 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21306   predicate(Matcher::vector_length(n) <= 8 &&
21307             n->as_ShiftV()->is_var_shift() &&
21308             !VM_Version::supports_avx512bw());
21309   match(Set dst ( LShiftVB src shift));
21310   match(Set dst ( RShiftVB src shift));
21311   match(Set dst (URShiftVB src shift));
21312   effect(TEMP dst, TEMP vtmp);
21313   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21314   ins_encode %{
21315     assert(UseAVX >= 2, "required");
21316 
21317     int opcode = this->ideal_Opcode();
21318     int vlen_enc = Assembler::AVX_128bit;
21319     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21320     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21321   %}
21322   ins_pipe( pipe_slow );
21323 %}
21324 
21325 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21326   predicate(Matcher::vector_length(n) == 16 &&
21327             n->as_ShiftV()->is_var_shift() &&
21328             !VM_Version::supports_avx512bw());
21329   match(Set dst ( LShiftVB src shift));
21330   match(Set dst ( RShiftVB src shift));
21331   match(Set dst (URShiftVB src shift));
21332   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21333   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21334   ins_encode %{
21335     assert(UseAVX >= 2, "required");
21336 
21337     int opcode = this->ideal_Opcode();
21338     int vlen_enc = Assembler::AVX_128bit;
21339     // Shift lower half and get word result in dst
21340     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21341 
21342     // Shift upper half and get word result in vtmp1
21343     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21344     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21345     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21346 
21347     // Merge and down convert the two word results to byte in dst
21348     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21349   %}
21350   ins_pipe( pipe_slow );
21351 %}
21352 
21353 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21354   predicate(Matcher::vector_length(n) == 32 &&
21355             n->as_ShiftV()->is_var_shift() &&
21356             !VM_Version::supports_avx512bw());
21357   match(Set dst ( LShiftVB src shift));
21358   match(Set dst ( RShiftVB src shift));
21359   match(Set dst (URShiftVB src shift));
21360   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21361   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21362   ins_encode %{
21363     assert(UseAVX >= 2, "required");
21364 
21365     int opcode = this->ideal_Opcode();
21366     int vlen_enc = Assembler::AVX_128bit;
21367     // Process lower 128 bits and get result in dst
21368     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21369     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21370     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21371     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21372     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21373 
21374     // Process higher 128 bits and get result in vtmp3
21375     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21376     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21377     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21378     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21379     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21380     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21381     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21382 
21383     // Merge the two results in dst
21384     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21385   %}
21386   ins_pipe( pipe_slow );
21387 %}
21388 
21389 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21390   predicate(Matcher::vector_length(n) <= 32 &&
21391             n->as_ShiftV()->is_var_shift() &&
21392             VM_Version::supports_avx512bw());
21393   match(Set dst ( LShiftVB src shift));
21394   match(Set dst ( RShiftVB src shift));
21395   match(Set dst (URShiftVB src shift));
21396   effect(TEMP dst, TEMP vtmp);
21397   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21398   ins_encode %{
21399     assert(UseAVX > 2, "required");
21400 
21401     int opcode = this->ideal_Opcode();
21402     int vlen_enc = vector_length_encoding(this);
21403     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21404   %}
21405   ins_pipe( pipe_slow );
21406 %}
21407 
21408 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21409   predicate(Matcher::vector_length(n) == 64 &&
21410             n->as_ShiftV()->is_var_shift() &&
21411             VM_Version::supports_avx512bw());
21412   match(Set dst ( LShiftVB src shift));
21413   match(Set dst ( RShiftVB src shift));
21414   match(Set dst (URShiftVB src shift));
21415   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21416   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21417   ins_encode %{
21418     assert(UseAVX > 2, "required");
21419 
21420     int opcode = this->ideal_Opcode();
21421     int vlen_enc = Assembler::AVX_256bit;
21422     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21423     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21424     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21425     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21426     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21427   %}
21428   ins_pipe( pipe_slow );
21429 %}
21430 
21431 // Short variable shift
21432 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21433   predicate(Matcher::vector_length(n) <= 8 &&
21434             n->as_ShiftV()->is_var_shift() &&
21435             !VM_Version::supports_avx512bw());
21436   match(Set dst ( LShiftVS src shift));
21437   match(Set dst ( RShiftVS src shift));
21438   match(Set dst (URShiftVS src shift));
21439   effect(TEMP dst, TEMP vtmp);
21440   format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21441   ins_encode %{
21442     assert(UseAVX >= 2, "required");
21443 
21444     int opcode = this->ideal_Opcode();
21445     bool sign = (opcode != Op_URShiftVS);
21446     int vlen_enc = Assembler::AVX_256bit;
21447     __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21448     __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21449     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21450     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21451     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21452     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21453   %}
21454   ins_pipe( pipe_slow );
21455 %}
21456 
21457 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21458   predicate(Matcher::vector_length(n) == 16 &&
21459             n->as_ShiftV()->is_var_shift() &&
21460             !VM_Version::supports_avx512bw());
21461   match(Set dst ( LShiftVS src shift));
21462   match(Set dst ( RShiftVS src shift));
21463   match(Set dst (URShiftVS src shift));
21464   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21465   format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21466   ins_encode %{
21467     assert(UseAVX >= 2, "required");
21468 
21469     int opcode = this->ideal_Opcode();
21470     bool sign = (opcode != Op_URShiftVS);
21471     int vlen_enc = Assembler::AVX_256bit;
21472     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21473     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21474     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21475     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21476     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21477 
21478     // Shift upper half, with result in dst using vtmp1 as TEMP
21479     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21480     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21481     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21482     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21483     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21484     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21485 
21486     // Merge lower and upper half result into dst
21487     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21488     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21489   %}
21490   ins_pipe( pipe_slow );
21491 %}
21492 
21493 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21494   predicate(n->as_ShiftV()->is_var_shift() &&
21495             VM_Version::supports_avx512bw());
21496   match(Set dst ( LShiftVS src shift));
21497   match(Set dst ( RShiftVS src shift));
21498   match(Set dst (URShiftVS src shift));
21499   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21500   ins_encode %{
21501     assert(UseAVX > 2, "required");
21502 
21503     int opcode = this->ideal_Opcode();
21504     int vlen_enc = vector_length_encoding(this);
21505     if (!VM_Version::supports_avx512vl()) {
21506       vlen_enc = Assembler::AVX_512bit;
21507     }
21508     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21509   %}
21510   ins_pipe( pipe_slow );
21511 %}
21512 
21513 // Integer variable shift
21514 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21515   predicate(n->as_ShiftV()->is_var_shift());
21516   match(Set dst ( LShiftVI src shift));
21517   match(Set dst ( RShiftVI src shift));
21518   match(Set dst (URShiftVI src shift));
21519   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21520   ins_encode %{
21521     assert(UseAVX >= 2, "required");
21522 
21523     int opcode = this->ideal_Opcode();
21524     int vlen_enc = vector_length_encoding(this);
21525     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21526   %}
21527   ins_pipe( pipe_slow );
21528 %}
21529 
21530 // Long variable shift
21531 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21532   predicate(n->as_ShiftV()->is_var_shift());
21533   match(Set dst ( LShiftVL src shift));
21534   match(Set dst (URShiftVL src shift));
21535   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21536   ins_encode %{
21537     assert(UseAVX >= 2, "required");
21538 
21539     int opcode = this->ideal_Opcode();
21540     int vlen_enc = vector_length_encoding(this);
21541     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21542   %}
21543   ins_pipe( pipe_slow );
21544 %}
21545 
21546 // Long variable arithmetic right shift
21547 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21548   predicate(Matcher::vector_length(n) <= 4 &&
21549             n->as_ShiftV()->is_var_shift() &&
21550             UseAVX == 2);
21551   match(Set dst (RShiftVL src shift));
21552   effect(TEMP dst, TEMP vtmp);
21553   format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21554   ins_encode %{
21555     int opcode = this->ideal_Opcode();
21556     int vlen_enc = vector_length_encoding(this);
21557     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21558                  $vtmp$$XMMRegister);
21559   %}
21560   ins_pipe( pipe_slow );
21561 %}
21562 
21563 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21564   predicate(n->as_ShiftV()->is_var_shift() &&
21565             UseAVX > 2);
21566   match(Set dst (RShiftVL src shift));
21567   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21568   ins_encode %{
21569     int opcode = this->ideal_Opcode();
21570     int vlen_enc = vector_length_encoding(this);
21571     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21572   %}
21573   ins_pipe( pipe_slow );
21574 %}
21575 
21576 // --------------------------------- AND --------------------------------------
21577 
21578 instruct vand(vec dst, vec src) %{
21579   predicate(UseAVX == 0);
21580   match(Set dst (AndV dst src));
21581   format %{ "pand    $dst,$src\t! and vectors" %}
21582   ins_encode %{
21583     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21584   %}
21585   ins_pipe( pipe_slow );
21586 %}
21587 
21588 instruct vand_reg(vec dst, vec src1, vec src2) %{
21589   predicate(UseAVX > 0);
21590   match(Set dst (AndV src1 src2));
21591   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
21592   ins_encode %{
21593     int vlen_enc = vector_length_encoding(this);
21594     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21595   %}
21596   ins_pipe( pipe_slow );
21597 %}
21598 
21599 instruct vand_mem(vec dst, vec src, memory mem) %{
21600   predicate((UseAVX > 0) &&
21601             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21602   match(Set dst (AndV src (LoadVector mem)));
21603   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
21604   ins_encode %{
21605     int vlen_enc = vector_length_encoding(this);
21606     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21607   %}
21608   ins_pipe( pipe_slow );
21609 %}
21610 
21611 // --------------------------------- OR ---------------------------------------
21612 
21613 instruct vor(vec dst, vec src) %{
21614   predicate(UseAVX == 0);
21615   match(Set dst (OrV dst src));
21616   format %{ "por     $dst,$src\t! or vectors" %}
21617   ins_encode %{
21618     __ por($dst$$XMMRegister, $src$$XMMRegister);
21619   %}
21620   ins_pipe( pipe_slow );
21621 %}
21622 
21623 instruct vor_reg(vec dst, vec src1, vec src2) %{
21624   predicate(UseAVX > 0);
21625   match(Set dst (OrV src1 src2));
21626   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
21627   ins_encode %{
21628     int vlen_enc = vector_length_encoding(this);
21629     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21630   %}
21631   ins_pipe( pipe_slow );
21632 %}
21633 
21634 instruct vor_mem(vec dst, vec src, memory mem) %{
21635   predicate((UseAVX > 0) &&
21636             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21637   match(Set dst (OrV src (LoadVector mem)));
21638   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
21639   ins_encode %{
21640     int vlen_enc = vector_length_encoding(this);
21641     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21642   %}
21643   ins_pipe( pipe_slow );
21644 %}
21645 
21646 // --------------------------------- XOR --------------------------------------
21647 
21648 instruct vxor(vec dst, vec src) %{
21649   predicate(UseAVX == 0);
21650   match(Set dst (XorV dst src));
21651   format %{ "pxor    $dst,$src\t! xor vectors" %}
21652   ins_encode %{
21653     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21654   %}
21655   ins_pipe( pipe_slow );
21656 %}
21657 
21658 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21659   predicate(UseAVX > 0);
21660   match(Set dst (XorV src1 src2));
21661   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
21662   ins_encode %{
21663     int vlen_enc = vector_length_encoding(this);
21664     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21665   %}
21666   ins_pipe( pipe_slow );
21667 %}
21668 
21669 instruct vxor_mem(vec dst, vec src, memory mem) %{
21670   predicate((UseAVX > 0) &&
21671             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21672   match(Set dst (XorV src (LoadVector mem)));
21673   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
21674   ins_encode %{
21675     int vlen_enc = vector_length_encoding(this);
21676     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21677   %}
21678   ins_pipe( pipe_slow );
21679 %}
21680 
21681 // --------------------------------- VectorCast --------------------------------------
21682 
21683 instruct vcastBtoX(vec dst, vec src) %{
21684   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21685   match(Set dst (VectorCastB2X src));
21686   format %{ "vector_cast_b2x $dst,$src\t!" %}
21687   ins_encode %{
21688     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21689     int vlen_enc = vector_length_encoding(this);
21690     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21691   %}
21692   ins_pipe( pipe_slow );
21693 %}
21694 
21695 instruct vcastBtoD(legVec dst, legVec src) %{
21696   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21697   match(Set dst (VectorCastB2X src));
21698   format %{ "vector_cast_b2x $dst,$src\t!" %}
21699   ins_encode %{
21700     int vlen_enc = vector_length_encoding(this);
21701     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21702   %}
21703   ins_pipe( pipe_slow );
21704 %}
21705 
21706 instruct castStoX(vec dst, vec src) %{
21707   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21708             Matcher::vector_length(n->in(1)) <= 8 && // src
21709             Matcher::vector_element_basic_type(n) == T_BYTE);
21710   match(Set dst (VectorCastS2X src));
21711   format %{ "vector_cast_s2x $dst,$src" %}
21712   ins_encode %{
21713     assert(UseAVX > 0, "required");
21714 
21715     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21716     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21717   %}
21718   ins_pipe( pipe_slow );
21719 %}
21720 
21721 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21722   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21723             Matcher::vector_length(n->in(1)) == 16 && // src
21724             Matcher::vector_element_basic_type(n) == T_BYTE);
21725   effect(TEMP dst, TEMP vtmp);
21726   match(Set dst (VectorCastS2X src));
21727   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21728   ins_encode %{
21729     assert(UseAVX > 0, "required");
21730 
21731     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21732     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21733     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21734     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21735   %}
21736   ins_pipe( pipe_slow );
21737 %}
21738 
21739 instruct vcastStoX_evex(vec dst, vec src) %{
21740   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21741             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21742   match(Set dst (VectorCastS2X src));
21743   format %{ "vector_cast_s2x $dst,$src\t!" %}
21744   ins_encode %{
21745     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21746     int src_vlen_enc = vector_length_encoding(this, $src);
21747     int vlen_enc = vector_length_encoding(this);
21748     switch (to_elem_bt) {
21749       case T_BYTE:
21750         if (!VM_Version::supports_avx512vl()) {
21751           vlen_enc = Assembler::AVX_512bit;
21752         }
21753         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21754         break;
21755       case T_INT:
21756         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21757         break;
21758       case T_FLOAT:
21759         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21760         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21761         break;
21762       case T_LONG:
21763         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21764         break;
21765       case T_DOUBLE: {
21766         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
21767         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
21768         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21769         break;
21770       }
21771       default:
21772         ShouldNotReachHere();
21773     }
21774   %}
21775   ins_pipe( pipe_slow );
21776 %}
21777 
21778 instruct castItoX(vec dst, vec src) %{
21779   predicate(UseAVX <= 2 &&
21780             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
21781             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21782   match(Set dst (VectorCastI2X src));
21783   format %{ "vector_cast_i2x $dst,$src" %}
21784   ins_encode %{
21785     assert(UseAVX > 0, "required");
21786 
21787     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21788     int vlen_enc = vector_length_encoding(this, $src);
21789 
21790     if (to_elem_bt == T_BYTE) {
21791       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21792       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21793       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21794     } else {
21795       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21796       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21797       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21798     }
21799   %}
21800   ins_pipe( pipe_slow );
21801 %}
21802 
21803 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
21804   predicate(UseAVX <= 2 &&
21805             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21806             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21807   match(Set dst (VectorCastI2X src));
21808   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21809   effect(TEMP dst, TEMP vtmp);
21810   ins_encode %{
21811     assert(UseAVX > 0, "required");
21812 
21813     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21814     int vlen_enc = vector_length_encoding(this, $src);
21815 
21816     if (to_elem_bt == T_BYTE) {
21817       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21818       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21819       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21820       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21821     } else {
21822       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21823       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21824       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21825       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21826     }
21827   %}
21828   ins_pipe( pipe_slow );
21829 %}
21830 
21831 instruct vcastItoX_evex(vec dst, vec src) %{
21832   predicate(UseAVX > 2 ||
21833             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21834   match(Set dst (VectorCastI2X src));
21835   format %{ "vector_cast_i2x $dst,$src\t!" %}
21836   ins_encode %{
21837     assert(UseAVX > 0, "required");
21838 
21839     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
21840     int src_vlen_enc = vector_length_encoding(this, $src);
21841     int dst_vlen_enc = vector_length_encoding(this);
21842     switch (dst_elem_bt) {
21843       case T_BYTE:
21844         if (!VM_Version::supports_avx512vl()) {
21845           src_vlen_enc = Assembler::AVX_512bit;
21846         }
21847         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21848         break;
21849       case T_SHORT:
21850         if (!VM_Version::supports_avx512vl()) {
21851           src_vlen_enc = Assembler::AVX_512bit;
21852         }
21853         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21854         break;
21855       case T_FLOAT:
21856         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21857         break;
21858       case T_LONG:
21859         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21860         break;
21861       case T_DOUBLE:
21862         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21863         break;
21864       default:
21865         ShouldNotReachHere();
21866     }
21867   %}
21868   ins_pipe( pipe_slow );
21869 %}
21870 
21871 instruct vcastLtoBS(vec dst, vec src) %{
21872   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21873             UseAVX <= 2);
21874   match(Set dst (VectorCastL2X src));
21875   format %{ "vector_cast_l2x  $dst,$src" %}
21876   ins_encode %{
21877     assert(UseAVX > 0, "required");
21878 
21879     int vlen = Matcher::vector_length_in_bytes(this, $src);
21880     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
21881     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
21882                                                       : ExternalAddress(vector_int_to_short_mask());
21883     if (vlen <= 16) {
21884       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
21885       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21886       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21887     } else {
21888       assert(vlen <= 32, "required");
21889       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
21890       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
21891       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21892       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21893     }
21894     if (to_elem_bt == T_BYTE) {
21895       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21896     }
21897   %}
21898   ins_pipe( pipe_slow );
21899 %}
21900 
21901 instruct vcastLtoX_evex(vec dst, vec src) %{
21902   predicate(UseAVX > 2 ||
21903             (Matcher::vector_element_basic_type(n) == T_INT ||
21904              Matcher::vector_element_basic_type(n) == T_FLOAT ||
21905              Matcher::vector_element_basic_type(n) == T_DOUBLE));
21906   match(Set dst (VectorCastL2X src));
21907   format %{ "vector_cast_l2x  $dst,$src\t!" %}
21908   ins_encode %{
21909     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21910     int vlen = Matcher::vector_length_in_bytes(this, $src);
21911     int vlen_enc = vector_length_encoding(this, $src);
21912     switch (to_elem_bt) {
21913       case T_BYTE:
21914         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21915           vlen_enc = Assembler::AVX_512bit;
21916         }
21917         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21918         break;
21919       case T_SHORT:
21920         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21921           vlen_enc = Assembler::AVX_512bit;
21922         }
21923         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21924         break;
21925       case T_INT:
21926         if (vlen == 8) {
21927           if ($dst$$XMMRegister != $src$$XMMRegister) {
21928             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21929           }
21930         } else if (vlen == 16) {
21931           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
21932         } else if (vlen == 32) {
21933           if (UseAVX > 2) {
21934             if (!VM_Version::supports_avx512vl()) {
21935               vlen_enc = Assembler::AVX_512bit;
21936             }
21937             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21938           } else {
21939             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
21940             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
21941           }
21942         } else { // vlen == 64
21943           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21944         }
21945         break;
21946       case T_FLOAT:
21947         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21948         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21949         break;
21950       case T_DOUBLE:
21951         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21952         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21953         break;
21954 
21955       default: assert(false, "%s", type2name(to_elem_bt));
21956     }
21957   %}
21958   ins_pipe( pipe_slow );
21959 %}
21960 
21961 instruct vcastFtoD_reg(vec dst, vec src) %{
21962   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
21963   match(Set dst (VectorCastF2X src));
21964   format %{ "vector_cast_f2d  $dst,$src\t!" %}
21965   ins_encode %{
21966     int vlen_enc = vector_length_encoding(this);
21967     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21968   %}
21969   ins_pipe( pipe_slow );
21970 %}
21971 
21972 
21973 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
21974   predicate(!VM_Version::supports_avx10_2() &&
21975             !VM_Version::supports_avx512vl() &&
21976             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
21977             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
21978             is_integral_type(Matcher::vector_element_basic_type(n)));
21979   match(Set dst (VectorCastF2X src));
21980   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
21981   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
21982   ins_encode %{
21983     int vlen_enc = vector_length_encoding(this, $src);
21984     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21985     // JDK-8292878 removed the need for an explicit scratch register to load addresses
21986     // wider than 32 bits for the register-indirect addressing mode, since stub constants
21987     // live in the code cache and ReservedCodeCacheSize is currently capped at 2G.
21988     // Targets are free to raise this limit, but a code cache larger than 2G is
21989     // unreasonable in practice; on the flip side, the cap saves a temporary register
21990     // allocation, which in the limiting case can prevent spilling in blocks with
21991     // high register pressure.
21992     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21993                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
21994                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
21995   %}
21996   ins_pipe( pipe_slow );
21997 %}
21998 
21999 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22000   predicate(!VM_Version::supports_avx10_2() &&
22001             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22002             is_integral_type(Matcher::vector_element_basic_type(n)));
22003   match(Set dst (VectorCastF2X src));
22004   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22005   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22006   ins_encode %{
22007     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22008     if (to_elem_bt == T_LONG) {
22009       int vlen_enc = vector_length_encoding(this);
22010       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22011                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22012                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22013     } else {
22014       int vlen_enc = vector_length_encoding(this, $src);
22015       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22016                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22017                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22018     }
22019   %}
22020   ins_pipe( pipe_slow );
22021 %}
22022 
22023 instruct castFtoX_reg_avx10(vec dst, vec src) %{
22024   predicate(VM_Version::supports_avx10_2() &&
22025             is_integral_type(Matcher::vector_element_basic_type(n)));
22026   match(Set dst (VectorCastF2X src));
22027   format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22028   ins_encode %{
22029     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
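    // For casts to T_LONG the destination vector is twice as wide as the float
    // source, so size the encoding by the destination; otherwise by the source.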
22030     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22031     __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22032   %}
22033   ins_pipe( pipe_slow );
22034 %}
22035 
22036 instruct castFtoX_mem_avx10(vec dst, memory src) %{
22037   predicate(VM_Version::supports_avx10_2() &&
22038             is_integral_type(Matcher::vector_element_basic_type(n)));
22039   match(Set dst (VectorCastF2X (LoadVector src)));
22040   format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22041   ins_encode %{
22042     int vlen = Matcher::vector_length(this);
22043     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22044     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22045     __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22046   %}
22047   ins_pipe( pipe_slow );
22048 %}
22049 
22050 instruct vcastDtoF_reg(vec dst, vec src) %{
22051   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22052   match(Set dst (VectorCastD2X src));
22053   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22054   ins_encode %{
22055     int vlen_enc = vector_length_encoding(this, $src);
22056     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22057   %}
22058   ins_pipe( pipe_slow );
22059 %}
22060 
22061 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22062   predicate(!VM_Version::supports_avx10_2() &&
22063             !VM_Version::supports_avx512vl() &&
22064             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22065             is_integral_type(Matcher::vector_element_basic_type(n)));
22066   match(Set dst (VectorCastD2X src));
22067   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22068   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22069   ins_encode %{
22070     int vlen_enc = vector_length_encoding(this, $src);
22071     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22072     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22073                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22074                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22075   %}
22076   ins_pipe( pipe_slow );
22077 %}
22078 
22079 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22080   predicate(!VM_Version::supports_avx10_2() &&
22081             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22082             is_integral_type(Matcher::vector_element_basic_type(n)));
22083   match(Set dst (VectorCastD2X src));
22084   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22085   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22086   ins_encode %{
22087     int vlen_enc = vector_length_encoding(this, $src);
22088     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22089     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22090                               ExternalAddress(vector_float_signflip());
22091     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22092                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22093   %}
22094   ins_pipe( pipe_slow );
22095 %}
22096 
22097 instruct castDtoX_reg_avx10(vec dst, vec src) %{
22098   predicate(VM_Version::supports_avx10_2() &&
22099             is_integral_type(Matcher::vector_element_basic_type(n)));
22100   match(Set dst (VectorCastD2X src));
22101   format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22102   ins_encode %{
22103     int vlen_enc = vector_length_encoding(this, $src);
22104     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22105     __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22106   %}
22107   ins_pipe( pipe_slow );
22108 %}
22109 
22110 instruct castDtoX_mem_avx10(vec dst, memory src) %{
22111   predicate(VM_Version::supports_avx10_2() &&
22112             is_integral_type(Matcher::vector_element_basic_type(n)));
22113   match(Set dst (VectorCastD2X (LoadVector src)));
22114   format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22115   ins_encode %{
22116     int vlen = Matcher::vector_length(this);
22117     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22118     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22119     __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22120   %}
22121   ins_pipe( pipe_slow );
22122 %}
22123 
22124 instruct vucast(vec dst, vec src) %{
22125   match(Set dst (VectorUCastB2X src));
22126   match(Set dst (VectorUCastS2X src));
22127   match(Set dst (VectorUCastI2X src));
22128   format %{ "vector_ucast $dst,$src\t!" %}
22129   ins_encode %{
22130     assert(UseAVX > 0, "required");
22131 
22132     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22133     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22134     int vlen_enc = vector_length_encoding(this);
22135     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22136   %}
22137   ins_pipe( pipe_slow );
22138 %}
22139 
22140 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22141   predicate(!VM_Version::supports_avx512vl() &&
22142             Matcher::vector_length_in_bytes(n) < 64 &&
22143             Matcher::vector_element_basic_type(n) == T_INT);
22144   match(Set dst (RoundVF src));
22145   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22146   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22147   ins_encode %{
22148     int vlen_enc = vector_length_encoding(this);
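    // Rounding uses a scratch MXCSR value: 0x3F80 masks all exceptions and selects
    // round-toward-negative-infinity, so Math.round can be computed as a conversion
    // of (x + 0.5) under that mode. The E-core variant (0x3FBF) additionally
    // pre-sets the six sticky exception-flag bits, presumably to avoid
    // flag-update overhead on those cores.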
22149     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22150     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22151                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22152                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22153   %}
22154   ins_pipe( pipe_slow );
22155 %}
22156 
22157 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22158   predicate((VM_Version::supports_avx512vl() ||
22159              Matcher::vector_length_in_bytes(n) == 64) &&
22160              Matcher::vector_element_basic_type(n) == T_INT);
22161   match(Set dst (RoundVF src));
22162   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22163   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22164   ins_encode %{
22165     int vlen_enc = vector_length_encoding(this);
22166     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22167     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22168                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22169                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22170   %}
22171   ins_pipe( pipe_slow );
22172 %}
22173 
22174 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22175   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22176   match(Set dst (RoundVD src));
22177   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2,  KILL cr);
22178   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22179   ins_encode %{
22180     int vlen_enc = vector_length_encoding(this);
22181     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22182     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22183                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22184                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22185   %}
22186   ins_pipe( pipe_slow );
22187 %}
22188 
22189 // --------------------------------- VectorMaskCmp --------------------------------------
22190 
22191 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22192   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22193             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22194             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22195             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22196   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22197   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22198   ins_encode %{
22199     int vlen_enc = vector_length_encoding(this, $src1);
22200     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22201     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22202       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22203     } else {
22204       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22205     }
22206   %}
22207   ins_pipe( pipe_slow );
22208 %}
22209 
22210 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22211   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22212             n->bottom_type()->isa_vectmask() == nullptr &&
22213             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22214   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22215   effect(TEMP ktmp);
22216   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22217   ins_encode %{
22218     int vlen_enc = Assembler::AVX_512bit;
22219     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22220     KRegister mask = k0; // The comparison itself is not being masked.
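    // evmovdqu* below expands the k-mask into a boolean vector: with merge == false,
    // lanes selected by $ktmp load -1 (all bits set) from the constant table and all
    // remaining lanes are zeroed.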
22221     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22222       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22223       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22224     } else {
22225       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22226       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22227     }
22228   %}
22229   ins_pipe( pipe_slow );
22230 %}
22231 
22232 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22233   predicate(n->bottom_type()->isa_vectmask() &&
22234             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22235   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22236   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22237   ins_encode %{
22238     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22239     int vlen_enc = vector_length_encoding(this, $src1);
22240     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22241     KRegister mask = k0; // The comparison itself is not being masked.
22242     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22243       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22244     } else {
22245       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22246     }
22247   %}
22248   ins_pipe( pipe_slow );
22249 %}
22250 
22251 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22252   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22253             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22254             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22255             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22256             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22257             (n->in(2)->get_int() == BoolTest::eq ||
22258              n->in(2)->get_int() == BoolTest::lt ||
22259              n->in(2)->get_int() == BoolTest::gt)); // cond
22260   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22261   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22262   ins_encode %{
22263     int vlen_enc = vector_length_encoding(this, $src1);
22264     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22265     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22266     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22267   %}
22268   ins_pipe( pipe_slow );
22269 %}
22270 
22271 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22272   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22273             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22274             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22275             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22276             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22277             (n->in(2)->get_int() == BoolTest::ne ||
22278              n->in(2)->get_int() == BoolTest::le ||
22279              n->in(2)->get_int() == BoolTest::ge)); // cond
22280   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22281   effect(TEMP dst, TEMP xtmp);
22282   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22283   ins_encode %{
22284     int vlen_enc = vector_length_encoding(this, $src1);
22285     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22286     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22287     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22288   %}
22289   ins_pipe( pipe_slow );
22290 %}
22291 
22292 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22293   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22294             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22295             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22296             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22297             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22298   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22299   effect(TEMP dst, TEMP xtmp);
22300   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22301   ins_encode %{
22302     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22303     int vlen_enc = vector_length_encoding(this, $src1);
22304     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22305     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22306 
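    // There are no unsigned vector compares before AVX-512, so flip the sign bit of
    // both operands and compare signed: x <u y iff (x ^ SIGN) <s (y ^ SIGN).
    // Byte example: 0x80 (128) >u 0x01, but a naive signed compare sees -128 < 1;
    // after the flip it compares 0x00 (0) against 0x81 (-127) and correctly
    // reports greater-than.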
22307     if (vlen_enc == Assembler::AVX_128bit) {
22308       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22309     } else {
22310       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22311     }
22312     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22313     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22314     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22315   %}
22316   ins_pipe( pipe_slow );
22317 %}
22318 
22319 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22320   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22321              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22322              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22323   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22324   effect(TEMP ktmp);
22325   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22326   ins_encode %{
22327     assert(UseAVX > 2, "required");
22328 
22329     int vlen_enc = vector_length_encoding(this, $src1);
22330     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22331     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22332     KRegister mask = k0; // The comparison itself is not being masked.
22333     bool merge = false;
22334     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22335 
22336     switch (src1_elem_bt) {
22337       case T_INT: {
22338         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22339         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22340         break;
22341       }
22342       case T_LONG: {
22343         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22344         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22345         break;
22346       }
22347       default: assert(false, "%s", type2name(src1_elem_bt));
22348     }
22349   %}
22350   ins_pipe( pipe_slow );
22351 %}
22352 
22354 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22355   predicate(n->bottom_type()->isa_vectmask() &&
22356             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22357   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22359   ins_encode %{
22360     assert(UseAVX > 2, "required");
22361     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22362 
22363     int vlen_enc = vector_length_encoding(this, $src1);
22364     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22365     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22366     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22367 
    // The comparison itself is not being masked, so k0 is passed as the mask.
22369     switch (src1_elem_bt) {
22370       case T_BYTE: {
22371         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22372         break;
22373       }
22374       case T_SHORT: {
22375         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22376         break;
22377       }
22378       case T_INT: {
22379         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22380         break;
22381       }
22382       case T_LONG: {
22383         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22384         break;
22385       }
22386       default: assert(false, "%s", type2name(src1_elem_bt));
22387     }
22388   %}
22389   ins_pipe( pipe_slow );
22390 %}
22391 
22392 // Extract
22393 
22394 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22395   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22396   match(Set dst (ExtractI src idx));
22397   match(Set dst (ExtractS src idx));
22398   match(Set dst (ExtractB src idx));
22399   format %{ "extractI $dst,$src,$idx\t!" %}
22400   ins_encode %{
22401     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22402 
22403     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22404     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22405   %}
22406   ins_pipe( pipe_slow );
22407 %}
22408 
22409 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22410   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22411             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22412   match(Set dst (ExtractI src idx));
22413   match(Set dst (ExtractS src idx));
22414   match(Set dst (ExtractB src idx));
22415   effect(TEMP vtmp);
22416   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22417   ins_encode %{
22418     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22419 
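    // Isolate the 128-bit lane holding the element into $vtmp, then extract the
    // element from within that lane.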
22420     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22421     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22422     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22423   %}
22424   ins_pipe( pipe_slow );
22425 %}
22426 
22427 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22428   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22429   match(Set dst (ExtractL src idx));
22430   format %{ "extractL $dst,$src,$idx\t!" %}
22431   ins_encode %{
22432     assert(UseSSE >= 4, "required");
22433     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22434 
22435     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22436   %}
22437   ins_pipe( pipe_slow );
22438 %}
22439 
22440 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22441   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22442             Matcher::vector_length(n->in(1)) == 8);  // src
22443   match(Set dst (ExtractL src idx));
22444   effect(TEMP vtmp);
22445   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22446   ins_encode %{
22447     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22448 
22449     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22450     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22451   %}
22452   ins_pipe( pipe_slow );
22453 %}
22454 
22455 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22456   predicate(Matcher::vector_length(n->in(1)) <= 4);
22457   match(Set dst (ExtractF src idx));
22458   effect(TEMP dst, TEMP vtmp);
22459   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22460   ins_encode %{
22461     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22462 
22463     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22464   %}
22465   ins_pipe( pipe_slow );
22466 %}
22467 
22468 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22469   predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22470             Matcher::vector_length(n->in(1)/*src*/) == 16);
22471   match(Set dst (ExtractF src idx));
22472   effect(TEMP vtmp);
22473   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22474   ins_encode %{
22475     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22476 
22477     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22478     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22479   %}
22480   ins_pipe( pipe_slow );
22481 %}
22482 
22483 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22484   predicate(Matcher::vector_length(n->in(1)) == 2); // src
22485   match(Set dst (ExtractD src idx));
22486   format %{ "extractD $dst,$src,$idx\t!" %}
22487   ins_encode %{
22488     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22489 
22490     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22491   %}
22492   ins_pipe( pipe_slow );
22493 %}
22494 
22495 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22496   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22497             Matcher::vector_length(n->in(1)) == 8);  // src
22498   match(Set dst (ExtractD src idx));
22499   effect(TEMP vtmp);
22500   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22501   ins_encode %{
22502     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22503 
22504     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22505     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22506   %}
22507   ins_pipe( pipe_slow );
22508 %}
22509 
22510 // --------------------------------- Vector Blend --------------------------------------
22511 
22512 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22513   predicate(UseAVX == 0);
22514   match(Set dst (VectorBlend (Binary dst src) mask));
22515   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
22516   effect(TEMP tmp);
22517   ins_encode %{
22518     assert(UseSSE >= 4, "required");
22519 
22520     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22521       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22522     }
22523     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22524   %}
22525   ins_pipe( pipe_slow );
22526 %}
22527 
22528 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22529   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22530             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22531             Matcher::vector_length_in_bytes(n) <= 32 &&
22532             is_integral_type(Matcher::vector_element_basic_type(n)));
22533   match(Set dst (VectorBlend (Binary src1 src2) mask));
22534   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22535   ins_encode %{
22536     int vlen_enc = vector_length_encoding(this);
22537     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22538   %}
22539   ins_pipe( pipe_slow );
22540 %}
22541 
22542 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22543   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22544             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22545             Matcher::vector_length_in_bytes(n) <= 32 &&
22546             !is_integral_type(Matcher::vector_element_basic_type(n)));
22547   match(Set dst (VectorBlend (Binary src1 src2) mask));
22548   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22549   ins_encode %{
22550     int vlen_enc = vector_length_encoding(this);
22551     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22552   %}
22553   ins_pipe( pipe_slow );
22554 %}
22555 
22556 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22557   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22558             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22559             Matcher::vector_length_in_bytes(n) <= 32);
22560   match(Set dst (VectorBlend (Binary src1 src2) mask));
22561   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22562   effect(TEMP vtmp, TEMP dst);
22563   ins_encode %{
22564     int vlen_enc = vector_length_encoding(this);
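    // vpblendvb is comparatively slow on E-cores, so emulate the blend bitwise:
    // dst = (src1 & ~mask) | (src2 & mask). Mask lanes are all-ones or all-zero,
    // so the bitwise form is exact.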
22565     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22566     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22567     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
22568   %}
22569   ins_pipe( pipe_slow );
22570 %}
22571 
22572 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22573   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22574             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22575   match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22577   effect(TEMP ktmp);
22578   ins_encode %{
22579      int vlen_enc = Assembler::AVX_512bit;
22580      BasicType elem_bt = Matcher::vector_element_basic_type(this);
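    // The vector mask holds -1/0 per lane: compare it against all-ones to derive a
    // k-mask, then blend with merge-masking (selected lanes take $src2, the rest
    // keep $src1).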
22581     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22582     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22583   %}
22584   ins_pipe( pipe_slow );
22585 %}
22586 
22588 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22589   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22590             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22591              VM_Version::supports_avx512bw()));
22592   match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22594   ins_encode %{
22595     int vlen_enc = vector_length_encoding(this);
22596     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22597     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22598   %}
22599   ins_pipe( pipe_slow );
22600 %}
22601 
22602 // --------------------------------- ABS --------------------------------------
22603 // a = |a|
22604 instruct vabsB_reg(vec dst, vec src) %{
22605   match(Set dst (AbsVB  src));
22606   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22607   ins_encode %{
22608     uint vlen = Matcher::vector_length(this);
22609     if (vlen <= 16) {
22610       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22611     } else {
22612       int vlen_enc = vector_length_encoding(this);
22613       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22614     }
22615   %}
22616   ins_pipe( pipe_slow );
22617 %}
22618 
22619 instruct vabsS_reg(vec dst, vec src) %{
22620   match(Set dst (AbsVS  src));
22621   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22622   ins_encode %{
22623     uint vlen = Matcher::vector_length(this);
22624     if (vlen <= 8) {
22625       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22626     } else {
22627       int vlen_enc = vector_length_encoding(this);
22628       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22629     }
22630   %}
22631   ins_pipe( pipe_slow );
22632 %}
22633 
22634 instruct vabsI_reg(vec dst, vec src) %{
22635   match(Set dst (AbsVI  src));
22636   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22637   ins_encode %{
22638     uint vlen = Matcher::vector_length(this);
22639     if (vlen <= 4) {
22640       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22641     } else {
22642       int vlen_enc = vector_length_encoding(this);
22643       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22644     }
22645   %}
22646   ins_pipe( pipe_slow );
22647 %}
22648 
22649 instruct vabsL_reg(vec dst, vec src) %{
22650   match(Set dst (AbsVL  src));
22651   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22652   ins_encode %{
22653     assert(UseAVX > 2, "required");
22654     int vlen_enc = vector_length_encoding(this);
22655     if (!VM_Version::supports_avx512vl()) {
22656       vlen_enc = Assembler::AVX_512bit;
22657     }
22658     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22659   %}
22660   ins_pipe( pipe_slow );
22661 %}
22662 
22663 // --------------------------------- ABSNEG --------------------------------------
22664 
22665 instruct vabsnegF(vec dst, vec src) %{
22666   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22667   match(Set dst (AbsVF src));
22668   match(Set dst (NegVF src));
22669   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22670   ins_cost(150);
22671   ins_encode %{
22672     int opcode = this->ideal_Opcode();
22673     int vlen = Matcher::vector_length(this);
22674     if (vlen == 2) {
22675       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22676     } else {
22677       assert(vlen == 8 || vlen == 16, "required");
22678       int vlen_enc = vector_length_encoding(this);
22679       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22680     }
22681   %}
22682   ins_pipe( pipe_slow );
22683 %}
22684 
22685 instruct vabsneg4F(vec dst) %{
22686   predicate(Matcher::vector_length(n) == 4);
22687   match(Set dst (AbsVF dst));
22688   match(Set dst (NegVF dst));
22689   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22690   ins_cost(150);
22691   ins_encode %{
22692     int opcode = this->ideal_Opcode();
22693     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22694   %}
22695   ins_pipe( pipe_slow );
22696 %}
22697 
22698 instruct vabsnegD(vec dst, vec src) %{
22699   match(Set dst (AbsVD  src));
22700   match(Set dst (NegVD  src));
22701   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22702   ins_encode %{
22703     int opcode = this->ideal_Opcode();
22704     uint vlen = Matcher::vector_length(this);
22705     if (vlen == 2) {
22706       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22707     } else {
22708       int vlen_enc = vector_length_encoding(this);
22709       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22710     }
22711   %}
22712   ins_pipe( pipe_slow );
22713 %}
22714 
22715 //------------------------------------- VectorTest --------------------------------------------
22716 
22717 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22718   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22719   match(Set cr (VectorTest src1 src2));
22720   effect(TEMP vtmp);
22721   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
22722   ins_encode %{
22723     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22724     int vlen = Matcher::vector_length_in_bytes(this, $src1);
22725     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22726   %}
22727   ins_pipe( pipe_slow );
22728 %}
22729 
22730 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
22731   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
22732   match(Set cr (VectorTest src1 src2));
22733   format %{ "vptest_ge16  $src1, $src2\n\t" %}
22734   ins_encode %{
22735     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22736     int vlen = Matcher::vector_length_in_bytes(this, $src1);
22737     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
22738   %}
22739   ins_pipe( pipe_slow );
22740 %}
22741 
22742 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22743   predicate((Matcher::vector_length(n->in(1)) < 8 ||
22744              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22745             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
22746   match(Set cr (VectorTest src1 src2));
22747   effect(TEMP tmp);
22748   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
22749   ins_encode %{
22750     uint masklen = Matcher::vector_length(this, $src1);
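    // All-true test for masks narrower than the k register: move the mask to a GPR,
    // keep only the low masklen bits, and compare against the all-ones pattern of
    // the same width.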
22751     __ kmovwl($tmp$$Register, $src1$$KRegister);
22752     __ andl($tmp$$Register, (1 << masklen) - 1);
22753     __ cmpl($tmp$$Register, (1 << masklen) - 1);
22754   %}
22755   ins_pipe( pipe_slow );
22756 %}
22757 
22758 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22759   predicate((Matcher::vector_length(n->in(1)) < 8 ||
22760              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22761             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
22762   match(Set cr (VectorTest src1 src2));
22763   effect(TEMP tmp);
22764   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
22765   ins_encode %{
22766     uint masklen = Matcher::vector_length(this, $src1);
22767     __ kmovwl($tmp$$Register, $src1$$KRegister);
22768     __ andl($tmp$$Register, (1 << masklen) - 1);
22769   %}
22770   ins_pipe( pipe_slow );
22771 %}
22772 
22773 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
22774   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
22775             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
22776   match(Set cr (VectorTest src1 src2));
22777   format %{ "ktest_ge8  $src1, $src2\n\t" %}
22778   ins_encode %{
22779     uint masklen = Matcher::vector_length(this, $src1);
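    // kortest sets ZF when the OR of its operands is all-zero and CF when it is
    // all-ones, so it serves both the anytrue and alltrue predicates.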
22780     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
22781   %}
22782   ins_pipe( pipe_slow );
22783 %}
22784 
22785 //------------------------------------- LoadMask --------------------------------------------
22786 
22787 instruct loadMask(legVec dst, legVec src) %{
22788   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
22789   match(Set dst (VectorLoadMask src));
22790   effect(TEMP dst);
22791   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
22792   ins_encode %{
22793     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
22794     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22795     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
22796   %}
22797   ins_pipe( pipe_slow );
22798 %}
22799 
22800 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
22801   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
22802   match(Set dst (VectorLoadMask src));
22803   effect(TEMP xtmp);
22804   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
22805   ins_encode %{
22806     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22807                         true, Assembler::AVX_512bit);
22808   %}
22809   ins_pipe( pipe_slow );
22810 %}
22811 
22812 instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
22813   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
22814   match(Set dst (VectorLoadMask src));
22815   effect(TEMP xtmp);
22816   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
22817   ins_encode %{
22818     int vlen_enc = vector_length_encoding(in(1));
22819     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22820                         false, vlen_enc);
22821   %}
22822   ins_pipe( pipe_slow );
22823 %}
22824 
22825 //------------------------------------- StoreMask --------------------------------------------
22826 
22827 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
22828   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22829   match(Set dst (VectorStoreMask src size));
22830   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22831   ins_encode %{
22832     int vlen = Matcher::vector_length(this);
22833     if (vlen <= 16 && UseAVX <= 2) {
22834       assert(UseSSE >= 3, "required");
22835       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22836     } else {
22837       assert(UseAVX > 0, "required");
22838       int src_vlen_enc = vector_length_encoding(this, $src);
22839       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22840     }
22841   %}
22842   ins_pipe( pipe_slow );
22843 %}
22844 
22845 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
22846   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22847   match(Set dst (VectorStoreMask src size));
22848   effect(TEMP_DEF dst, TEMP xtmp);
22849   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22850   ins_encode %{
22851     int vlen_enc = Assembler::AVX_128bit;
22852     int vlen = Matcher::vector_length(this);
22853     if (vlen <= 8) {
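      // Mask lanes arrive as 0/-1 shorts: pabsw normalizes them to 0/1, then
      // packuswb narrows each short to a byte (packing zeroes from $xtmp into the
      // upper half).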
22854       assert(UseSSE >= 3, "required");
22855       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22856       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22857       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22858     } else {
22859       assert(UseAVX > 0, "required");
22860       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22861       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22862       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22863     }
22864   %}
22865   ins_pipe( pipe_slow );
22866 %}
22867 
22868 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
22869   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22870   match(Set dst (VectorStoreMask src size));
22871   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22872   effect(TEMP_DEF dst, TEMP xtmp);
22873   ins_encode %{
22874     int vlen_enc = Assembler::AVX_128bit;
22875     int vlen = Matcher::vector_length(this);
22876     if (vlen <= 4) {
22877       assert(UseSSE >= 3, "required");
22878       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22879       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22880       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22881       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22882     } else {
22883       assert(UseAVX > 0, "required");
22884       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22885       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22886       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22887       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22888       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22889     }
22890   %}
22891   ins_pipe( pipe_slow );
22892 %}
22893 
22894 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
22895   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
22896   match(Set dst (VectorStoreMask src size));
22897   effect(TEMP_DEF dst, TEMP xtmp);
22898   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22899   ins_encode %{
22900     assert(UseSSE >= 3, "required");
22901     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22902     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
22903     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
22904     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22905     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22906   %}
22907   ins_pipe( pipe_slow );
22908 %}
22909 
22910 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
22911   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
22912   match(Set dst (VectorStoreMask src size));
22913   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
22914   effect(TEMP_DEF dst, TEMP vtmp);
22915   ins_encode %{
22916     int vlen_enc = Assembler::AVX_128bit;
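    // Compress the low dword of each long into the bottom 128 bits (shuffle the
    // even dwords, then blend in the upper lane), then narrow
    // dword -> word -> byte and normalize to 0/1 with vpabsb.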
22917     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
22918     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22919     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
22920     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22921     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22922     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22923     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22924   %}
22925   ins_pipe( pipe_slow );
22926 %}
22927 
22928 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
22929   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22930   match(Set dst (VectorStoreMask src size));
22931   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22932   ins_encode %{
22933     int src_vlen_enc = vector_length_encoding(this, $src);
22934     int dst_vlen_enc = vector_length_encoding(this);
22935     if (!VM_Version::supports_avx512vl()) {
22936       src_vlen_enc = Assembler::AVX_512bit;
22937     }
22938     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22939     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22940   %}
22941   ins_pipe( pipe_slow );
22942 %}
22943 
22944 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
22945   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22946   match(Set dst (VectorStoreMask src size));
22947   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22948   ins_encode %{
22949     int src_vlen_enc = vector_length_encoding(this, $src);
22950     int dst_vlen_enc = vector_length_encoding(this);
22951     if (!VM_Version::supports_avx512vl()) {
22952       src_vlen_enc = Assembler::AVX_512bit;
22953     }
22954     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22955     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22956   %}
22957   ins_pipe( pipe_slow );
22958 %}
22959 
22960 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
22961   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
22962   match(Set dst (VectorStoreMask mask size));
22963   effect(TEMP_DEF dst);
22964   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
22965   ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "required");
22967     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
22968                  false, Assembler::AVX_512bit, noreg);
22969     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
22970   %}
22971   ins_pipe( pipe_slow );
22972 %}
22973 
22974 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
22975   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
22976   match(Set dst (VectorStoreMask mask size));
22977   effect(TEMP_DEF dst);
22978   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
22979   ins_encode %{
22980     int dst_vlen_enc = vector_length_encoding(this);
22981     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
22982     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22983   %}
22984   ins_pipe( pipe_slow );
22985 %}
22986 
22987 instruct vmaskcast_evex(kReg dst) %{
22988   match(Set dst (VectorMaskCast dst));
22989   ins_cost(0);
22990   format %{ "vector_mask_cast $dst" %}
22991   ins_encode %{
22992     // empty
22993   %}
22994   ins_pipe(empty);
22995 %}
22996 
22997 instruct vmaskcast(vec dst) %{
22998   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
22999   match(Set dst (VectorMaskCast dst));
23000   ins_cost(0);
23001   format %{ "vector_mask_cast $dst" %}
23002   ins_encode %{
23003     // empty
23004   %}
23005   ins_pipe(empty);
23006 %}
23007 
23008 instruct vmaskcast_avx(vec dst, vec src) %{
23009   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23010   match(Set dst (VectorMaskCast src));
23011   format %{ "vector_mask_cast $dst, $src" %}
23012   ins_encode %{
23013     int vlen = Matcher::vector_length(this);
23014     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23015     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23016     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23017   %}
23018   ins_pipe(pipe_slow);
23019 %}
23020 
23021 //-------------------------------- Load Iota Indices ----------------------------------
23022 
23023 instruct loadIotaIndices(vec dst, immI_0 src) %{
23024   match(Set dst (VectorLoadConst src));
23025   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23026   ins_encode %{
23027      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23028      BasicType bt = Matcher::vector_element_basic_type(this);
23029      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23030   %}
23031   ins_pipe( pipe_slow );
23032 %}
23033 
23034 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23035   match(Set dst (PopulateIndex src1 src2));
23036   effect(TEMP dst, TEMP vtmp);
23037   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23038   ins_encode %{
23039      assert($src2$$constant == 1, "required");
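     // populate_index(x, 1) computes [x, x+1, x+2, ...]: broadcast x into $vtmp
     // and add the iota constant {0, 1, 2, ...} element-wise.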
23040      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23041      int vlen_enc = vector_length_encoding(this);
23042      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23043      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23044      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23045      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23046   %}
23047   ins_pipe( pipe_slow );
23048 %}
23049 
23050 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23051   match(Set dst (PopulateIndex src1 src2));
23052   effect(TEMP dst, TEMP vtmp);
23053   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23054   ins_encode %{
23055      assert($src2$$constant == 1, "required");
23056      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23057      int vlen_enc = vector_length_encoding(this);
23058      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23059      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23060      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23061      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23062   %}
23063   ins_pipe( pipe_slow );
23064 %}
23065 
23066 //-------------------------------- Rearrange ----------------------------------
23067 
23068 // LoadShuffle/Rearrange for Byte
23069 instruct rearrangeB(vec dst, vec shuffle) %{
23070   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23071             Matcher::vector_length(n) < 32);
23072   match(Set dst (VectorRearrange dst shuffle));
23073   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23074   ins_encode %{
23075     assert(UseSSE >= 4, "required");
23076     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23077   %}
23078   ins_pipe( pipe_slow );
23079 %}
23080 
23081 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23082   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23083             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23084   match(Set dst (VectorRearrange src shuffle));
23085   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23086   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23087   ins_encode %{
23088     assert(UseAVX >= 2, "required");
23089     // Swap src into vtmp1
23090     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23091     // Shuffle swapped src to get entries from other 128 bit lane
23092     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23093     // Shuffle original src to get entries from self 128 bit lane
23094     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23095     // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23096     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23097     // Perform the blend
23098     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23099   %}
23100   ins_pipe( pipe_slow );
23101 %}
23102 
23104 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23105   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23106             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23107   match(Set dst (VectorRearrange src shuffle));
23108   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23110   ins_encode %{
23111     int vlen_enc = vector_length_encoding(this);
23112     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23113                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23114                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23115   %}
23116   ins_pipe( pipe_slow );
23117 %}
23118 
23119 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23120   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23121             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23122   match(Set dst (VectorRearrange src shuffle));
23123   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23124   ins_encode %{
23125     int vlen_enc = vector_length_encoding(this);
23126     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23127   %}
23128   ins_pipe( pipe_slow );
23129 %}
23130 
23131 // LoadShuffle/Rearrange for Short
23132 
23133 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23134   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23135             !VM_Version::supports_avx512bw());
23136   match(Set dst (VectorLoadShuffle src));
23137   effect(TEMP dst, TEMP vtmp);
23138   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23139   ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
23142     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23143     if (UseAVX == 0) {
23144       assert(vlen_in_bytes <= 16, "required");
23145       // Multiply each shuffle index by two to get a byte index
23146       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23147       __ psllw($vtmp$$XMMRegister, 1);
23148 
23149       // Duplicate to create 2 copies of byte index
23150       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23151       __ psllw($dst$$XMMRegister, 8);
23152       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23153 
23154       // Add one to get alternate byte index
23155       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23156       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23157     } else {
23158       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23159       int vlen_enc = vector_length_encoding(this);
23160       // Multiply each shuffle index by two to get a byte index
23161       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23162 
23163       // Duplicate to create 2 copies of byte index
23164       __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister,  8, vlen_enc);
23165       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23166 
23167       // Add one to get alternate byte index
23168       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23169     }
23170   %}
23171   ins_pipe( pipe_slow );
23172 %}
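
      // Worked example for the rule above (illustrative; assumes
      // vector_short_shufflemask() is the repeating byte pattern {0, 1}): for
      // a short shuffle {3, 0, 2, 1}, the first shift doubles each index to
      // {6, 0, 4, 2}; the shift/or pair copies the low byte of each short
      // into both of its byte positions, giving bytes {6,6, 0,0, 4,4, 2,2};
      // and adding {0,1, 0,1, ...} yields the byte shuffle
      // {6,7, 0,1, 4,5, 2,3}, which the byte rearrange rules can feed
      // straight into pshufb/vpshufb.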
23173 
23174 instruct rearrangeS(vec dst, vec shuffle) %{
23175   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23176             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23177   match(Set dst (VectorRearrange dst shuffle));
23178   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23179   ins_encode %{
23180     assert(UseSSE >= 4, "required");
23181     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23182   %}
23183   ins_pipe( pipe_slow );
23184 %}
23185 
23186 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23187   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23188             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23189   match(Set dst (VectorRearrange src shuffle));
23190   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23191   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23192   ins_encode %{
23193     assert(UseAVX >= 2, "required");
23194     // Swap the two 128-bit lanes of src into vtmp1
23195     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23196     // Shuffle the swapped src to pick up entries from the other 128-bit lane
23197     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23198     // Shuffle the original src to pick up entries from its own 128-bit lane
23199     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23200     // Create a blend mask by setting the high bit for shuffle entries that come from the other lane
23201     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23202     // Perform the blend
23203     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23204   %}
23205   ins_pipe( pipe_slow );
23206 %}
23207 
23208 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23209   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23210             VM_Version::supports_avx512bw());
23211   match(Set dst (VectorRearrange src shuffle));
23212   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23213   ins_encode %{
23214     int vlen_enc = vector_length_encoding(this);
23215     if (!VM_Version::supports_avx512vl()) {
23216       vlen_enc = Assembler::AVX_512bit;
23217     }
23218     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23219   %}
23220   ins_pipe( pipe_slow );
23221 %}
23222 
23223 // LoadShuffle/Rearrange for Integer and Float
23224 
23225 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23226   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23227             Matcher::vector_length(n) == 4 && UseAVX == 0);
23228   match(Set dst (VectorLoadShuffle src));
23229   effect(TEMP dst, TEMP vtmp);
23230   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23231   ins_encode %{
23232     assert(UseSSE >= 4, "required");
23233 
23234     // Create a byte shuffle mask from the int shuffle mask,
23235     // since only a byte shuffle instruction is available on these platforms
23236 
23237     // Duplicate and multiply each shuffle index by 4
23238     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23239     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23240     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23241     __ psllw($vtmp$$XMMRegister, 2);
23242 
23243     // Duplicate again to create 4 copies of byte index
23244     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23245     __ psllw($dst$$XMMRegister, 8);
23246     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23247 
23248     // Add 0,1,2,3 to each int's bytes to get the alternate byte indices
23249     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23250     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23251   %}
23252   ins_pipe( pipe_slow );
23253 %}
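
      // Worked example for the rule above (illustrative; assumes
      // vector_int_shufflemask() is the repeating byte pattern {0, 1, 2, 3}):
      // for an int shuffle {2, 0, 3, 1}, pshuflw/pshufhw copy each low index
      // word across its int, giving words {2,2, 0,0, 3,3, 1,1}; psllw by 2
      // scales them to byte offsets {8,8, 0,0, 12,12, 4,4}; the shift/or pair
      // spreads each offset over all four bytes of its int; and adding
      // {0,1,2,3, ...} produces the byte shuffle
      // {8,9,10,11, 0,1,2,3, 12,13,14,15, 4,5,6,7}.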
23254 
23255 instruct rearrangeI(vec dst, vec shuffle) %{
23256   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23257             UseAVX == 0);
23258   match(Set dst (VectorRearrange dst shuffle));
23259   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23260   ins_encode %{
23261     assert(UseSSE >= 4, "required");
23262     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23263   %}
23264   ins_pipe( pipe_slow );
23265 %}
23266 
23267 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23268   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23269             UseAVX > 0);
23270   match(Set dst (VectorRearrange src shuffle));
23271   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23272   ins_encode %{
23273     int vlen_enc = vector_length_encoding(this);
23274     BasicType bt = Matcher::vector_element_basic_type(this);
23275     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23276   %}
23277   ins_pipe( pipe_slow );
23278 %}
23279 
23280 // LoadShuffle/Rearrange for Long and Double
23281 
23282 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23283   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23284             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23285   match(Set dst (VectorLoadShuffle src));
23286   effect(TEMP dst, TEMP vtmp);
23287   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23288   ins_encode %{
23289     assert(UseAVX >= 2, "required");
23290 
23291     int vlen_enc = vector_length_encoding(this);
23292     // Create a double word shuffle mask from the long shuffle mask,
23293     // since only a double word shuffle instruction is available on these platforms
23294 
23295     // Multiply each shuffle index by two to get a double word index
23296     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23297 
23298     // Duplicate each double word shuffle
23299     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23300     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23301 
23302     // Add one to get alternate double word index
23303     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23304   %}
23305   ins_pipe( pipe_slow );
23306 %}
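
      // Worked example for the rule above (illustrative; assumes
      // vector_long_shufflemask() is the repeating dword pattern {0, 1}): for
      // a long shuffle {1, 0, 3, 2}, the first shift doubles each index to
      // the dword base {2, 0, 6, 4}; the shift/or pair copies that base into
      // both dword halves of each long; and adding {0,1, 0,1, ...} gives the
      // dword shuffle {2,3, 0,1, 6,7, 4,5} that the vpermd-based rearrange
      // below consumes.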
23307 
23308 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23309   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23310             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23311   match(Set dst (VectorRearrange src shuffle));
23312   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23313   ins_encode %{
23314     assert(UseAVX >= 2, "required");
23315 
23316     int vlen_enc = vector_length_encoding(this);
23317     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23318   %}
23319   ins_pipe( pipe_slow );
23320 %}
23321 
23322 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23323   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23324             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23325   match(Set dst (VectorRearrange src shuffle));
23326   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23327   ins_encode %{
23328     assert(UseAVX > 2, "required");
23329 
23330     int vlen_enc = vector_length_encoding(this);
23331     if (vlen_enc == Assembler::AVX_128bit) {
23332       vlen_enc = Assembler::AVX_256bit;
23333     }
23334     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23335   %}
23336   ins_pipe( pipe_slow );
23337 %}
23338 
23339 // --------------------------------- FMA --------------------------------------
23340 // a * b + c
23341 
23342 instruct vfmaF_reg(vec a, vec b, vec c) %{
23343   match(Set c (FmaVF  c (Binary a b)));
23344   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23345   ins_cost(150);
23346   ins_encode %{
23347     assert(UseFMA, "not enabled");
23348     int vlen_enc = vector_length_encoding(this);
23349     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23350   %}
23351   ins_pipe( pipe_slow );
23352 %}
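
      // FmaVF computes a * b + c per lane with a single rounding step (no
      // intermediate rounding), matching Math.fma. A hedged Java Vector API
      // sketch of code that can reduce to this rule (FloatVector from
      // jdk.incubator.vector; the surrounding setup is an assumption):
      //   FloatVector r = a.fma(b, c);   // lanewise fused multiply-add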
23353 
23354 instruct vfmaF_mem(vec a, memory b, vec c) %{
23355   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23356   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23357   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23358   ins_cost(150);
23359   ins_encode %{
23360     assert(UseFMA, "not enabled");
23361     int vlen_enc = vector_length_encoding(this);
23362     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23363   %}
23364   ins_pipe( pipe_slow );
23365 %}
23366 
23367 instruct vfmaD_reg(vec a, vec b, vec c) %{
23368   match(Set c (FmaVD  c (Binary a b)));
23369   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23370   ins_cost(150);
23371   ins_encode %{
23372     assert(UseFMA, "not enabled");
23373     int vlen_enc = vector_length_encoding(this);
23374     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23375   %}
23376   ins_pipe( pipe_slow );
23377 %}
23378 
23379 instruct vfmaD_mem(vec a, memory b, vec c) %{
23380   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23381   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23382   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23383   ins_cost(150);
23384   ins_encode %{
23385     assert(UseFMA, "not enabled");
23386     int vlen_enc = vector_length_encoding(this);
23387     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23388   %}
23389   ins_pipe( pipe_slow );
23390 %}
23391 
23392 // --------------------------------- Vector Multiply Add --------------------------------------
23393 
23394 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23395   predicate(UseAVX == 0);
23396   match(Set dst (MulAddVS2VI dst src1));
23397   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23398   ins_encode %{
23399     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23400   %}
23401   ins_pipe( pipe_slow );
23402 %}
23403 
23404 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23405   predicate(UseAVX > 0);
23406   match(Set dst (MulAddVS2VI src1 src2));
23407   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23408   ins_encode %{
23409     int vlen_enc = vector_length_encoding(this);
23410     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23411   %}
23412   ins_pipe( pipe_slow );
23413 %}
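
      // pmaddwd multiplies adjacent signed 16-bit pairs and sums each pair
      // into one 32-bit lane:
      //   dst[i] = src1[2i] * src2[2i] + src1[2i+1] * src2[2i+1]
      // Example: shorts {1, 2, 3, 4} * {10, 20, 30, 40} produce ints
      // {1*10 + 2*20, 3*30 + 4*40} = {50, 250}.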
23414 
23415 // --------------------------------- Vector Multiply Add Add ----------------------------------
23416 
23417 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23418   predicate(VM_Version::supports_avx512_vnni());
23419   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23420   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23421   ins_encode %{
23422     assert(UseAVX > 2, "required");
23423     int vlen_enc = vector_length_encoding(this);
23424     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23425   %}
23426   ins_cost(10);
23427   ins_pipe( pipe_slow );
23428 %}
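
      // With AVX512_VNNI, evpdpwssd fuses the multiply-add above with the
      // following 32-bit accumulation in a single instruction:
      //   dst[i] += src1[2i] * src2[2i] + src1[2i+1] * src2[2i+1]
      // The low ins_cost makes this form preferred over a separate
      // vpmaddwd + vpaddd sequence.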
23429 
23430 // --------------------------------- PopCount --------------------------------------
23431 
23432 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23433   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23434   match(Set dst (PopCountVI src));
23435   match(Set dst (PopCountVL src));
23436   format %{ "vector_popcount_integral $dst, $src" %}
23437   ins_encode %{
23439     int vlen_enc = vector_length_encoding(this, $src);
23440     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23441     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23442   %}
23443   ins_pipe( pipe_slow );
23444 %}
23445 
23446 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23447   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23448   match(Set dst (PopCountVI src mask));
23449   match(Set dst (PopCountVL src mask));
23450   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23451   ins_encode %{
23452     int vlen_enc = vector_length_encoding(this, $src);
23453     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23454     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23455     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23456   %}
23457   ins_pipe( pipe_slow );
23458 %}
23459 
23460 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23461   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23462   match(Set dst (PopCountVI src));
23463   match(Set dst (PopCountVL src));
23464   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23465   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23466   ins_encode %{
23468     int vlen_enc = vector_length_encoding(this, $src);
23469     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23470     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23471                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23472   %}
23473   ins_pipe( pipe_slow );
23474 %}
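
      // Sketch of the AVX fallback above (see vector_popcount_integral in the
      // macro assembler; the exact sequence is an implementation detail): it
      // uses the classic pshufb nibble lookup, a 16-entry table of bit counts
      // indexed by the low and high nibble of every byte, with the per-byte
      // counts then widened and summed per lane. Illustrative byte example:
      // 0xB3 -> nibbles 0xB (3 bits) and 0x3 (2 bits) -> popcount 5.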
23475 
23476 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23477 
23478 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23479   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23480                                               Matcher::vector_length_in_bytes(n->in(1))));
23481   match(Set dst (CountTrailingZerosV src));
23482   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23483   ins_cost(400);
23484   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23485   ins_encode %{
23486     int vlen_enc = vector_length_encoding(this, $src);
23487     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23488     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23489                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23490   %}
23491   ins_pipe( pipe_slow );
23492 %}
23493 
23494 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23495   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23496             VM_Version::supports_avx512cd() &&
23497             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23498   match(Set dst (CountTrailingZerosV src));
23499   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23500   ins_cost(400);
23501   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23502   ins_encode %{
23503     int vlen_enc = vector_length_encoding(this, $src);
23504     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23505     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23506                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23507   %}
23508   ins_pipe( pipe_slow );
23509 %}
23510 
23511 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23512   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23513   match(Set dst (CountTrailingZerosV src));
23514   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23515   ins_cost(400);
23516   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23517   ins_encode %{
23518     int vlen_enc = vector_length_encoding(this, $src);
23519     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23520     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23521                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23522                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23523   %}
23524   ins_pipe( pipe_slow );
23525 %}
23526 
23527 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23528   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23529   match(Set dst (CountTrailingZerosV src));
23530   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23531   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23532   ins_encode %{
23533     int vlen_enc = vector_length_encoding(this, $src);
23534     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23535     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23536                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23537   %}
23538   ins_pipe( pipe_slow );
23539 %}
23540 
23541 
23542 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23543 
23544 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23545   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23546   effect(TEMP dst);
23547   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23548   ins_encode %{
23549     int vector_len = vector_length_encoding(this);
23550     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23551   %}
23552   ins_pipe( pipe_slow );
23553 %}
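
      // $func is an 8-bit truth table indexed by the bit triple
      // (dst, src2, src3): bit (4*a + 2*b + c) of $func gives the result for
      // inputs a, b, c. For example, func = 0xCA encodes the bitwise select
      // (dst & src2) | (~dst & src3).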
23554 
23555 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23556   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23557   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23558   effect(TEMP dst);
23559   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23560   ins_encode %{
23561     int vector_len = vector_length_encoding(this);
23562     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23563   %}
23564   ins_pipe( pipe_slow );
23565 %}
23566 
23567 // --------------------------------- Rotation Operations ----------------------------------
23568 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23569   match(Set dst (RotateLeftV src shift));
23570   match(Set dst (RotateRightV src shift));
23571   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23572   ins_encode %{
23573     int opcode      = this->ideal_Opcode();
23574     int vector_len  = vector_length_encoding(this);
23575     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23576     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23577   %}
23578   ins_pipe( pipe_slow );
23579 %}
23580 
23581 instruct vprorate(vec dst, vec src, vec shift) %{
23582   match(Set dst (RotateLeftV src shift));
23583   match(Set dst (RotateRightV src shift));
23584   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23585   ins_encode %{
23586     int opcode      = this->ideal_Opcode();
23587     int vector_len  = vector_length_encoding(this);
23588     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23589     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23590   %}
23591   ins_pipe( pipe_slow );
23592 %}
23593 
23594 // ---------------------------------- Masked Operations ------------------------------------
23595 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23596   predicate(!n->in(3)->bottom_type()->isa_vectmask());
23597   match(Set dst (LoadVectorMasked mem mask));
23598   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked load" %}
23599   ins_encode %{
23600     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23601     int vlen_enc = vector_length_encoding(this);
23602     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23603   %}
23604   ins_pipe( pipe_slow );
23605 %}
23606 
23607 
23608 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23609   predicate(n->in(3)->bottom_type()->isa_vectmask());
23610   match(Set dst (LoadVectorMasked mem mask));
23611   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked load" %}
23612   ins_encode %{
23613     BasicType elmType =  this->bottom_type()->is_vect()->element_basic_type();
23614     int vector_len = vector_length_encoding(this);
23615     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23616   %}
23617   ins_pipe( pipe_slow );
23618 %}
23619 
23620 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23621   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23622   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23623   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23624   ins_encode %{
23625     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23626     int vlen_enc = vector_length_encoding(src_node);
23627     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23628     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23629   %}
23630   ins_pipe( pipe_slow );
23631 %}
23632 
23633 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23634   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23635   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23636   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23637   ins_encode %{
23638     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23639     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23640     int vlen_enc = vector_length_encoding(src_node);
23641     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23642   %}
23643   ins_pipe( pipe_slow );
23644 %}
23645 
23646 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23647   match(Set addr (VerifyVectorAlignment addr mask));
23648   effect(KILL cr);
23649   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23650   ins_encode %{
23651     Label Lskip;
23652     // check if masked bits of addr are zero
23653     __ testq($addr$$Register, $mask$$constant);
23654     __ jccb(Assembler::equal, Lskip);
23655     __ stop("verify_vector_alignment found a misaligned vector memory access");
23656     __ bind(Lskip);
23657   %}
23658   ins_pipe(pipe_slow);
23659 %}
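
      // Example: for a 32-byte vector, $mask is 31 (0x1F). An address ending
      // in 0x40 satisfies addr & 0x1F == 0 and skips the trap, while one
      // ending in 0x44 fails the test and stops the VM with the message above.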
23660 
23661 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23662   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23663   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23664   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23665   ins_encode %{
23666     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23667     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23668 
23669     Label DONE;
23670     int vlen_enc = vector_length_encoding(this, $src1);
23671     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23672 
23673     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23674     __ mov64($dst$$Register, -1L);
23675     __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23676     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23677     __ jccb(Assembler::carrySet, DONE);
23678     __ kmovql($dst$$Register, $ktmp1$$KRegister);
23679     __ notq($dst$$Register);
23680     __ tzcntq($dst$$Register, $dst$$Register);
23681     __ bind(DONE);
23682   %}
23683   ins_pipe( pipe_slow );
23684 %}
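
      // Walkthrough of the rule above: ktmp1 holds the lanes that compared
      // equal (restricted to $mask) and ktmp2 holds the lanes outside $mask.
      // kortest sets CF only when ktmp1 | ktmp2 is all ones, i.e. every masked
      // lane matched, and $dst then keeps -1. Otherwise ~ktmp1 has its lowest
      // set bit at the first mismatch, so tzcnt returns that lane index.
      // Example: with an 8-lane mask of 0xFF and equality bits 0b11110111,
      // ~ktmp1 ends in ...1000 and $dst = 3.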
23685 
23686 
23687 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23688   match(Set dst (VectorMaskGen len));
23689   effect(TEMP temp, KILL cr);
23690   format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
23691   ins_encode %{
23692     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23693   %}
23694   ins_pipe( pipe_slow );
23695 %}
23696 
23697 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23698   match(Set dst (VectorMaskGen len));
23699   format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
23700   effect(TEMP temp);
23701   ins_encode %{
23702     __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23703     __ kmovql($dst$$KRegister, $temp$$Register);
23704   %}
23705   ins_pipe( pipe_slow );
23706 %}
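
      // Example: $len = 5 gives 0xFFFFFFFFFFFFFFFF >> (64 - 5) = 0x1F, a
      // constant with the low five bits set, which kmovql then moves into the
      // k register as the lane mask.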
23707 
23708 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23709   predicate(n->in(1)->bottom_type()->isa_vectmask());
23710   match(Set dst (VectorMaskToLong mask));
23711   effect(TEMP dst, KILL cr);
23712   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23713   ins_encode %{
23714     int opcode = this->ideal_Opcode();
23715     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23716     int mask_len = Matcher::vector_length(this, $mask);
23717     int mask_size = mask_len * type2aelembytes(mbt);
23718     int vlen_enc = vector_length_encoding(this, $mask);
23719     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23720                              $dst$$Register, mask_len, mask_size, vlen_enc);
23721   %}
23722   ins_pipe( pipe_slow );
23723 %}
23724 
23725 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23726   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23727   match(Set dst (VectorMaskToLong mask));
23728   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23729   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23730   ins_encode %{
23731     int opcode = this->ideal_Opcode();
23732     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23733     int mask_len = Matcher::vector_length(this, $mask);
23734     int vlen_enc = vector_length_encoding(this, $mask);
23735     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23736                              $dst$$Register, mask_len, mbt, vlen_enc);
23737   %}
23738   ins_pipe( pipe_slow );
23739 %}
23740 
23741 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
23742   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23743   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
23744   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
23745   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23746   ins_encode %{
23747     int opcode = this->ideal_Opcode();
23748     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23749     int mask_len = Matcher::vector_length(this, $mask);
23750     int vlen_enc = vector_length_encoding(this, $mask);
23751     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23752                              $dst$$Register, mask_len, mbt, vlen_enc);
23753   %}
23754   ins_pipe( pipe_slow );
23755 %}
23756 
23757 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23758   predicate(n->in(1)->bottom_type()->isa_vectmask());
23759   match(Set dst (VectorMaskTrueCount mask));
23760   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23761   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
23762   ins_encode %{
23763     int opcode = this->ideal_Opcode();
23764     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23765     int mask_len = Matcher::vector_length(this, $mask);
23766     int mask_size = mask_len * type2aelembytes(mbt);
23767     int vlen_enc = vector_length_encoding(this, $mask);
23768     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23769                              $tmp$$Register, mask_len, mask_size, vlen_enc);
23770   %}
23771   ins_pipe( pipe_slow );
23772 %}
23773 
23774 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23775   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23776   match(Set dst (VectorMaskTrueCount mask));
23777   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23778   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23779   ins_encode %{
23780     int opcode = this->ideal_Opcode();
23781     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23782     int mask_len = Matcher::vector_length(this, $mask);
23783     int vlen_enc = vector_length_encoding(this, $mask);
23784     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23785                              $tmp$$Register, mask_len, mbt, vlen_enc);
23786   %}
23787   ins_pipe( pipe_slow );
23788 %}
23789 
23790 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23791   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23792   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
23793   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23794   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23795   ins_encode %{
23796     int opcode = this->ideal_Opcode();
23797     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23798     int mask_len = Matcher::vector_length(this, $mask);
23799     int vlen_enc = vector_length_encoding(this, $mask);
23800     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23801                              $tmp$$Register, mask_len, mbt, vlen_enc);
23802   %}
23803   ins_pipe( pipe_slow );
23804 %}
23805 
23806 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23807   predicate(n->in(1)->bottom_type()->isa_vectmask());
23808   match(Set dst (VectorMaskFirstTrue mask));
23809   match(Set dst (VectorMaskLastTrue mask));
23810   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23811   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
23812   ins_encode %{
23813     int opcode = this->ideal_Opcode();
23814     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23815     int mask_len = Matcher::vector_length(this, $mask);
23816     int mask_size = mask_len * type2aelembytes(mbt);
23817     int vlen_enc = vector_length_encoding(this, $mask);
23818     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23819                              $tmp$$Register, mask_len, mask_size, vlen_enc);
23820   %}
23821   ins_pipe( pipe_slow );
23822 %}
23823 
23824 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23825   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23826   match(Set dst (VectorMaskFirstTrue mask));
23827   match(Set dst (VectorMaskLastTrue mask));
23828   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23829   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23830   ins_encode %{
23831     int opcode = this->ideal_Opcode();
23832     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23833     int mask_len = Matcher::vector_length(this, $mask);
23834     int vlen_enc = vector_length_encoding(this, $mask);
23835     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23836                              $tmp$$Register, mask_len, mbt, vlen_enc);
23837   %}
23838   ins_pipe( pipe_slow );
23839 %}
23840 
23841 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23842   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23843   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
23844   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
23845   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23846   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23847   ins_encode %{
23848     int opcode = this->ideal_Opcode();
23849     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23850     int mask_len = Matcher::vector_length(this, $mask);
23851     int vlen_enc = vector_length_encoding(this, $mask);
23852     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23853                              $tmp$$Register, mask_len, mbt, vlen_enc);
23854   %}
23855   ins_pipe( pipe_slow );
23856 %}
23857 
23858 // --------------------------------- Compress/Expand Operations ---------------------------
23859 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
23860   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
23861   match(Set dst (CompressV src mask));
23862   match(Set dst (ExpandV src mask));
23863   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
23864   format %{ "vector_compress_expand $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
23865   ins_encode %{
23866     int opcode = this->ideal_Opcode();
23867     int vlen_enc = vector_length_encoding(this);
23868     BasicType bt  = Matcher::vector_element_basic_type(this);
23869     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
23870                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
23871   %}
23872   ins_pipe( pipe_slow );
23873 %}
23874 
23875 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
23876   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
23877   match(Set dst (CompressV src mask));
23878   match(Set dst (ExpandV src mask));
23879   format %{ "vector_compress_expand $dst, $src, $mask" %}
23880   ins_encode %{
23881     int opcode = this->ideal_Opcode();
23882     int vector_len = vector_length_encoding(this);
23883     BasicType bt  = Matcher::vector_element_basic_type(this);
23884     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
23885   %}
23886   ins_pipe( pipe_slow );
23887 %}
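
      // Semantics sketch: CompressV packs the lanes selected by $mask toward
      // the low end, e.g. {a, b, c, d} with mask 0b0101 compresses to
      // {a, c, 0, 0} (the tail is zeroed here, since merge is passed as
      // false), and ExpandV performs the inverse scatter from the low lanes
      // into the masked positions, e.g. {a, b, c, d} with mask 0b0101 expands
      // to {a, 0, b, 0}.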
23888 
23889 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
23890   match(Set dst (CompressM mask));
23891   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
23892   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
23893   ins_encode %{
23894     assert(this->in(1)->bottom_type()->isa_vectmask(), "required");
23895     int mask_len = Matcher::vector_length(this);
23896     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
23897   %}
23898   ins_pipe( pipe_slow );
23899 %}
23900 
23901 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
23902 
23903 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
23904   predicate(!VM_Version::supports_gfni());
23905   match(Set dst (ReverseV src));
23906   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23907   format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
23908   ins_encode %{
23909     int vec_enc = vector_length_encoding(this);
23910     BasicType bt = Matcher::vector_element_basic_type(this);
23911     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23912                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
23913   %}
23914   ins_pipe( pipe_slow );
23915 %}
23916 
23917 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
23918   predicate(VM_Version::supports_gfni());
23919   match(Set dst (ReverseV src));
23920   effect(TEMP dst, TEMP xtmp);
23921   format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
23922   ins_encode %{
23923     int vec_enc = vector_length_encoding(this);
23924     BasicType bt  = Matcher::vector_element_basic_type(this);
23925     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
23926     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
23927                                $xtmp$$XMMRegister);
23928   %}
23929   ins_pipe( pipe_slow );
23930 %}
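
      // The 64-bit constant above is the anti-diagonal binary matrix: viewed
      // as an 8x8 bit matrix, gf2p8affineqb multiplication by it maps result
      // bit j of each byte to source bit 7 - j, reversing the bit order
      // within every byte in one instruction; reordering bytes for the wider
      // element types is then the only remaining work.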
23931 
23932 instruct vreverse_byte_reg(vec dst, vec src) %{
23933   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
23934   match(Set dst (ReverseBytesV src));
23935   effect(TEMP dst);
23936   format %{ "vector_reverse_byte $dst, $src" %}
23937   ins_encode %{
23938     int vec_enc = vector_length_encoding(this);
23939     BasicType bt = Matcher::vector_element_basic_type(this);
23940     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
23941   %}
23942   ins_pipe( pipe_slow );
23943 %}
23944 
23945 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
23946   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
23947   match(Set dst (ReverseBytesV src));
23948   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23949   format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
23950   ins_encode %{
23951     int vec_enc = vector_length_encoding(this);
23952     BasicType bt = Matcher::vector_element_basic_type(this);
23953     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23954                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
23955   %}
23956   ins_pipe( pipe_slow );
23957 %}
23958 
23959 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
23960 
23961 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
23962   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23963                                               Matcher::vector_length_in_bytes(n->in(1))));
23964   match(Set dst (CountLeadingZerosV src));
23965   format %{ "vector_count_leading_zeros $dst, $src" %}
23966   ins_encode %{
23967      int vlen_enc = vector_length_encoding(this, $src);
23968      BasicType bt = Matcher::vector_element_basic_type(this, $src);
23969      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23970                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
23971   %}
23972   ins_pipe( pipe_slow );
23973 %}
23974 
23975 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
23976   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23977                                               Matcher::vector_length_in_bytes(n->in(1))));
23978   match(Set dst (CountLeadingZerosV src mask));
23979   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
23980   ins_encode %{
23981     int vlen_enc = vector_length_encoding(this, $src);
23982     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23983     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23984     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
23985                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
23986   %}
23987   ins_pipe( pipe_slow );
23988 %}
23989 
23990 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
23991   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23992             VM_Version::supports_avx512cd() &&
23993             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23994   match(Set dst (CountLeadingZerosV src));
23995   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
23996   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
23997   ins_encode %{
23998     int vlen_enc = vector_length_encoding(this, $src);
23999     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24000     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24001                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24002   %}
24003   ins_pipe( pipe_slow );
24004 %}
24005 
24006 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24007   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24008   match(Set dst (CountLeadingZerosV src));
24009   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24010   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24011   ins_encode %{
24012     int vlen_enc = vector_length_encoding(this, $src);
24013     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24014     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24015                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24016                                        $rtmp$$Register, true, vlen_enc);
24017   %}
24018   ins_pipe( pipe_slow );
24019 %}
24020 
24021 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24022   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24023             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24024   match(Set dst (CountLeadingZerosV src));
24025   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24026   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24027   ins_encode %{
24028     int vlen_enc = vector_length_encoding(this, $src);
24029     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24030     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24031                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24032   %}
24033   ins_pipe( pipe_slow );
24034 %}
24035 
24036 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24037   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24038             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24039   match(Set dst (CountLeadingZerosV src));
24040   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24041   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24042   ins_encode %{
24043     int vlen_enc = vector_length_encoding(this, $src);
24044     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24045     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24046                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24047   %}
24048   ins_pipe( pipe_slow );
24049 %}
24050 
24051 // ---------------------------------- Vector Masked Operations ------------------------------------
24052 
24053 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24054   match(Set dst (AddVB (Binary dst src2) mask));
24055   match(Set dst (AddVS (Binary dst src2) mask));
24056   match(Set dst (AddVI (Binary dst src2) mask));
24057   match(Set dst (AddVL (Binary dst src2) mask));
24058   match(Set dst (AddVF (Binary dst src2) mask));
24059   match(Set dst (AddVD (Binary dst src2) mask));
24060   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24061   ins_encode %{
24062     int vlen_enc = vector_length_encoding(this);
24063     BasicType bt = Matcher::vector_element_basic_type(this);
24064     int opc = this->ideal_Opcode();
24065     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24066                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24067   %}
24068   ins_pipe( pipe_slow );
24069 %}
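
      // The trailing 'true' passed to evmasked_op is the merge flag: lanes
      // with a zero mask bit keep their previous $dst value. Example with
      // four int lanes: dst = {1, 2, 3, 4}, src2 = {10, 10, 10, 10},
      // mask = 0b0101 gives dst = {11, 2, 13, 4}. The same merge semantics
      // apply to the masked rules that follow.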
24070 
24071 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24072   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24073   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24074   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24075   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24076   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24077   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24078   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24079   ins_encode %{
24080     int vlen_enc = vector_length_encoding(this);
24081     BasicType bt = Matcher::vector_element_basic_type(this);
24082     int opc = this->ideal_Opcode();
24083     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24084                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24085   %}
24086   ins_pipe( pipe_slow );
24087 %}
24088 
24089 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24090   match(Set dst (XorV (Binary dst src2) mask));
24091   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24092   ins_encode %{
24093     int vlen_enc = vector_length_encoding(this);
24094     BasicType bt = Matcher::vector_element_basic_type(this);
24095     int opc = this->ideal_Opcode();
24096     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24097                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24098   %}
24099   ins_pipe( pipe_slow );
24100 %}
24101 
24102 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24103   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24104   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24105   ins_encode %{
24106     int vlen_enc = vector_length_encoding(this);
24107     BasicType bt = Matcher::vector_element_basic_type(this);
24108     int opc = this->ideal_Opcode();
24109     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24110                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24111   %}
24112   ins_pipe( pipe_slow );
24113 %}
24114 
24115 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24116   match(Set dst (OrV (Binary dst src2) mask));
24117   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24118   ins_encode %{
24119     int vlen_enc = vector_length_encoding(this);
24120     BasicType bt = Matcher::vector_element_basic_type(this);
24121     int opc = this->ideal_Opcode();
24122     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24123                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24124   %}
24125   ins_pipe( pipe_slow );
24126 %}
24127 
24128 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24129   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24130   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24131   ins_encode %{
24132     int vlen_enc = vector_length_encoding(this);
24133     BasicType bt = Matcher::vector_element_basic_type(this);
24134     int opc = this->ideal_Opcode();
24135     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24136                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24137   %}
24138   ins_pipe( pipe_slow );
24139 %}
24140 
24141 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24142   match(Set dst (AndV (Binary dst src2) mask));
24143   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24144   ins_encode %{
24145     int vlen_enc = vector_length_encoding(this);
24146     BasicType bt = Matcher::vector_element_basic_type(this);
24147     int opc = this->ideal_Opcode();
24148     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24149                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24150   %}
24151   ins_pipe( pipe_slow );
24152 %}
24153 
24154 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24155   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24156   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24157   ins_encode %{
24158     int vlen_enc = vector_length_encoding(this);
24159     BasicType bt = Matcher::vector_element_basic_type(this);
24160     int opc = this->ideal_Opcode();
24161     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24162                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24163   %}
24164   ins_pipe( pipe_slow );
24165 %}
24166 
24167 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24168   match(Set dst (SubVB (Binary dst src2) mask));
24169   match(Set dst (SubVS (Binary dst src2) mask));
24170   match(Set dst (SubVI (Binary dst src2) mask));
24171   match(Set dst (SubVL (Binary dst src2) mask));
24172   match(Set dst (SubVF (Binary dst src2) mask));
24173   match(Set dst (SubVD (Binary dst src2) mask));
24174   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24175   ins_encode %{
24176     int vlen_enc = vector_length_encoding(this);
24177     BasicType bt = Matcher::vector_element_basic_type(this);
24178     int opc = this->ideal_Opcode();
24179     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24180                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24181   %}
24182   ins_pipe( pipe_slow );
24183 %}
24184 
24185 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24186   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24187   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24188   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24189   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24190   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24191   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24192   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24193   ins_encode %{
24194     int vlen_enc = vector_length_encoding(this);
24195     BasicType bt = Matcher::vector_element_basic_type(this);
24196     int opc = this->ideal_Opcode();
24197     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24198                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24199   %}
24200   ins_pipe( pipe_slow );
24201 %}
24202 
24203 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24204   match(Set dst (MulVS (Binary dst src2) mask));
24205   match(Set dst (MulVI (Binary dst src2) mask));
24206   match(Set dst (MulVL (Binary dst src2) mask));
24207   match(Set dst (MulVF (Binary dst src2) mask));
24208   match(Set dst (MulVD (Binary dst src2) mask));
24209   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24210   ins_encode %{
24211     int vlen_enc = vector_length_encoding(this);
24212     BasicType bt = Matcher::vector_element_basic_type(this);
24213     int opc = this->ideal_Opcode();
24214     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24215                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24216   %}
24217   ins_pipe( pipe_slow );
24218 %}
24219 
24220 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24221   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24222   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24223   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24224   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24225   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24226   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24227   ins_encode %{
24228     int vlen_enc = vector_length_encoding(this);
24229     BasicType bt = Matcher::vector_element_basic_type(this);
24230     int opc = this->ideal_Opcode();
24231     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24232                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24233   %}
24234   ins_pipe( pipe_slow );
24235 %}
24236 
24237 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24238   match(Set dst (SqrtVF dst mask));
24239   match(Set dst (SqrtVD dst mask));
24240   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24241   ins_encode %{
24242     int vlen_enc = vector_length_encoding(this);
24243     BasicType bt = Matcher::vector_element_basic_type(this);
24244     int opc = this->ideal_Opcode();
24245     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24246                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24247   %}
24248   ins_pipe( pipe_slow );
24249 %}
24250 
24251 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24252   match(Set dst (DivVF (Binary dst src2) mask));
24253   match(Set dst (DivVD (Binary dst src2) mask));
24254   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24255   ins_encode %{
24256     int vlen_enc = vector_length_encoding(this);
24257     BasicType bt = Matcher::vector_element_basic_type(this);
24258     int opc = this->ideal_Opcode();
24259     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24260                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24261   %}
24262   ins_pipe( pipe_slow );
24263 %}
24264 
24265 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24266   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24267   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24268   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24269   ins_encode %{
24270     int vlen_enc = vector_length_encoding(this);
24271     BasicType bt = Matcher::vector_element_basic_type(this);
24272     int opc = this->ideal_Opcode();
24273     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24274                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24275   %}
24276   ins_pipe( pipe_slow );
24277 %}
24278 
24279 
24280 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24281   match(Set dst (RotateLeftV (Binary dst shift) mask));
24282   match(Set dst (RotateRightV (Binary dst shift) mask));
24283   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24284   ins_encode %{
24285     int vlen_enc = vector_length_encoding(this);
24286     BasicType bt = Matcher::vector_element_basic_type(this);
24287     int opc = this->ideal_Opcode();
24288     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24289                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24290   %}
24291   ins_pipe( pipe_slow );
24292 %}
24293 
24294 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24295   match(Set dst (RotateLeftV (Binary dst src2) mask));
24296   match(Set dst (RotateRightV (Binary dst src2) mask));
24297   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24298   ins_encode %{
24299     int vlen_enc = vector_length_encoding(this);
24300     BasicType bt = Matcher::vector_element_basic_type(this);
24301     int opc = this->ideal_Opcode();
24302     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24303                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24304   %}
24305   ins_pipe( pipe_slow );
24306 %}
24307 
24308 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24309   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24310   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24311   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24312   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24313   ins_encode %{
24314     int vlen_enc = vector_length_encoding(this);
24315     BasicType bt = Matcher::vector_element_basic_type(this);
24316     int opc = this->ideal_Opcode();
24317     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24318                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24319   %}
24320   ins_pipe( pipe_slow );
24321 %}
24322 
24323 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24324   predicate(!n->as_ShiftV()->is_var_shift());
24325   match(Set dst (LShiftVS (Binary dst src2) mask));
24326   match(Set dst (LShiftVI (Binary dst src2) mask));
24327   match(Set dst (LShiftVL (Binary dst src2) mask));
24328   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24329   ins_encode %{
24330     int vlen_enc = vector_length_encoding(this);
24331     BasicType bt = Matcher::vector_element_basic_type(this);
24332     int opc = this->ideal_Opcode();
24333     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24334                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24335   %}
24336   ins_pipe( pipe_slow );
24337 %}
24338 
24339 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24340   predicate(n->as_ShiftV()->is_var_shift());
24341   match(Set dst (LShiftVS (Binary dst src2) mask));
24342   match(Set dst (LShiftVI (Binary dst src2) mask));
24343   match(Set dst (LShiftVL (Binary dst src2) mask));
24344   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24345   ins_encode %{
24346     int vlen_enc = vector_length_encoding(this);
24347     BasicType bt = Matcher::vector_element_basic_type(this);
24348     int opc = this->ideal_Opcode();
24349     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24350                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24351   %}
24352   ins_pipe( pipe_slow );
24353 %}
24354 
24355 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24356   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24357   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24358   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24359   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24360   ins_encode %{
24361     int vlen_enc = vector_length_encoding(this);
24362     BasicType bt = Matcher::vector_element_basic_type(this);
24363     int opc = this->ideal_Opcode();
24364     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24365                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24366   %}
24367   ins_pipe( pipe_slow );
24368 %}
24369 
24370 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24371   predicate(!n->as_ShiftV()->is_var_shift());
24372   match(Set dst (RShiftVS (Binary dst src2) mask));
24373   match(Set dst (RShiftVI (Binary dst src2) mask));
24374   match(Set dst (RShiftVL (Binary dst src2) mask));
24375   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24376   ins_encode %{
24377     int vlen_enc = vector_length_encoding(this);
24378     BasicType bt = Matcher::vector_element_basic_type(this);
24379     int opc = this->ideal_Opcode();
24380     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24381                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24382   %}
24383   ins_pipe( pipe_slow );
24384 %}
24385 
24386 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24387   predicate(n->as_ShiftV()->is_var_shift());
24388   match(Set dst (RShiftVS (Binary dst src2) mask));
24389   match(Set dst (RShiftVI (Binary dst src2) mask));
24390   match(Set dst (RShiftVL (Binary dst src2) mask));
24391   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24392   ins_encode %{
24393     int vlen_enc = vector_length_encoding(this);
24394     BasicType bt = Matcher::vector_element_basic_type(this);
24395     int opc = this->ideal_Opcode();
24396     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24397                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24398   %}
24399   ins_pipe( pipe_slow );
24400 %}
24401 
24402 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24403   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24404   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24405   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24406   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24407   ins_encode %{
24408     int vlen_enc = vector_length_encoding(this);
24409     BasicType bt = Matcher::vector_element_basic_type(this);
24410     int opc = this->ideal_Opcode();
24411     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24412                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24413   %}
24414   ins_pipe( pipe_slow );
24415 %}
24416 
24417 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24418   predicate(!n->as_ShiftV()->is_var_shift());
24419   match(Set dst (URShiftVS (Binary dst src2) mask));
24420   match(Set dst (URShiftVI (Binary dst src2) mask));
24421   match(Set dst (URShiftVL (Binary dst src2) mask));
24422   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24423   ins_encode %{
24424     int vlen_enc = vector_length_encoding(this);
24425     BasicType bt = Matcher::vector_element_basic_type(this);
24426     int opc = this->ideal_Opcode();
24427     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24428                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24429   %}
24430   ins_pipe( pipe_slow );
24431 %}
24432 
24433 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24434   predicate(n->as_ShiftV()->is_var_shift());
24435   match(Set dst (URShiftVS (Binary dst src2) mask));
24436   match(Set dst (URShiftVI (Binary dst src2) mask));
24437   match(Set dst (URShiftVL (Binary dst src2) mask));
24438   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24439   ins_encode %{
24440     int vlen_enc = vector_length_encoding(this);
24441     BasicType bt = Matcher::vector_element_basic_type(this);
24442     int opc = this->ideal_Opcode();
24443     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24444                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24445   %}
24446   ins_pipe( pipe_slow );
24447 %}
24448 
24449 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24450   match(Set dst (MaxV (Binary dst src2) mask));
24451   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24452   ins_encode %{
24453     int vlen_enc = vector_length_encoding(this);
24454     BasicType bt = Matcher::vector_element_basic_type(this);
24455     int opc = this->ideal_Opcode();
24456     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24457                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24458   %}
24459   ins_pipe( pipe_slow );
24460 %}
24461 
24462 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24463   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24464   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24465   ins_encode %{
24466     int vlen_enc = vector_length_encoding(this);
24467     BasicType bt = Matcher::vector_element_basic_type(this);
24468     int opc = this->ideal_Opcode();
24469     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24470                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24471   %}
24472   ins_pipe( pipe_slow );
24473 %}
24474 
24475 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24476   match(Set dst (MinV (Binary dst src2) mask));
24477   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24478   ins_encode %{
24479     int vlen_enc = vector_length_encoding(this);
24480     BasicType bt = Matcher::vector_element_basic_type(this);
24481     int opc = this->ideal_Opcode();
24482     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24483                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24484   %}
24485   ins_pipe( pipe_slow );
24486 %}
24487 
24488 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24489   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24490   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24491   ins_encode %{
24492     int vlen_enc = vector_length_encoding(this);
24493     BasicType bt = Matcher::vector_element_basic_type(this);
24494     int opc = this->ideal_Opcode();
24495     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24496                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24497   %}
24498   ins_pipe( pipe_slow );
24499 %}
24500 
24501 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24502   match(Set dst (VectorRearrange (Binary dst src2) mask));
24503   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24504   ins_encode %{
24505     int vlen_enc = vector_length_encoding(this);
24506     BasicType bt = Matcher::vector_element_basic_type(this);
24507     int opc = this->ideal_Opcode();
24508     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24509                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24510   %}
24511   ins_pipe( pipe_slow );
24512 %}
24513 
24514 instruct vabs_masked(vec dst, kReg mask) %{
24515   match(Set dst (AbsVB dst mask));
24516   match(Set dst (AbsVS dst mask));
24517   match(Set dst (AbsVI dst mask));
24518   match(Set dst (AbsVL dst mask));
24519   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24520   ins_encode %{
24521     int vlen_enc = vector_length_encoding(this);
24522     BasicType bt = Matcher::vector_element_basic_type(this);
24523     int opc = this->ideal_Opcode();
24524     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24525                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24526   %}
24527   ins_pipe( pipe_slow );
24528 %}
24529 
24530 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24531   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24532   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24533   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24534   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24536     int vlen_enc = vector_length_encoding(this);
24537     BasicType bt = Matcher::vector_element_basic_type(this);
24538     int opc = this->ideal_Opcode();
24539     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24540                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24541   %}
24542   ins_pipe( pipe_slow );
24543 %}
24544 
24545 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24546   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24547   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24548   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24549   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24551     int vlen_enc = vector_length_encoding(this);
24552     BasicType bt = Matcher::vector_element_basic_type(this);
24553     int opc = this->ideal_Opcode();
24554     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24555                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24556   %}
24557   ins_pipe( pipe_slow );
24558 %}
24559 
24560 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24561   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24562   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24563   ins_encode %{
24564     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24565     int vlen_enc = vector_length_encoding(this, $src1);
24566     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24567 
    // Comparison, dispatched on the element basic type of src1.
24569     switch (src1_elem_bt) {
24570       case T_BYTE: {
24571         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24572         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24573         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24574         break;
24575       }
24576       case T_SHORT: {
24577         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24578         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24579         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24580         break;
24581       }
24582       case T_INT: {
24583         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24584         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24585         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24586         break;
24587       }
24588       case T_LONG: {
24589         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24590         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24591         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24592         break;
24593       }
24594       case T_FLOAT: {
24595         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24596         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24597         break;
24598       }
24599       case T_DOUBLE: {
24600         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24601         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24602         break;
24603       }
24604       default: assert(false, "%s", type2name(src1_elem_bt)); break;
24605     }
24606   %}
24607   ins_pipe( pipe_slow );
24608 %}
24609 
24610 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24611   predicate(Matcher::vector_length(n) <= 32);
24612   match(Set dst (MaskAll src));
24613   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24614   ins_encode %{
24615     int mask_len = Matcher::vector_length(this);
24616     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24617   %}
24618   ins_pipe( pipe_slow );
24619 %}
24620 
24621 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24622   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24623   match(Set dst (XorVMask src (MaskAll cnt)));
24624   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24625   format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
24626   ins_encode %{
24627     uint masklen = Matcher::vector_length(this);
24628     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24629   %}
24630   ins_pipe( pipe_slow );
24631 %}
24632 
24633 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24634   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24635             (Matcher::vector_length(n) == 16) ||
24636             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24637   match(Set dst (XorVMask src (MaskAll cnt)));
24638   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24639   ins_encode %{
24640     uint masklen = Matcher::vector_length(this);
24641     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24642   %}
24643   ins_pipe( pipe_slow );
24644 %}
24645 
24646 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24647   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24648   match(Set dst (VectorLongToMask src));
24649   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24650   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24651   ins_encode %{
24652     int mask_len = Matcher::vector_length(this);
24653     int vec_enc  = vector_length_encoding(mask_len);
24654     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24655                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24656   %}
24657   ins_pipe( pipe_slow );
24658 %}
24659 
24660 
24661 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24662   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24663   match(Set dst (VectorLongToMask src));
24664   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24665   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
24666   ins_encode %{
24667     int mask_len = Matcher::vector_length(this);
24668     assert(mask_len <= 32, "invalid mask length");
24669     int vec_enc  = vector_length_encoding(mask_len);
24670     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24671                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24672   %}
24673   ins_pipe( pipe_slow );
24674 %}
24675 
24676 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24677   predicate(n->bottom_type()->isa_vectmask());
24678   match(Set dst (VectorLongToMask src));
24679   format %{ "long_to_mask_evex $dst, $src\t!" %}
24680   ins_encode %{
24681     __ kmov($dst$$KRegister, $src$$Register);
24682   %}
24683   ins_pipe( pipe_slow );
24684 %}
24685 
24686 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24687   match(Set dst (AndVMask src1 src2));
24688   match(Set dst (OrVMask src1 src2));
24689   match(Set dst (XorVMask src1 src2));
24690   effect(TEMP kscratch);
24691   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24692   ins_encode %{
24693     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24694     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24695     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24696     uint masklen = Matcher::vector_length(this);
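    // Without AVX512DQ only word-granular (16-bit) k-register instructions
    // such as kandw are available, so sub-16-bit mask operations are widened
    // to 16 bits (assumption: byte-granular mask ops require AVX512DQ).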
24697     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24698     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24699   %}
24700   ins_pipe( pipe_slow );
24701 %}
24702 
24703 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24704   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24705   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24706   ins_encode %{
24707     int vlen_enc = vector_length_encoding(this);
24708     BasicType bt = Matcher::vector_element_basic_type(this);
24709     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24710                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24711   %}
24712   ins_pipe( pipe_slow );
24713 %}
24714 
24715 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24716   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24717   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24718   ins_encode %{
24719     int vlen_enc = vector_length_encoding(this);
24720     BasicType bt = Matcher::vector_element_basic_type(this);
24721     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24722                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24723   %}
24724   ins_pipe( pipe_slow );
24725 %}
24726 
24727 instruct castMM(kReg dst)
24728 %{
24729   match(Set dst (CastVV dst));
24730 
24731   size(0);
24732   format %{ "# castVV of $dst" %}
24733   ins_encode(/* empty encoding */);
24734   ins_cost(0);
24735   ins_pipe(empty);
24736 %}
24737 
24738 instruct castVV(vec dst)
24739 %{
24740   match(Set dst (CastVV dst));
24741 
24742   size(0);
24743   format %{ "# castVV of $dst" %}
24744   ins_encode(/* empty encoding */);
24745   ins_cost(0);
24746   ins_pipe(empty);
24747 %}
24748 
24749 instruct castVVLeg(legVec dst)
24750 %{
24751   match(Set dst (CastVV dst));
24752 
24753   size(0);
24754   format %{ "# castVV of $dst" %}
24755   ins_encode(/* empty encoding */);
24756   ins_cost(0);
24757   ins_pipe(empty);
24758 %}
24759 
24760 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
24761 %{
24762   match(Set dst (IsInfiniteF src));
24763   effect(TEMP ktmp, KILL cr);
24764   format %{ "float_class_check $dst, $src" %}
24765   ins_encode %{
24766     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24767     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24768   %}
24769   ins_pipe(pipe_slow);
24770 %}
24771 
24772 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
24773 %{
24774   match(Set dst (IsInfiniteD src));
24775   effect(TEMP ktmp, KILL cr);
24776   format %{ "double_class_check $dst, $src" %}
24777   ins_encode %{
24778     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24779     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24780   %}
24781   ins_pipe(pipe_slow);
24782 %}
24783 
24784 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
24785 %{
24786   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24787             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24788   match(Set dst (SaturatingAddV src1 src2));
24789   match(Set dst (SaturatingSubV src1 src2));
24790   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24791   ins_encode %{
24792     int vlen_enc = vector_length_encoding(this);
24793     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24794     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24795                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24796   %}
24797   ins_pipe(pipe_slow);
24798 %}
24799 
24800 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
24801 %{
24802   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24803             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24804   match(Set dst (SaturatingAddV src1 src2));
24805   match(Set dst (SaturatingSubV src1 src2));
24806   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24807   ins_encode %{
24808     int vlen_enc = vector_length_encoding(this);
24809     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24810     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24811                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24812   %}
24813   ins_pipe(pipe_slow);
24814 %}
24815 
24816 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
24817 %{
24818   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24819             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24820             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24821   match(Set dst (SaturatingAddV src1 src2));
24822   match(Set dst (SaturatingSubV src1 src2));
24823   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
24824   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
24825   ins_encode %{
24826     int vlen_enc = vector_length_encoding(this);
24827     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24828     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24829                                         $src1$$XMMRegister, $src2$$XMMRegister,
24830                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24831                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
24832   %}
24833   ins_pipe(pipe_slow);
24834 %}
24835 
24836 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
24837 %{
24838   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24839             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24840             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24841   match(Set dst (SaturatingAddV src1 src2));
24842   match(Set dst (SaturatingSubV src1 src2));
24843   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
24844   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
24845   ins_encode %{
24846     int vlen_enc = vector_length_encoding(this);
24847     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24848     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24849                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24850                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
24851   %}
24852   ins_pipe(pipe_slow);
24853 %}
24854 
24855 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
24856 %{
24857   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24858             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24859             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24860   match(Set dst (SaturatingAddV src1 src2));
24861   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
24862   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
24863   ins_encode %{
24864     int vlen_enc = vector_length_encoding(this);
24865     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24866     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24867                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24868   %}
24869   ins_pipe(pipe_slow);
24870 %}
24871 
24872 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
24873 %{
24874   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24875             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24876             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24877   match(Set dst (SaturatingAddV src1 src2));
24878   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24879   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24880   ins_encode %{
24881     int vlen_enc = vector_length_encoding(this);
24882     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24883     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24884                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
24885   %}
24886   ins_pipe(pipe_slow);
24887 %}
24888 
24889 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
24890 %{
24891   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24892             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24893             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24894   match(Set dst (SaturatingSubV src1 src2));
24895   effect(TEMP ktmp);
24896   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
24897   ins_encode %{
24898     int vlen_enc = vector_length_encoding(this);
24899     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24900     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24901                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24902   %}
24903   ins_pipe(pipe_slow);
24904 %}
24905 
24906 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
24907 %{
24908   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24909             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24910             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24911   match(Set dst (SaturatingSubV src1 src2));
24912   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24913   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
24914   ins_encode %{
24915     int vlen_enc = vector_length_encoding(this);
24916     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24917     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24918                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
24919   %}
24920   ins_pipe(pipe_slow);
24921 %}
24922 
24923 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
24924 %{
24925   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24926             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24927   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
24928   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
24929   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24930   ins_encode %{
24931     int vlen_enc = vector_length_encoding(this);
24932     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24933     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24934                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
24935   %}
24936   ins_pipe(pipe_slow);
24937 %}
24938 
24939 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
24940 %{
24941   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24942             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24943   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
24944   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
24945   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24946   ins_encode %{
24947     int vlen_enc = vector_length_encoding(this);
24948     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24949     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24950                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
24951   %}
24952   ins_pipe(pipe_slow);
24953 %}
24954 
24955 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
24956   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24957             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24958   match(Set dst (SaturatingAddV (Binary dst src) mask));
24959   match(Set dst (SaturatingSubV (Binary dst src) mask));
24960   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
24961   ins_encode %{
24962     int vlen_enc = vector_length_encoding(this);
24963     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24964     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
24965                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
24966   %}
24967   ins_pipe( pipe_slow );
24968 %}
24969 
24970 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
24971   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24972             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24973   match(Set dst (SaturatingAddV (Binary dst src) mask));
24974   match(Set dst (SaturatingSubV (Binary dst src) mask));
24975   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
24976   ins_encode %{
24977     int vlen_enc = vector_length_encoding(this);
24978     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24979     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
24980                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
24981   %}
24982   ins_pipe( pipe_slow );
24983 %}
24984 
24985 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
24986   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24987             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24988   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
24989   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
24990   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
24991   ins_encode %{
24992     int vlen_enc = vector_length_encoding(this);
24993     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24994     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
24995                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
24996   %}
24997   ins_pipe( pipe_slow );
24998 %}
24999 
25000 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25001   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25002             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25003   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25004   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25005   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25006   ins_encode %{
25007     int vlen_enc = vector_length_encoding(this);
25008     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25009     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25010                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25011   %}
25012   ins_pipe( pipe_slow );
25013 %}
25014 
25015 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25016 %{
25017   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25018   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25019   ins_encode %{
25020     int vlen_enc = vector_length_encoding(this);
25021     BasicType bt = Matcher::vector_element_basic_type(this);
25022     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25023   %}
25024   ins_pipe(pipe_slow);
25025 %}
25026 
25027 instruct reinterpretS2HF(regF dst, rRegI src)
25028 %{
25029   match(Set dst (ReinterpretS2HF src));
25030   format %{ "vmovw $dst, $src" %}
25031   ins_encode %{
25032     __ vmovw($dst$$XMMRegister, $src$$Register);
25033   %}
25034   ins_pipe(pipe_slow);
25035 %}
25036 
25037 instruct reinterpretHF2S(rRegI dst, regF src)
25038 %{
25039   match(Set dst (ReinterpretHF2S src));
25040   format %{ "vmovw $dst, $src" %}
25041   ins_encode %{
25042     __ vmovw($dst$$Register, $src$$XMMRegister);
25043   %}
25044   ins_pipe(pipe_slow);
25045 %}
25046 
25047 instruct convF2HFAndS2HF(regF dst, regF src)
25048 %{
25049   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25050   format %{ "convF2HFAndS2HF $dst, $src" %}
25051   ins_encode %{
25052     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25053   %}
25054   ins_pipe(pipe_slow);
25055 %}
25056 
25057 instruct convHF2SAndHF2F(regF dst, regF src)
25058 %{
25059   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25060   format %{ "convHF2SAndHF2F $dst, $src" %}
25061   ins_encode %{
25062     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25063   %}
25064   ins_pipe(pipe_slow);
25065 %}
25066 
25067 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25068 %{
25069   match(Set dst (SqrtHF src));
25070   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25071   ins_encode %{
25072     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25073   %}
25074   ins_pipe(pipe_slow);
25075 %}
25076 
25077 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25078 %{
25079   match(Set dst (AddHF src1 src2));
25080   match(Set dst (DivHF src1 src2));
25081   match(Set dst (MulHF src1 src2));
25082   match(Set dst (SubHF src1 src2));
25083   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25084   ins_encode %{
25085     int opcode = this->ideal_Opcode();
25086     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25087   %}
25088   ins_pipe(pipe_slow);
25089 %}
25090 
25091 instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2)
25092 %{
25093   predicate(VM_Version::supports_avx10_2());
25094   match(Set dst (MaxHF src1 src2));
25095   match(Set dst (MinHF src1 src2));
25096   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25097   ins_encode %{
25098     int function = this->ideal_Opcode() == Op_MinHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25099     __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25100   %}
25101   ins_pipe( pipe_slow );
25102 %}
25103 
25104 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25105 %{
25106   predicate(!VM_Version::supports_avx10_2());
25107   match(Set dst (MaxHF src1 src2));
25108   match(Set dst (MinHF src1 src2));
25109   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25110   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25111   ins_encode %{
25112     int opcode = this->ideal_Opcode();
25113     __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25114                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25115   %}
25116   ins_pipe( pipe_slow );
25117 %}
25118 
25119 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25120 %{
25121   match(Set dst (FmaHF  src2 (Binary dst src1)));
25122   effect(DEF dst);
25123   format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25124   ins_encode %{
25125     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25126   %}
25127   ins_pipe( pipe_slow );
25128 %}
25129 
25130 
25131 instruct vector_sqrt_HF_reg(vec dst, vec src)
25132 %{
25133   match(Set dst (SqrtVHF src));
25134   format %{ "vector_sqrt_fp16 $dst, $src" %}
25135   ins_encode %{
25136     int vlen_enc = vector_length_encoding(this);
25137     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25138   %}
25139   ins_pipe(pipe_slow);
25140 %}
25141 
25142 instruct vector_sqrt_HF_mem(vec dst, memory src)
25143 %{
25144   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25145   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25146   ins_encode %{
25147     int vlen_enc = vector_length_encoding(this);
25148     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25149   %}
25150   ins_pipe(pipe_slow);
25151 %}
25152 
25153 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25154 %{
25155   match(Set dst (AddVHF src1 src2));
25156   match(Set dst (DivVHF src1 src2));
25157   match(Set dst (MulVHF src1 src2));
25158   match(Set dst (SubVHF src1 src2));
25159   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25160   ins_encode %{
25161     int vlen_enc = vector_length_encoding(this);
25162     int opcode = this->ideal_Opcode();
25163     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25164   %}
25165   ins_pipe(pipe_slow);
25166 %}
25167 
25168 
25169 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25170 %{
25171   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25172   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25173   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25174   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25175   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25176   ins_encode %{
25177     int vlen_enc = vector_length_encoding(this);
25178     int opcode = this->ideal_Opcode();
25179     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25180   %}
25181   ins_pipe(pipe_slow);
25182 %}
25183 
25184 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25185 %{
25186   match(Set dst (FmaVHF src2 (Binary dst src1)));
25187   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25188   ins_encode %{
25189     int vlen_enc = vector_length_encoding(this);
25190     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25191   %}
25192   ins_pipe( pipe_slow );
25193 %}
25194 
25195 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25196 %{
25197   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25198   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25199   ins_encode %{
25200     int vlen_enc = vector_length_encoding(this);
25201     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25202   %}
25203   ins_pipe( pipe_slow );
25204 %}
25205 
25206 instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2)
25207 %{
25208   predicate(VM_Version::supports_avx10_2());
25209   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25210   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25211   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25212   ins_encode %{
25213     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25215     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25216   %}
25217   ins_pipe( pipe_slow );
25218 %}
25219 
25220 instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2)
25221 %{
25222   predicate(VM_Version::supports_avx10_2());
25223   match(Set dst (MinVHF src1 src2));
25224   match(Set dst (MaxVHF src1 src2));
25225   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25226   ins_encode %{
25227     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25229     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25230   %}
25231   ins_pipe( pipe_slow );
25232 %}
25233 
25234 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25235 %{
25236   predicate(!VM_Version::supports_avx10_2());
25237   match(Set dst (MinVHF src1 src2));
25238   match(Set dst (MaxVHF src1 src2));
25239   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25240   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25241   ins_encode %{
25242     int vlen_enc = vector_length_encoding(this);
25243     int opcode = this->ideal_Opcode();
25244     __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25245                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25246   %}
25247   ins_pipe( pipe_slow );
25248 %}
25249 
25250 //----------PEEPHOLE RULES-----------------------------------------------------
25251 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
25253 //
25254 // peeppredicate ( rule_predicate );
// // the rule is applied only when this predicate holds; otherwise it is ignored
25256 //
25257 // peepmatch ( root_instr_name [preceding_instruction]* );
25258 //
25259 // peepprocedure ( procedure_name );
// // provide the name of a procedure that performs the optimization; the
// // procedure should reside in the architecture-dependent peephole file and
// // have the signature bool (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...).
// // The arguments are the basic block, the current node index inside the
// // block, the register allocator, functions that, when invoked, return a new
// // node as defined in peepreplace, and the rule numbers of the nodes appearing
// // in the corresponding peepmatch. The procedure returns true if successful,
// // else false.
25268 //
25269 // peepconstraint %{
25270 // (instruction_number.operand_name relational_op instruction_number.operand_name
25271 //  [, ...] );
25272 // // instruction numbers are zero-based using left to right order in peepmatch
25273 //
25274 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25275 // // provide an instruction_number.operand_name for each operand that appears
25276 // // in the replacement instruction's match rule
25277 //
25278 // ---------VM FLAGS---------------------------------------------------------
25279 //
25280 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25281 //
25282 // Each peephole rule is given an identifying number starting with zero and
25283 // increasing by one in the order seen by the parser.  An individual peephole
25284 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25285 // on the command-line.
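//
// For example, -XX:OptoPeepholeAt=3 enables only the peephole rule numbered 3
// (the rule number here is purely illustrative).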
25286 //
25287 // ---------CURRENT LIMITATIONS----------------------------------------------
25288 //
25289 // Only transformations inside a basic block (do we need more for peephole)
25290 //
25291 // ---------EXAMPLE----------------------------------------------------------
25292 //
25293 // // pertinent parts of existing instructions in architecture description
25294 // instruct movI(rRegI dst, rRegI src)
25295 // %{
25296 //   match(Set dst (CopyI src));
25297 // %}
25298 //
25299 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25300 // %{
25301 //   match(Set dst (AddI dst src));
25302 //   effect(KILL cr);
25303 // %}
25304 //
25305 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25306 // %{
25307 //   match(Set dst (AddI dst src));
25308 // %}
25309 //
25310 // 1. Simple replacement
25311 // - Only match adjacent instructions in same basic block
25312 // - Only equality constraints
25313 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25314 // - Only one replacement instruction
25315 //
25316 // // Change (inc mov) to lea
25317 // peephole %{
25318 //   // lea should only be emitted when beneficial
25319 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25320 //   // increment preceded by register-register move
25321 //   peepmatch ( incI_rReg movI );
25322 //   // require that the destination register of the increment
25323 //   // match the destination register of the move
25324 //   peepconstraint ( 0.dst == 1.dst );
25325 //   // construct a replacement instruction that sets
25326 //   // the destination to ( move's source register + one )
25327 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25328 // %}
25329 //
25330 // 2. Procedural replacement
// - More flexible matching of relevant nodes
25332 // - More flexible constraints
25333 // - More flexible transformations
25334 // - May utilise architecture-dependent API more effectively
25335 // - Currently only one replacement instruction due to adlc parsing capabilities
25336 //
25337 // // Change (inc mov) to lea
25338 // peephole %{
25339 //   // lea should only be emitted when beneficial
25340 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of the matched nodes are passed into the procedure below
25342 //   peepmatch ( incI_rReg movI );
25343 //   // the method that takes the responsibility of transformation
25344 //   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that creates this node
//   // when invoked is passed into the procedure above
25347 //   peepreplace ( leaI_rReg_immI() );
25348 // %}
25349 
// These instructions are not matched by the matcher but are used by the peephole rules
25351 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25352 %{
25353   predicate(false);
25354   match(Set dst (AddI src1 src2));
25355   format %{ "leal    $dst, [$src1 + $src2]" %}
25356   ins_encode %{
25357     Register dst = $dst$$Register;
25358     Register src1 = $src1$$Register;
25359     Register src2 = $src2$$Register;
25360     if (src1 != rbp && src1 != r13) {
25361       __ leal(dst, Address(src1, src2, Address::times_1));
25362     } else {
25363       assert(src2 != rbp && src2 != r13, "");
25364       __ leal(dst, Address(src2, src1, Address::times_1));
25365     }
25366   %}
25367   ins_pipe(ialu_reg_reg);
25368 %}
25369 
25370 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25371 %{
25372   predicate(false);
25373   match(Set dst (AddI src1 src2));
25374   format %{ "leal    $dst, [$src1 + $src2]" %}
25375   ins_encode %{
25376     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25377   %}
25378   ins_pipe(ialu_reg_reg);
25379 %}
25380 
25381 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25382 %{
25383   predicate(false);
25384   match(Set dst (LShiftI src shift));
25385   format %{ "leal    $dst, [$src << $shift]" %}
25386   ins_encode %{
25387     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25388     Register src = $src$$Register;
25389     if (scale == Address::times_2 && src != rbp && src != r13) {
25390       __ leal($dst$$Register, Address(src, src, Address::times_1));
25391     } else {
25392       __ leal($dst$$Register, Address(noreg, src, scale));
25393     }
25394   %}
25395   ins_pipe(ialu_reg_reg);
25396 %}
25397 
25398 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25399 %{
25400   predicate(false);
25401   match(Set dst (AddL src1 src2));
25402   format %{ "leaq    $dst, [$src1 + $src2]" %}
25403   ins_encode %{
25404     Register dst = $dst$$Register;
25405     Register src1 = $src1$$Register;
25406     Register src2 = $src2$$Register;
25407     if (src1 != rbp && src1 != r13) {
25408       __ leaq(dst, Address(src1, src2, Address::times_1));
25409     } else {
25410       assert(src2 != rbp && src2 != r13, "");
25411       __ leaq(dst, Address(src2, src1, Address::times_1));
25412     }
25413   %}
25414   ins_pipe(ialu_reg_reg);
25415 %}
25416 
25417 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25418 %{
25419   predicate(false);
25420   match(Set dst (AddL src1 src2));
25421   format %{ "leaq    $dst, [$src1 + $src2]" %}
25422   ins_encode %{
25423     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25424   %}
25425   ins_pipe(ialu_reg_reg);
25426 %}
25427 
25428 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25429 %{
25430   predicate(false);
25431   match(Set dst (LShiftL src shift));
25432   format %{ "leaq    $dst, [$src << $shift]" %}
25433   ins_encode %{
25434     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25435     Register src = $src$$Register;
25436     if (scale == Address::times_2 && src != rbp && src != r13) {
25437       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25438     } else {
25439       __ leaq($dst$$Register, Address(noreg, src, scale));
25440     }
25441   %}
25442   ins_pipe(ialu_reg_reg);
25443 %}
25444 
// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (VM_Version::supports_fast_2op_lea()), whereas the {inc, dec} rules are
// generally beneficial only on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
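//
// For example (hypothetical registers, for illustration only), the pair
//   movl rbx, rax
//   incl rbx
// can be coalesced by the inc rule below into
//   leal rbx, [rax + 1]
// replacing two dependent instructions with a single lea.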
25451 
25452 peephole
25453 %{
25454   peeppredicate(VM_Version::supports_fast_2op_lea());
25455   peepmatch (addI_rReg);
25456   peepprocedure (lea_coalesce_reg);
25457   peepreplace (leaI_rReg_rReg_peep());
25458 %}
25459 
25460 peephole
25461 %{
25462   peeppredicate(VM_Version::supports_fast_2op_lea());
25463   peepmatch (addI_rReg_imm);
25464   peepprocedure (lea_coalesce_imm);
25465   peepreplace (leaI_rReg_immI_peep());
25466 %}
25467 
25468 peephole
25469 %{
25470   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25471                 VM_Version::is_intel_cascade_lake());
25472   peepmatch (incI_rReg);
25473   peepprocedure (lea_coalesce_imm);
25474   peepreplace (leaI_rReg_immI_peep());
25475 %}
25476 
25477 peephole
25478 %{
25479   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25480                 VM_Version::is_intel_cascade_lake());
25481   peepmatch (decI_rReg);
25482   peepprocedure (lea_coalesce_imm);
25483   peepreplace (leaI_rReg_immI_peep());
25484 %}
25485 
25486 peephole
25487 %{
25488   peeppredicate(VM_Version::supports_fast_2op_lea());
25489   peepmatch (salI_rReg_immI2);
25490   peepprocedure (lea_coalesce_imm);
25491   peepreplace (leaI_rReg_immI2_peep());
25492 %}
25493 
25494 peephole
25495 %{
25496   peeppredicate(VM_Version::supports_fast_2op_lea());
25497   peepmatch (addL_rReg);
25498   peepprocedure (lea_coalesce_reg);
25499   peepreplace (leaL_rReg_rReg_peep());
25500 %}
25501 
25502 peephole
25503 %{
25504   peeppredicate(VM_Version::supports_fast_2op_lea());
25505   peepmatch (addL_rReg_imm);
25506   peepprocedure (lea_coalesce_imm);
25507   peepreplace (leaL_rReg_immL32_peep());
25508 %}
25509 
25510 peephole
25511 %{
25512   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25513                 VM_Version::is_intel_cascade_lake());
25514   peepmatch (incL_rReg);
25515   peepprocedure (lea_coalesce_imm);
25516   peepreplace (leaL_rReg_immL32_peep());
25517 %}
25518 
25519 peephole
25520 %{
25521   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25522                 VM_Version::is_intel_cascade_lake());
25523   peepmatch (decL_rReg);
25524   peepprocedure (lea_coalesce_imm);
25525   peepreplace (leaL_rReg_immL32_peep());
25526 %}
25527 
25528 peephole
25529 %{
25530   peeppredicate(VM_Version::supports_fast_2op_lea());
25531   peepmatch (salL_rReg_immI2);
25532   peepprocedure (lea_coalesce_imm);
25533   peepreplace (leaL_rReg_immI2_peep());
25534 %}
25535 
25536 peephole
25537 %{
25538   peepmatch (leaPCompressedOopOffset);
25539   peepprocedure (lea_remove_redundant);
25540 %}
25541 
25542 peephole
25543 %{
25544   peepmatch (leaP8Narrow);
25545   peepprocedure (lea_remove_redundant);
25546 %}
25547 
25548 peephole
25549 %{
25550   peepmatch (leaP32Narrow);
25551   peepprocedure (lea_remove_redundant);
25552 %}
25553 
// These peephole rules match instructions which set flags and are followed by a
// testI/L_reg. The test instruction is redundant when the downstream
// instructions (like JCC or CMOV) only use flags that are already set by the
// previous instruction.
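//
// For example (hypothetical code, for illustration only), in the sequence
//   andl  rax, rbx
//   testl rax, rax
//   je    done
// the andl already sets ZF/SF, so test_may_remove can drop the testl.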
25556 
// int variant
25558 peephole
25559 %{
25560   peepmatch (testI_reg);
25561   peepprocedure (test_may_remove);
25562 %}
25563 
// long variant
25565 peephole
25566 %{
25567   peepmatch (testL_reg);
25568   peepprocedure (test_may_remove);
25569 %}
25570 
25571 
25572 //----------SMARTSPILL RULES---------------------------------------------------
25573 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.